Mercurial > repos > immuneml > immuneml_tools
view immuneml_train_ml_model.xml @ 8:65815d4754e5 draft
"planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
author | immuneml |
---|---|
date | Wed, 28 Jul 2021 09:48:46 +0000 |
parents | 45ca02982e1f |
children | e33b3d370124 |
line wrap: on
line source
<!--
  Galaxy tool wrapper that runs immune-ml with the GalaxyTrainMLModel tool option on a
  user-supplied YAML specification, optionally together with additional input files and/or
  a previously created immuneML dataset.
  NOTE(review): the @VERSION@ token and the "requirements" macro are presumably defined in
  prod_macros.xml, which is not visible from this file; confirm there.
-->
<tool id="immuneml_train_ml_model" name="Train machine learning models" version="@VERSION@.0">
  <description></description>
  <macros>
    <import>prod_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!--
    The command is a Cheetah template rendering to a single "&&" chain, so the first failing
    step stops the job. Galaxy-provided paths are single-quoted so that whitespace or shell
    metacharacters in them cannot split or alter the command.
  -->
  <command><![CDATA[
      ## Optional dataset input: copy its stored result folder into the working directory and
      ## flatten the repertoires/ subfolder (best effort; a missing folder is not an error).
      #if $iml_input
          cp -r '${iml_input.extra_files_path}'/result/* . &&
          (mv repertoires/* . > /dev/null 2>&1 || :) &&
          rm -rf repertoires &&
      #end if

      ## Optional additional files: link each one into the working directory under its
      ## element identifier, unless a file with that name is already present.
      ## The identifier is passed to printf as an argument (not embedded in a double-quoted
      ## string) so the shell never expands its content; the printed message is unchanged.
      #if $data_input
          #for $input in $data_input
              ([ -e ./'$input.element_identifier' ] && printf "File '%s' already exists in the input folder, skipping.\n" '$input.element_identifier' || ln -s '$input' '$input.element_identifier') &&
          #end for
      #end if

      ## Run immuneML on a copy of the YAML specification, writing into the HTML output's files directory.
      cp '$yaml_input' yaml_copy &&
      immune-ml ./yaml_copy '${html_outfile.files_path}' --tool GalaxyTrainMLModel &&

      ## Move the produced files onto the declared Galaxy outputs.
      ## NOTE(review): the *.zip glob is assumed to match exactly one exported model; with
      ## several matches mv would treat the last argument as a directory and fail. TODO confirm.
      mv '${html_outfile.files_path}/index.html' '$html_outfile' &&
      mv '${html_outfile.files_path}'/exported_models/*.zip '$optimal_model' &&
      mv '${html_outfile.files_path}/immuneML_output.zip' '$archive'
  ]]></command>
  <inputs>
    <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/>
    <param name="data_input" type="data" multiple="true" label="Additional files" optional="true"
           help="This field should include individual repertoire files, metadata files, receptor data and others."/>
    <param name="iml_input" type="data" format="immuneml_receptors" label="Dataset input" optional="true"
           help="This field accepts an ImmuneML dataset, as created by the Create Dataset tool."/>
  </inputs>
  <outputs>
    <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/>
    <data format="zip" name="archive" label="Archive: ML model training"/>
    <data format="html" name="html_outfile" label="Summary: ML model training"/>
  </outputs>
  <!-- The help body is reStructuredText; it is kept at column 0 because its indentation is significant. -->
  <help><![CDATA[
This tool can be used to run hyperparameter optimization over several different ML settings, which include ML models and their parameters, encodings and preprocessing steps.
Nested cross-validation is used to identify the optimal combination of ML settings.

This is a YAML-based Galaxy tool; if you prefer a button-based interface that assumes less ML knowledge, see
`Train immune receptor classifiers (easy interface) <root?tool_id=immuneml_train_classifiers>`_ and
`Train immune repertoire classifiers (easy interface) <root?tool_id=novice_immuneml_interface>`_.

For more details on how to train ML models in Galaxy, see `the documentation <https://docs.immuneml.uio.no/latest/galaxy/galaxy_train_ml_models.html>`_.

**Tool output**

This Galaxy tool will produce the following history elements:

- Summary: ML model training: an HTML page that allows you to browse through all results, including prediction accuracies on the various data splits and report results.

- Archive: ML model training: a .zip file containing the complete output folder as it was produced by immuneML. This folder
  contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results.
  Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file.

- optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding, and optionally preprocessing steps).
  This .zip file can subsequently be used as an input when `applying previously trained ML models to a new AIRR dataset in Galaxy <https://docs.immuneml.uio.no/latest/galaxy/galaxy_apply_ml_models.html>`_.
  ]]></help>
</tool>