Mercurial > repos > immuneml > immuneml_tools

diff immuneml_train_ml_model.xml @ 0:629e7e403e19 draft
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
author: immuneml
date: Thu, 01 Jul 2021 11:36:43 +0000
children: ed3932e6d616
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/immuneml_train_ml_model.xml	Thu Jul 01 11:36:43 2021 +0000
@@ -0,0 +1,72 @@
+<tool id="immuneml_train_ml_model" name="Train machine learning models" version="@VERSION@.0">
+  <description></description>
+    <macros>
+        <import>prod_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+  <command><![CDATA[
+
+      #if $iml_input
+         cp -r ${iml_input.extra_files_path}/result/* . &&
+	 (mv repertoires/* . &>/dev/null || :) &&
+	 rm -rf repertoires &&
+      #end if
+
+      #set $input_orig_names = []
+      #if $data_input
+          #for $input in $data_input
+             #set input_orig_names += [str($input.element_identifier)]
+             ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s $input "$input.element_identifier") &&
+          #end for#
+      #end if
+
+      cp "$yaml_input" yaml_copy &&
+      immune-ml ./yaml_copy ${html_outfile.files_path} --tool GalaxyTrainMLModel &&
+      mv ${html_outfile.files_path}/index.html ${html_outfile} &&
+      mv ${html_outfile.files_path}/exported_models/*.zip ${optimal_model} &&
+      mv ${html_outfile.files_path}/immuneML_output.zip $archive
+      ]]>
+  </command>
+  <inputs>
+      <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/>
+      <param name="data_input" type="data" multiple="true" label="Additional files" optional="true" help="This field should include individual repertoire files, metadata files, receptor data and others."/>
+      <param name="iml_input" type="data" format="iml_dataset" label="Dataset input" optional="true" help="This field accepts an ImmuneML dataset, as created by the Create Dataset tool."/>
+  </inputs>
+    <outputs>
+        <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/>
+        <data format="zip" name="archive" label="Archive: ML model training"/>
+        <data format="html" name="html_outfile" label="Summary: ML model training"/>
+    </outputs>
+
+
+  <help>
+      <![CDATA[
+
+      This tool can be used to run hyperparameter optimization over several different ML settings,
+      which include ML models and their parameters, encodings and preprocessing steps. Nested cross-validation is used to identify the optimal combination of ML settings.
+
+      This is a YAML-based Galaxy tool, if you prefer a button-based interface that assumes less ML knowledge,
+      see `Train immune receptor classifiers (easy interface) <https://galaxy.immuneml.uio.no/root?tool_id=immuneml_train_classifiers>`_ and
+      `Train immune repertoire classifiers (easy interface) <https://galaxy.immuneml.uio.no/root?tool_id=novice_immuneml_interface>`_.
+
+      For more details on how to train ML models in Galaxy, see `the documentation <https://docs.immuneml.uio.no/galaxy/galaxy_train_ml_models.html>`_.
+
+      **Tool output**
+
+      This Galaxy tool will produce the following history elements:
+
+      - Summary: ML model training: a HTML page that allows you to browse through all results, including prediction accuracies on
+        the various data splits and report results.
+
+      - Archive: ML model training: a .zip file containing the complete output folder as it was produced by immuneML. This folder
+        contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results.
+        Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file.
+
+      - optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding, and
+        optionally preprocessing steps). This .zip file can subsequently be used as an input when `applying previously trained ML models to a new AIRR dataset in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_apply_ml_models.html>`_.
+
+    ]]>
+
+  </help>
+
+</tool>
author	immuneml
date	Thu, 01 Jul 2021 11:36:43 +0000
parents
children	ed3932e6d616