annotate immuneml_train_repert.xml @ 16:cd57c1c66f8b draft

"planemo upload commit 60f280176ecdc2883fc7e85deb9aaa151f7c2088"
author immuneml
date Wed, 05 Jan 2022 09:53:02 +0000
parents 45ca02982e1f
children 051d349fdc8c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
1 <tool id="novice_immuneml_interface" name="Train immune repertoire classifiers (simplified interface)" version="@VERSION@.0">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
2 <description></description>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
3 <macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
4 <import>prod_macros.xml</import>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
5 </macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
6 <expand macro="requirements" />
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
7 <command><![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
8
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
9 #if $iml_input
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
10 cp -r ${iml_input.extra_files_path}/result/* . &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
11 (mv repertoires/* . &>/dev/null || :) &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
12 rm -rf repertoires &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
13 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
14
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
15 python '$__tool_directory__/build_yaml_from_arguments_wrapper.py' --output_path $specs.files_path
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
16 #if $labels
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
17 --labels "$labels"
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
18 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
19 #if $ml_methods
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
20 #set methods_splitted = str($ml_methods).replace(",", " ")
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
21 --ml_methods $methods_splitted
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
22 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
23 #if $training_percentage
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
24 --training_percentage $training_percentage
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
25 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
26 #if $split_count
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
27 --split_count $split_count
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
28 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
29 #if $sequence_cond.sequence_type
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
30 --sequence_type $sequence_cond.sequence_type
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
31 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
32 #if $sequence_cond.sequence_type == "subsequence"
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
33 --position_type $sequence_cond.position_type
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
34 --gap_type $sequence_cond.gap_cond.gap_type
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
35 #if $sequence_cond.gap_cond.gap_type == "ungapped"
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
36 --k $sequence_cond.gap_cond.k
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
37 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
38 #if $sequence_cond.gap_cond.gap_type == "gapped"
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
39 --k_left $sequence_cond.gap_cond.k_left
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
40 --k_right $sequence_cond.gap_cond.k_right
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
41 --min_gap $sequence_cond.gap_cond.min_gap
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
42 --max_gap $sequence_cond.gap_cond.max_gap
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
43 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
44 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
45 #if $reads
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
46 --reads $reads
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
47 #end if
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
48
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
49 && cp ${specs.files_path}/specs.yaml yaml_copy &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
50
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
51 immune-ml ./yaml_copy ${html_outfile.files_path} --tool GalaxyTrainMLModel
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
52
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
53 && mv ${html_outfile.files_path}/index.html ${html_outfile}
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
54 && mv ${specs.files_path}/specs.yaml ${specs}
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
55 && mv ${html_outfile.files_path}/immuneML_output.zip $archive
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
56 && mv ${html_outfile.files_path}/exported_models/*.zip ${optimal_model}
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
57 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
58 </command>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
59 <inputs>
16
cd57c1c66f8b "planemo upload commit 60f280176ecdc2883fc7e85deb9aaa151f7c2088"
immuneml
parents: 7
diff changeset
60 <param name="iml_input" type="data" format="immuneml_receptors" label="immuneML dataset (immune repertoires)" help="Here you can select an ImmuneML dataset containing a repertoire dataset, as produced by the ‘Create dataset’ tool. Please make sure your dataset contains enough repertoires, we recommend using at least 50. The minimum number of repertoires needed to run this tool successfully is 14 (for example: 7 diseased and 7 healthy). More repertoires are needed if your dataset is imbalanced (many more diseased or many more healthy), or if you decrease the percentage of data that is used for training. "/>
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
61 <param type="text" name="labels" optional="false" label="Which property (“label”) of the repertoires would you like to predict?" help="Repertoire property to predict could for example be a disease status. This property must be present as a label in the repertoire metadata."/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
62
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
63 <conditional name="sequence_cond">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
64 <param type="select" name="sequence_type" label="I assume that the true class of a repertoire (for example: disease status) can be determined based on the presence of..." display="radio" help="See 'Encoding' in the tool description.">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
65 <option value="subsequence">Similar (but not identical) CDR3 sequences, or identical subsequences</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
66 <option value="complete">Complete and identical receptor sequences</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
67 </param>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
68 <when value="subsequence">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
69 <param type="boolean" name="position_type" label="If the same CDR3 subsequence occurs in a different position in two receptors, is this expected to be the same signal? "
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
70 truevalue="invariant" falsevalue="positional" checked="true"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
71 <conditional name="gap_cond">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
72 <param type="select" name="gap_type" label="The signal is expected to correspond to:" display="radio">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
73 <option value="ungapped">Contiguous subsequences of amino acids</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
74 <option value="gapped">Subsequences of amino acids separated by a gap</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
75 </param>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
76 <when value="ungapped">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
77 <param type="integer" name="k" label="Given a contiguous subsequence of amino acids containing a signal, the expected length of this subsequence is:" value="3" min="1"/> <!-- k is the length of the contiguous subsequence passed as --k; a length of 0 describes no subsequence at all, so the lower bound is 1 -->
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
78 </when>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
79 <when value="gapped">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
80 <param type="integer" name="k_left" label="Given a gapped signal, the sequence length before the gap is:" value="2" min="1"/> <!-- passed as --k_left; an empty flank before the gap would not describe a gapped signal, so the lower bound is 1 (NOTE(review): confirm against the encoder's accepted range) -->
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
81 <param type="integer" name="k_right" label="And the sequence length after the gap is:" value="2" min="1"/> <!-- passed as --k_right; an empty flank after the gap would not describe a gapped signal, so the lower bound is 1 (NOTE(review): confirm against the encoder's accepted range) -->
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
82 <param type="integer" name="min_gap" label="While the minimal gap length is:" value="0" min="0"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
83 <param type="integer" name="max_gap" label="And the maximal gap length is:" value="5" min="0"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
84 </when>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
85 </conditional>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
86 </when>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
87 </conditional>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
88 <param type="select" name="reads" label="I assume that" display="radio" help="If only the presence/absence of a clonotype matters, the read frequency (‘count’) information is ignored. Otherwise, the importance of a sequence or subsequence is scaled by its read frequency, and large clonotypes will have more influence on the ML model and its results.">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
89 <option value="unique">Only the presence/absence of a clone matters</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
90 <option value="all">The frequency of a clone matters</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
91 </param>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
92
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
93 <param type="select" name="ml_methods" label="Which ML methods would you like to include?" help="For each ML method, the optimal hyper parameter settings are determined and the performance of the methods is compared to each other."
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
94 display="checkboxes" multiple="true">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
95 <option value="RandomForestClassifier">Random forest</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
96 <option value="SimpleLogisticRegression">Logistic regression</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
97 <option value="SVM">Support Vector Machine</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
98 <option value="KNN">K-nearest neighbors</option>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
99 </param>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
100
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
101 <param type="integer" name="training_percentage" label="Percentage of data that is used for training + validation (the remainder is used for testing):" value="70" min="50" max="90" help="This part of the data is used for training the classifier i.e., learning the relevant patterns in the data and determining the optimal hyper parameter settings for the classifier. The remaining data is used to test the performance of the classifier. There is no golden rule that determines the optimal percentage of training data, but typically a value between 60 and 80% is chosen."/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
102 <param type="integer" name="split_count" label="Number of times to repeat the training process with different random splits of data:" value="5" min="1" help="This is the number of times we split into random sets for training + validation and testing. The more often the experiment is repeated, the better the performance of the ML models can be estimated, but the longer it will take for the analysis to complete. "/> <!-- passed as --split_count; at least one train/test split is needed for any model to be trained and assessed, so the lower bound is 1 -->
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
103
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
104 </inputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
105 <outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
106 <data format="txt" name="specs" label="repertoire_classification.yaml"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
107 <data format="zip" name="optimal_model" label="optimal_ml_settings.zip"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
108 <data format="zip" name="archive" label="Archive: repertoire classification"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
109 <data format="html" name="html_outfile" label="Summary: repertoire classification"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
110 </outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
111
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
112
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
113 <help><![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
114 The purpose of this tool is to train machine learning (ML) models to predict a characteristic per immune repertoire, such as
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
115 a disease status. One or more ML models are trained to classify repertoires based on the information within the sets of CDR3 sequences. Finally, the performance
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
116 of the different methods is compared.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
117 Alternatively, if you want to predict a property per immune receptor, such as antigen specificity, check out the
7
45ca02982e1f "planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents: 6
diff changeset
118 `Train immune receptor classifiers (simplified interface) <root?tool_id=immuneml_train_classifiers>`_ tool instead.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
119
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
120 The full documentation can be found `here <https://docs.immuneml.uio.no/latest/galaxy/galaxy_simple_repertoires.html>`_.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
121
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
122 **Basic terminology**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
123
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
124 In the context of ML, the characteristics to predict per repertoire are called **labels** and the values that these labels can take on are **classes**.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
125 One could thus have a label named ‘CMV_status’ with possible classes ‘positive’ and ‘negative’. The labels and classes must be present in the metadata
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
126 file, in columns where the header and values correspond to the label and classes respectively.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
127
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
128 .. image:: https://docs.immuneml.uio.no/latest/_images/metadata_repertoire_classification.png
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
129 :height: 150
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
130
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
131 |
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
132
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
133 When training an ML model, the goal is for the model to learn **signals** within the data which discriminate between the different classes. An ML model
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
134 that predicts classes is also referred to as a **classifier**. A signal can have a variety of definitions, including the presence of specific receptors,
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
135 groups of similar receptors or short CDR3 subsequences in an immune repertoire. Our assumptions about what makes up a ‘signal’ determine how we
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
136 should represent our data to the ML model. This representation is called **encoding**. In this tool, the encoding is automatically chosen based on
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
137 the user's assumptions about the dataset.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
138
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
139
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
140 .. image:: https://docs.immuneml.uio.no/latest/_images/repertoire_classification_overview.png
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
141 :height: 500
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
142
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
143 |
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
144 |
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
145
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
146 **An overview of the components of the immuneML repertoire classification tool.**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
147 immuneML reads in repertoire data with labels (+ and -), encodes the
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
148 data, trains user-specified ML models and summarizes the performance statistics per ML method.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
149 Encoding: different forms of encoding are shown: full-sequence encoding, and position-dependent and position-invariant subsequence encoding.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
150 The disease-associated sequences or sub-sequences are highlighted with color. The different colors represent independent elements of the disease signal.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
151 Each color represents one (sub)sequence, and position dependent subsequences can only have the same color when they occur in the same position,
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
152 although different colors (i.e., nucleotide or amino acid sequences) may occur in the same position.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
153 Training: the training and validation data is used to train ML models and find the optimal hyperparameters through 5-fold cross-validation.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
154 The test set is left out and is used to obtain a fair estimate of the model performance.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
155
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
156 **Encoding**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
157
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
158 The simplest encoding represents an immune repertoire based on the full CDR3 sequences that it contains. This means the ML models will learn to look
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
159 at which CDR3 sequences are more often present in the ‘positive’ or ‘negative’ classes. It also means that two similar (non-identical) CDR3 sequences
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
160 are treated as independent pieces of information; if a particular sequence often occurs in diseased repertoires, then finding a similar sequence in a
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
161 new repertoire is no evidence for this repertoire also being diseased.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
162
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
163 Other encoding variants are based on shorter subsequences (e.g., 3 – 5 amino acids long, also referred to as k-mers) in the CDR3 regions of an immune repertoire. With this
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
164 encoding, the CDR3 regions are divided into overlapping subsequences and the (disease) signal may be characterized by the presence or absence of
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
165 certain sequence motifs in the CDR3 regions. Here, two similar CDR3 sequences are no longer independent, because they contain many identical subsequences.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
166 A graphical representation of how a CDR3 sequence can be divided into k-mers, and how these k-mers can relate to specific positions in a 3D immune receptor
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
167 (here: antibody) is shown in this figure:
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
168
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
169 .. image:: https://docs.immuneml.uio.no/latest/_images/3mer_to_3d.png
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
170 :height: 250
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
171
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
172 |
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
173
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
174 The subsequences may be position-dependent or position-invariant. Position-invariant means that if a subsequence, e.g., ‘EDNA’, occurs in different positions
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
175 in the CDR3 it will still be considered the same signal. This is not the case for position-dependent subsequences: if ‘EDNA’ often occurs at the
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
176 beginning of the CDR3 in diseased repertoires, then finding ‘EDNA’ at the end of a CDR3 in a new repertoire will be considered unrelated. Positions
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
177 are determined based on the IMGT numbering scheme.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
178
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
179 Finally, it is possible to introduce gaps in the encoding of subsequences (not shown in the figure). In this case, a motif is defined by two
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
180 subsequences separated by a region of varying nucleotide or amino acid length. Thus, the subsequences ‘EDNA’, ‘EDGNA’ and ‘EDGAGAGNA’ may all be
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
181 considered to be part of the same motif: ‘ED’ followed by ‘NA’ with a gap of 0 – 5 amino acids in between.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
182
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
183 Note that in any case, the (sub)sequences that are associated with the ‘positive’ class may still be present in the ‘negative’ class, albeit at a lower rate.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
184
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
185
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
186
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
187 **Training a machine learning model**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
188
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
189 Training an ML model means optimizing the **parameters** for the model with the goal of predicting the correct class of an (unseen) immune repertoire.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
190 Different ML methods require different procedures for training. In addition to the model parameters there are the **hyperparameters**, which
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
191 do not directly change the predictions of a model, but they control the learning process (for example: the learning speed).
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
192
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
193 The immune repertoires are divided into sets with different purposes: the training and validation sets are used for finding the optimal parameters
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
194 and hyperparameters, respectively. The test set is held out, and is only used to estimate the performance of a trained model.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
195
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
196 In this tool, a range of plausible hyperparameters have been predefined for each ML method. The optimal hyperparameters are found by splitting the
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
197 training/validation data into 5 equal portions, where 4 portions are used to train the ML model (with different hyperparameters) and the remaining
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
198 portion is used to validate the performance of these hyperparameter settings. This is repeated 5 times such that each portion has been used for
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
199 validation once. With the best hyperparameters found in the 5 repetitions, a final model is trained using all 5 portions of the data. This procedure
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
200 is also referred to as 5-fold cross-validation. Note that this 5-fold cross-validation is separate from the number of times the splitting into
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
201 training + validation and testing sets is done (see the overview figure).
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
202
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
203 Finally, the whole process is repeated one or more times with different randomly selected repertoires in the test set, to see how robust the performance
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
204 of the ML methods is. The number of times to repeat this splitting into training + validation and test sets is determined in the last question.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
205
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
206
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
207 **Tool output**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
208
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
209 This Galaxy tool will produce the following history elements:
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
210
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
211 - Summary: repertoire classification: an HTML page that allows you to browse through all results, including prediction accuracies on
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
212 the various data splits and plots showing the performance of classifiers and learned parameters.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
213
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
214 - Archive: repertoire classification: a .zip file containing the complete output folder as it was produced by immuneML. This folder
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
215 contains the output of the TrainMLModel instruction including all trained models and their predictions, and report results.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
216 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
217
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
218 - optimal_ml_settings.zip: a .zip file containing the raw files for the optimal trained ML settings (ML model, encoding).
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
219 This .zip file can subsequently be used as an input when `applying previously trained ML models to a new AIRR dataset in Galaxy <https://docs.immuneml.uio.no/latest/galaxy/galaxy_apply_ml_models.html>`_.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
220
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
221 - repertoire_classification.yaml: the YAML specification file that was used by immuneML internally to run the analysis. This file can be
7
45ca02982e1f "planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents: 6
diff changeset
222 downloaded, altered, and run again by immuneML using the `Train machine learning models <root?tool_id=immuneml_train_ml_model>`_ Galaxy tool.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
223
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
224 **More analysis options**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
225
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
226 A limited selection of immuneML options is available through this tool. If you wish to have full control of the analysis, consider using
7
45ca02982e1f "planemo upload commit 8aef44a2b3bc8fc00a1efe0ce7ecab83eded053f-dirty"
immuneml
parents: 6
diff changeset
227 the `Train machine learning models <root?tool_id=immuneml_train_ml_model>`_ Galaxy tool.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
228 This tool provides other encodings and machine learning methods to choose from, as well as
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
229 data preprocessing and settings for hyperparameter optimization. The interface of the YAML-based tool expects more independence and knowledge about
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
230 machine learning from the user.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
231
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
232 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
233 </help>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
234
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
235 </tool>