annotate immuneml_simulate_dataset.xml @ 6:2d3dd9ff7e84 draft

"planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
author immuneml
date Tue, 27 Jul 2021 09:30:50 +0000
parents ed3932e6d616
children 45ca02982e1f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
1 <tool id="immuneml_simulate_dataset" name="Simulate a synthetic immune receptor or repertoire dataset" version="@VERSION@.0">
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
2 <description></description>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
3 <macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
4 <import>prod_macros.xml</import>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
5 </macros>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
6 <expand macro="requirements" />
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
7 <command><![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
8
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
9 cp "$yaml_input" yaml_copy &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
10 immune-ml ./yaml_copy ${html_outfile.files_path} --tool DataSimulationTool &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
11
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
12 mv ${html_outfile.files_path}/index.html ${html_outfile} &&
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
13 mv ${html_outfile.files_path}/immuneML_output.zip $archive
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
14
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
15 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
16 </command>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
17 <inputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
18 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
19 </inputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
20 <outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
21 <data format="zip" name="archive" label="Archive: dataset simulation"/>
3
ed3932e6d616 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents: 0
diff changeset
22 <data format="immuneml_receptors" name="html_outfile" label="ImmuneML dataset (simulated sequences)"/>
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
23 </outputs>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
24
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
25
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
26 <help><![CDATA[
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
27
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
28 This Galaxy tool allows you to quickly make a dummy dataset.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
29 The tool generates a SequenceDataset, ReceptorDataset or RepertoireDataset consisting of random CDR3 sequences, which could be used for benchmarking machine learning methods or encodings,
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
30 or testing out other functionalities.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
31 The amino acids in the sequences are chosen from a uniform random distribution, and there is no underlying structure in the sequences.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
32
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
33 You can control:
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
34
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
35 - The number of sequences in the dataset, and in the case of a RepertoireDataset, the number of repertoires
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
36
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
37 - The length of the generated sequences
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
38
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
39 - Labels, which can be used as a target when training ML models
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
40
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
41 Note that since these labels are randomly assigned, they do not bear any meaning and it is not possible to train an ML model with high classification accuracy on this data.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
42 Meaningful labels can be added using the `Simulate immune events into existing repertoire/receptor dataset <https://galaxy.immuneml.uio.no/root?tool_id=immuneml_simulation>`_ Galaxy tool.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
43
6
2d3dd9ff7e84 "planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents: 3
diff changeset
44 For the exhaustive documentation of this tool and an example YAML specification, see the tutorial `How to simulate an AIRR dataset in Galaxy <https://docs.immuneml.uio.no/latest/galaxy/galaxy_simulate_dataset.html>`_.
0
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
45
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
46 **Tool output**
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
47
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
48 This Galaxy tool will produce the following history elements:
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
49
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
50 - ImmuneML dataset (simulated sequences): a sequence, receptor or repertoire dataset which can be used as an input to other immuneML tools. The history element contains a summary HTML page describing general characteristics of the dataset, including the name of the dataset
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
51 (which is used in the dataset definition of a YAML specification), the dataset type and size, available labels, and a link to download the raw data files.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
52
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
53 - Archive: dataset simulation: a .zip file containing the complete output folder as it was produced by immuneML. This folder
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
54 contains the output of the DatasetExport instruction including raw data files.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
55 Furthermore, the folder contains the complete YAML specification file for the immuneML run, the HTML output and a log file.
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
56
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
57
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
58 ]]>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
59 </help>
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
60
629e7e403e19 "planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff changeset
61 </tool>