Mercurial > repos > immuneml > immuneml_tools
comparison immuneml_create_dataset.xml @ 0:629e7e403e19 draft
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
author | immuneml |
---|---|
date | Thu, 01 Jul 2021 11:36:43 +0000 |
parents | |
children | ed3932e6d616 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:629e7e403e19 |
---|---|
1 <tool id="immune_ml_dataset" name="Create dataset" version="@VERSION@.0"> | |
2 <description></description> | |
3 <macros> | |
4 <import>prod_macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <command><![CDATA[ | |
8 #set $input_orig_names = [] | |
9 #for $input in $interface_cond.data_input | |
10 #if $input | |
11 #set input_orig_names += ["./"+str($input.element_identifier)] | |
12 ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s $input "$input.element_identifier") && | |
13 #end if | |
14 #end for | |
15 | |
16 #if $interface_cond.interface == "simple" | |
17 python3 '$__tool_directory__/build_dataset_yaml_wrapper.py' | |
18 --output_path . --file_name specs.yaml | |
19 #if $interface_cond.dataset_cond.dataset_type == "repertoire" | |
20 --is_repertoire True | |
21 --format "$interface_cond.dataset_cond.metadata_cond.data_format" | |
22 #if $interface_cond.dataset_cond.metadata_cond.data_format != "IReceptor" | |
23 --metadata_file "$interface_cond.dataset_cond.metadata_cond.metadata_input" && | |
24 cp $interface_cond.dataset_cond.metadata_cond.metadata_input "$interface_cond.dataset_cond.metadata_cond.metadata_input.element_identifier" | |
25 #end if | |
26 #else | |
27 --is_repertoire False | |
28 --format "$interface_cond.dataset_cond.data_format" | |
29 --metadata_columns "$interface_cond.dataset_cond.metadata_columns" | |
30 #if $interface_cond.dataset_cond.dataset_type == "sequence" | |
31 --paired False | |
32 #elif $interface_cond.dataset_cond.dataset_type == "receptor" | |
33 --paired True | |
34 --receptor_chains $interface_cond.dataset_cond.receptor_type | |
35 #end if | |
36 #end if | |
37 && mv ./specs.yaml create_dataset.yaml && | |
38 #else | |
39 cp $yaml_input create_dataset.yaml && | |
40 #end if | |
41 | |
42 immune-ml ./create_dataset.yaml ${html_outfile.files_path} --tool DatasetGenerationTool && | |
43 | |
44 mv ${html_outfile.files_path}/index.html ${html_outfile} && | |
45 mv ./create_dataset.yaml ${specs} | |
46 | |
47 ]]> | |
48 </command> | |
49 <inputs> | |
50 <conditional name="interface_cond"> | |
51 <param type="select" name="interface" label="Which interface would you like to use?" display="radio"> | |
52 <option value="simple">Simplified (limited options)</option> | |
53 <option value="advanced">Advanced (full control through YAML) </option> | |
54 </param> | |
55 <when value="simple"> | |
56 <conditional name="dataset_cond"> | |
57 <param type="select" name="dataset_type" label="Dataset type" display="radio" help="Repertoire datasets | |
58 should be used when making predictions per repertoire, such as predicting a disease state. Sequence or | |
59 receptor datasets should be used when predicting values for unpaired (single-chain) and paired immune | |
60 receptors respectively, like antigen specificity."> | |
61 <option value="repertoire">Repertoire dataset</option> | |
62 <option value="sequence">Sequence dataset (single chain)</option> | |
63 <option value="receptor">Receptor dataset (paired chains)</option> | |
64 </param> | |
65 <when value="repertoire"> | |
66 <conditional name="metadata_cond"> | |
67 <param type="select" name="data_format" label="Data format" display="radio"> | |
68 <option value="AIRR">AIRR</option> | |
69 <option value="IReceptor">iReceptor Gateway</option> | |
70 <option value="ImmunoSEQRearrangement">immunoSEQ: rearrangement-level files</option> | |
71 <option value="ImmunoSEQSample">immunoSEQ: sample-level files</option> | |
72 <option value="MiXCR">MiXCR</option> | |
73 <option value="VDJdb">VDJdb</option> | |
74 <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option> | |
75 </param> | |
76 <when value="AIRR"> | |
77 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire AIRR file described in the metadata file must be selected under 'Data files'."/> | |
78 </when> | |
79 <when value="ImmunoSEQRearrangement"> | |
80 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ rearrangement file described in the metadata file must be selected under 'Data files'."/> | |
81 </when> | |
82 <when value="ImmunoSEQSample"> | |
83 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ sample file described in the metadata file must be selected under 'Data files'."/> | |
84 </when> | |
85 <when value="MiXCR"> | |
86 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire MiXCR file described in the metadata file must be selected under 'Data files'."/> | |
87 </when> | |
88 <when value="VDJdb"> | |
89 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire VDJdb file described in the metadata file must be selected under 'Data files'."/> | |
90 </when> | |
91 <when value="TenxGenomics"> | |
92 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire 10x Genomics file described in the metadata file must be selected under 'Data files'."/> | |
93 </when> | |
94 </conditional> | |
95 </when> | |
96 <when value="sequence"> | |
97 <param type="select" name="data_format" label="Data format" display="radio"> | |
98 <option value="AIRR">AIRR</option> | |
99 <option value="IReceptor">iReceptor Gateway</option> | |
100 <option value="ImmunoSEQRearrangement">immunoSEQ: rearrangement-level files</option> | |
101 <option value="ImmunoSEQSample">immunoSEQ: sample-level files</option> | |
102 <option value="MiXCR">MiXCR</option> | |
103 <option value="VDJdb">VDJdb</option> | |
104 <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option> | |
105 </param> | |
106 <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please | |
107 specify the names of the columns that contain metadata. The metadata columns specified here can be | |
108 used as labels for prediction. Multiple metadata columns may be specified, separated by commas, | |
109 for example: Epitope,Epitope gene,Epitope species"/> | |
110 </when> | |
111 <when value="receptor"> | |
112 <param type="select" name="data_format" label="Data format" display="radio"> | |
113 <option value="AIRR">AIRR</option> | |
114 <option value="IReceptor">iReceptor Gateway</option> | |
115 <option value="VDJdb">VDJdb</option> | |
116 <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option> | |
117 </param> | |
118 <param type="select" name="receptor_type" label="Receptor type" display="radio"> | |
119 <option value="TRA_TRB">T cell alpha/beta</option> | |
120 <option value="TRG_TRD">T cell gamma/delta</option> | |
121 <option value="IGH_IGL">B cell heavy/light</option> | |
122 <option value="IGH_IGK">B cell heavy/kappa</option> | |
123 </param> | |
124 <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please | |
125 specify the names of the columns that contain metadata. The metadata columns specified here can be | |
125 used as labels for prediction. Multiple metadata columns may be specified, separated by commas, | |
127 for example: Epitope,Epitope gene,Epitope species"/> | |
128 </when> | |
129 </conditional> | |
130 <param name="data_input" type="data" multiple="true" label="Data files" min="1" max="2000" help="This field should include individual repertoire or receptor files, or iReceptor zip files. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted in blue or gray."/> | |
131 </when> | |
132 <when value="advanced"> | |
133 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/> | |
134 <param name="data_input" type="data" multiple="true" label="Data and metadata files" optional="true" help="This field should include individual repertoire or receptor files, or iReceptor zip files, and optionally a metadata file. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted."/> | |
135 </when> | |
136 </conditional> | |
137 </inputs> | |
138 <outputs> | |
139 <data format="txt" name="specs" label="create_dataset.yaml"/> | |
140 <data format="iml_dataset" name="html_outfile" label="ImmuneML dataset"/> | |
141 </outputs> | |
142 | |
143 | |
144 <help><![CDATA[ | |
145 | |
146 In Galaxy, an immuneML dataset is simply a Galaxy collection containing all relevant files (including an optional metadata file). | |
147 The Create dataset Galaxy tool allows users to import data from various formats and create immuneML datasets in Galaxy. | |
148 These datasets are in an optimized binary (Pickle) format, which ensures that you can quickly import the dataset into | |
149 Galaxy tools without having to repeatedly specify the import parameters. | |
150 | |
151 Before creating a dataset, the relevant data files must first be uploaded to the Galaxy interface. This can be done either | |
152 by uploading files from your local computer (use the 'Upload file' tool under the 'Get local data' menu), or by fetching | |
153 remote data from the iReceptor Plus Gateway or VDJdb (see `How to import remote AIRR datasets in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_import_remote_data.html>`_). | |
154 | |
155 The imported immuneML dataset is stored in a Galaxy collection, which will appear as a history item on the right side of the screen, | |
156 and can later be selected as input to other tools. | |
157 | |
158 The tool has a simplified and an advanced interface. The simplified interface is fully button-based, and relies | |
159 on default settings for importing datasets. The advanced interface gives full control over import settings through a YAML | |
160 specification. In most cases, the simplified interface will suffice. | |
161 | |
162 For the exhaustive documentation of this tool and more information about immuneML datasets, see the tutorial `How to make an immuneML dataset in Galaxy <https://docs.immuneml.uio.no/galaxy/galaxy_dataset.html>`_. | |
163 | |
164 **Tool output** | |
165 | |
166 This Galaxy tool will produce the following history elements: | |
167 | |
168 - ImmuneML dataset: a sequence, receptor or repertoire dataset which can be used as an input to other immuneML tools. The history element contains a summary HTML page describing general characteristics of the dataset, including the name of the dataset | |
169 (which is used in the dataset definition of a YAML specification), the dataset type and size, available labels, and a link to download the raw data files. | |
170 | |
171 - create_dataset.yaml: the YAML specification file that was used by immuneML to create the dataset. | |
172 This file can be downloaded and altered (for example to export files in AIRR format, or use non-standard import parameters), | |
173 and run again using the 'Advanced' interface. | |
174 | |
175 ]]> | |
176 </help> | |
177 | |
178 </tool> |