Mercurial > repos > immuneml > immuneml_tools
annotate immuneml_create_dataset.xml @ 11:bd77c7f1985c draft
"planemo upload commit 8e6575073c34112d0c4dcfdb77406bc41800a843"
author | immuneml |
---|---|
date | Mon, 23 Aug 2021 11:12:21 +0000 |
parents | 2d3dd9ff7e84 |
children |
rev | line source |
---|---|
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
1 <tool id="immune_ml_dataset" name="Create dataset" version="@VERSION@.0"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
2 <description></description> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
3 <macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
4 <import>prod_macros.xml</import> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
5 </macros> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
7 <command><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
8 #set $input_orig_names = [] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
9 #for $input in $interface_cond.data_input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
10 #if $input |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
11 #set input_orig_names += ["./"+str($input.element_identifier)] |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
12 ([ -e ./"$input.element_identifier" ] && echo "File '$input.element_identifier' already exists in the input folder, skipping." || ln -s $input "$input.element_identifier") && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
13 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
14 #end for |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
15 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
16 #if $interface_cond.interface == "simple" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
17 python3 '$__tool_directory__/build_dataset_yaml_wrapper.py' |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
18 --output_path . --file_name specs.yaml |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
19 #if $interface_cond.dataset_cond.dataset_type == "repertoire" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
20 --is_repertoire True |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
21 --format "$interface_cond.dataset_cond.metadata_cond.data_format" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
22 #if $interface_cond.dataset_cond.metadata_cond.data_format != "IReceptor" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
23 --metadata_file "$interface_cond.dataset_cond.metadata_cond.metadata_input" && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
24 cp $interface_cond.dataset_cond.metadata_cond.metadata_input "$interface_cond.dataset_cond.metadata_cond.metadata_input.element_identifier" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
25 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
26 #else |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
27 --is_repertoire False |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
28 --format "$interface_cond.dataset_cond.data_format" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
29 --metadata_columns "$interface_cond.dataset_cond.metadata_columns" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
30 #if $interface_cond.dataset_cond.dataset_type == "sequence" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
31 --paired False |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
32 #elif $interface_cond.dataset_cond.dataset_type == "receptor" |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
33 --paired True |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
34 --receptor_chains $interface_cond.dataset_cond.receptor_type |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
35 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
36 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
37 && mv ./specs.yaml create_dataset.yaml && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
38 #else |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
39 cp $yaml_input create_dataset.yaml && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
40 #end if |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
41 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
42 immune-ml ./create_dataset.yaml ${html_outfile.files_path} --tool DatasetGenerationTool && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
43 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
44 mv ${html_outfile.files_path}/index.html ${html_outfile} && |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
45 mv ./create_dataset.yaml ${specs} |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
46 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
47 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
48 </command> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
49 <inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
50 <conditional name="interface_cond"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
51 <param type="select" name="interface" label="Which interface would you like to use?" display="radio"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
52 <option value="simple">Simplified (limited options)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
53 <option value="advanced">Advanced (full control through YAML) </option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
54 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
55 <when value="simple"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
56 <conditional name="dataset_cond"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
57 <param type="select" name="dataset_type" label="Dataset type" display="radio" help="Repertoire datasets |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
58 should be used when making predictions per repertoire, such as predicting a disease state. Sequence or |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
59 receptor datasets should be used when predicting values for unpaired (single-chain) and paired immune |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
60 receptors respectively, like antigen specificity."> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
61 <option value="repertoire">Repertoire dataset</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
62 <option value="sequence">Sequence dataset (single chain)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
63 <option value="receptor">Receptor dataset (paired chains)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
64 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
65 <when value="repertoire"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
66 <conditional name="metadata_cond"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
67 <param type="select" name="data_format" label="Data format" display="radio"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
68 <option value="AIRR">AIRR</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
69 <option value="IReceptor">iReceptor Gateway</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
70 <option value="ImmunoSEQRearrangement">immunoSEQ: rearrangement-level files</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
71 <option value="ImmunoSEQSample">immunoSEQ: sample-level files</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
72 <option value="MiXCR">MiXCR</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
73 <option value="VDJdb">VDJdb</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
74 <option value="TenxGenomics">10x Genomics ‘Clonotype consensus annotations’ (CSV)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
75 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
76 <when value="AIRR"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
77 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire ARR file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
78 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
79 <when value="ImmunoSEQRearrangement"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
80 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ rearrangement file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
81 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
82 <when value="ImmunoSEQSample"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
83 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire immunoSEQ sample file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
84 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
85 <when value="MiXCR"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
86 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire MiXCR file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
87 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
88 <when value="VDJdb"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
89 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire VDJdb file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
90 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
91 <when value="TenxGenomics"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
92 <param name="metadata_input" type="data" format="txt" label="Metadata file" multiple="false" help="The metadata file describes metadata information for all repertoires included in the dataset. Every repertoire 10x Genomics file described in the metadata file must be selected under 'Data files'."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
93 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
94 </conditional> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
95 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
96 <when value="sequence"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
97 <param type="select" name="data_format" label="Data format" display="radio"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
98 <option value="AIRR">AIRR</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
99 <option value="IReceptor">iReceptor Gateway</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
100 <option value="ImmunoSEQRearrangement">ImmunoSEQ: rearrangement-level files</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
101 <option value="ImmunoSEQSample">ImmunoSEQ: sample-level files</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
102 <option value="MiXCR">MiXCR</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
103 <option value="VDJdb">VDJdb</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
104 <option value="TenxGenomics">10xGenomics ‘Clonotype consensus annotations’ (CSV)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
105 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
106 <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
107 specify the names of the columns that contain metadata. The metadata columns specified here can be |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
108 used as labels for prediction. Multiple metadata columns may be specified and separated by comma, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
109 for example: Epitope,Epitope gene,Epitope species"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
110 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
111 <when value="receptor"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
112 <param type="select" name="data_format" label="Data format" display="radio"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
113 <option value="AIRR">AIRR</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
114 <option value="IReceptor">iReceptor Gateway</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
115 <option value="VDJdb">VDJdb</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
116 <option value="TenxGenomics">10xGenomics ‘Clonotype consensus annotations’ (CSV)</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
117 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
118 <param type="select" name="receptor_type" label="Receptor type" display="radio"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
119 <option value="TRA_TRB">T cell alpha/beta</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
120 <option value="TRG_TRD">T cell gamma/delta</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
121 <option value="IGH_IGL">B cell heavy/light</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
122 <option value="IGH_IGK">B cell heavy/kappa</option> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
123 </param> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
124 <param type="text" name="metadata_columns" optional="false" label="Metadata columns" help="Please |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
125 specify the names of the columns that contain metadata. The metadata columns specified here can be |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
126 used as labels for prediction. Multiple metadata columns may be specified and separated by comma, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
127 for example: Epitope,Epitope gene,Epitope species"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
128 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
129 </conditional> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
130 <param name="data_input" type="data" multiple="true" label="Data files" min="1" max="2000" help="This field should include individual repertoire or receptor files, or iReceptor zip files. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted in blue or gray."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
131 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
132 <when value="advanced"> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
133 <param name="yaml_input" type="data" format="txt" label="YAML specification" multiple="false"/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
134 <param name="data_input" type="data" multiple="true" label="Data and metadata files" optional="true" help="This field should include individual repertoire or receptor files, or iReceptor zip files, and optionally a metadata file. Multiple files can be selected by holding down the control/command or shift key, or by clicking 'browse datasets' (folder button on the right). Important: make sure all the files you want to include in the dataset are highlighted."/> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
135 </when> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
136 </conditional> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
137 </inputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
138 <outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
139 <data format="txt" name="specs" label="create_dataset.yaml"/> |
3
ed3932e6d616
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
0
diff
changeset
|
140 <data format="immuneml_receptors" name="html_outfile" label="ImmuneML dataset"/> |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
141 </outputs> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
142 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
143 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
144 <help><![CDATA[ |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
145 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
146 In Galaxy, an immuneML dataset is simply a Galaxy collection containing all relevant files (including an optional metadata file). |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
147 The Create dataset Galaxy tool allows users to import data from various formats and create immuneML datasets in Galaxy. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
148 These datasets are in an optimized binary (Pickle) format, which ensures that you can quickly import the dataset into |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
149 Galaxy tools without having to repeatedly specify the import parameters. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
150 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
151 Before creating a dataset, the relevant data files must first be uploaded to the Galaxy interface. This can be done either |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
152 by uploading files from your local computer (use the 'Upload file' tool under the 'Get local data' menu), or by fetching |
6
2d3dd9ff7e84
"planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents:
3
diff
changeset
|
153 remote data from the iReceptor Plus Gateway or VDJdb (see `How to import remote AIRR datasets in Galaxy <https://docs.immuneml.uio.no/latest/galaxy/galaxy_import_remote_data.html>`_). |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
154 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
155 The imported immuneML dataset is stored in a Galaxy collection, which will appear as a history item on the right side of the screen, |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
156 and can later be selected as input to other tools. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
157 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
158 The tool has a simplified and an advanced interface. The simplified interface is fully button-based, and relies |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
159 on default settings for importing datasets. The advanced interface gives full control over import settings through a YAML |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
160 specification. In most cases, the simplified interface will suffice. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
161 |
6
2d3dd9ff7e84
"planemo upload commit 74f2bd15d2b7723c8e5a22d743913706dc7d8333-dirty"
immuneml
parents:
3
diff
changeset
|
162 For the exhaustive documentation of this tool and more information about immuneML datasets, see the tutorial `How to make an immuneML dataset in Galaxy <https://docs.immuneml.uio.no/latest/galaxy/galaxy_dataset.html>`_. |
0
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
163 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
164 **Tool output** |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
165 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
166 This Galaxy tool will produce the following history elements: |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
167 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
168 - ImmuneML dataset: a sequence, receptor or repertoire dataset which can be used as an input to other immuneML tools. The history element contains a summary HTML page describing general characteristics of the dataset, including the name of the dataset |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
169 (which is used in the dataset definition of a yaml specification), the dataset type and size, available labels, and a link to download the raw data files. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
170 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
171 - create_dataset.yaml: the YAML specification file that was used by immuneML to create the dataset. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
172 This file can be downloaded and altered (for example to export files in AIRR format, or use non-standard import parameters), |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
173 and run again using the 'Advanced' interface. |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
174 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
175 ]]> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
176 </help> |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
177 |
629e7e403e19
"planemo upload commit 2fed2858d4044a3897a93a5604223d1d183ceac0-dirty"
immuneml
parents:
diff
changeset
|
178 </tool> |