Mercurial > repos > iuc > ncbi_datasets
comparison datasets_genome.xml @ 5:6c829a430475 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit a58a3198ea1b60b6aa9567c6c65f00f8361794f6"
author | iuc |
---|---|
date | Wed, 04 May 2022 13:15:43 +0000 |
parents | 1a7773882d2c |
children | 5a2656cc84cb |
comparison
equal
deleted
inserted
replaced
4:c47a0a93ffd2 | 5:6c829a430475 |
---|---|
17 '$query.subcommand.taxon' | 17 '$query.subcommand.taxon' |
18 #end if | 18 #end if |
19 $filters.reference | 19 $filters.reference |
20 $filters.annotated | 20 $filters.annotated |
21 #if $filters.assembly_level: | 21 #if $filters.assembly_level: |
22 --assembly_level $filters.assembly_level | 22 --assembly-level $filters.assembly_level |
23 #end if | 23 #end if |
24 #if $filters.assembly_source: | 24 #if $filters.assembly_source: |
25 --assembly_source $filters.assembly_source | 25 --assembly-source $filters.assembly_source |
26 #end if | 26 #end if |
27 #if $filters.chromosomes: | 27 #if $filters.chromosomes: |
28 --chromosomes '$filters.chromosomes' | 28 --chromosomes '$filters.chromosomes' |
29 #end if | 29 #end if |
30 @EXCLUDES_GENOME@ | 30 @EXCLUDES_GENOME@ |
66 | 66 |
67 <repeat name="search" title="Add search terms"> | 67 <repeat name="search" title="Add search terms"> |
68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> | 68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> |
69 </repeat> | 69 </repeat> |
70 </section> | 70 </section> |
71 <section name="file_choices" title="File Choices"> | 71 <section name="file_choices" title="File Choices" expanded="true"> |
72 <expand macro="excludes_genome"></expand> | 72 <expand macro="excludes_genome"></expand> |
73 <expand macro="includes_genome"></expand> | 73 <expand macro="includes_genome"></expand> |
74 </section> | 74 </section> |
75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> | 75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> |
76 </inputs> | 76 </inputs> |
88 <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 88 <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
89 <filter>uncompressed</filter> | 89 <filter>uncompressed</filter> |
90 </collection> | 90 </collection> |
91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> | 91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> |
92 <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 92 <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
93 <filter>uncompressed and file_choices['exclude_seq']</filter> | 93 <filter>uncompressed and not file_choices['exclude_seq']</filter> |
94 </collection> | 94 </collection> |
95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> | 95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> |
96 <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 96 <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
97 <filter>uncompressed and file_choices['exclude_genomic_cds']</filter> | 97 <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter> |
98 </collection> | 98 </collection> |
99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> | 99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> |
100 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 100 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
101 <filter>uncompressed and file_choices['exclude_gff3']</filter> | 101 <filter>uncompressed and not file_choices['exclude_gff3']</filter> |
102 </collection> | 102 </collection> |
103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> | 103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> |
104 <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 104 <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
105 <filter>uncompressed and file_choices['exclude_rna']</filter> | 105 <filter>uncompressed and not file_choices['exclude_rna']</filter> |
106 </collection> | 106 </collection> |
107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> | 107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> |
108 <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 108 <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
109 <filter>uncompressed and file_choices['exclude_protein']</filter> | 109 <filter>uncompressed and not file_choices['exclude_protein']</filter> |
110 </collection> | 110 </collection> |
111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> | 111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> |
112 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 112 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
113 <filter>uncompressed and file_choices['include_gbff']</filter> | 113 <filter>uncompressed and file_choices['include_gbff']</filter> |
114 </collection> | 114 </collection> |
123 <param name="download_by" value="taxon"></param> | 123 <param name="download_by" value="taxon"></param> |
124 <param name="text_or_file" value="text"></param> | 124 <param name="text_or_file" value="text"></param> |
125 <param name="taxon" value="human"></param> | 125 <param name="taxon" value="human"></param> |
126 </conditional> | 126 </conditional> |
127 <param name="chromosomes" value="21"></param> | 127 <param name="chromosomes" value="21"></param> |
128 <param name="exclude_protein" value="true"/> | |
129 <param name="exclude_rna" value="true"/> | |
130 <param name="exclude_seq" value="true"/> | |
131 <param name="exclude_genomic_cds" value="true"/> | |
132 <param name="exclude_gff3" value="true"/> | |
128 <param name="uncompressed" value="false"/> | 133 <param name="uncompressed" value="false"/> |
129 <param name="released_before" value="01/01/2018"></param> | 134 <param name="released_before" value="01/01/2018"></param> |
130 <output name="archive_contents"> | 135 <output name="archive_contents"> |
131 <assert_contents> | 136 <assert_contents> |
132 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> | 137 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> |
133 </assert_contents> | 138 </assert_contents> |
134 </output> | 139 </output> |
135 </test> | 140 </test> |
136 <test expect_num_outputs="5"> | 141 <test expect_num_outputs="2"> |
142 <conditional name="query|subcommand"> | |
143 <param name="download_by" value="taxon"></param> | |
144 <param name="text_or_file" value="text"></param> | |
145 <param name="taxon" value="human"></param> | |
146 </conditional> | |
147 <param name="chromosomes" value="21"></param> | |
148 <param name="uncompressed" value="false"/> | |
149 <param name="exclude_protein" value="true"/> | |
150 <param name="exclude_rna" value="true"/> | |
151 <param name="exclude_seq" value="true"/> | |
152 <param name="exclude_genomic_cds" value="true"/> | |
153 <param name="exclude_gff3" value="true"/> | |
154 <param name="assembly_level" value="chromosome"/> | |
155 <param name="released_before" value="01/01/2018"></param> | |
156 <output name="archive_contents"> | |
157 <assert_contents> | |
158 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> | |
159 </assert_contents> | |
160 </output> | |
161 </test> | |
162 <test expect_num_outputs="4"> | |
137 <conditional name="query|subcommand"> | 163 <conditional name="query|subcommand"> |
138 <param name="download_by" value="accession"></param> | 164 <param name="download_by" value="accession"></param> |
139 <conditional name="text_or_file"> | 165 <conditional name="text_or_file"> |
140 <param name="text_or_file" value="text"></param> | 166 <param name="text_or_file" value="text"></param> |
141 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> | 167 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> |
142 </conditional> | 168 </conditional> |
143 </conditional> | 169 </conditional> |
144 <param name="uncompressed" value="true"/> | 170 <param name="uncompressed" value="true"/> |
145 <param name="released_before" value="01/01/2007"></param> | 171 <param name="released_before" value="01/01/2007"></param> |
146 <param name="exclude_genomic_cds" value="true"/> | 172 <param name="exclude_protein" value="true"/> |
173 <param name="exclude_rna" value="true"/> | |
174 <param name="exclude_seq" value="true"/> | |
175 <param name="exclude_gff3" value="true"/> | |
147 <param name="include_gtf" value="true"/> | 176 <param name="include_gtf" value="true"/> |
148 <output name="genome_data_report"> | 177 <output name="genome_data_report"> |
149 <assert_contents> | 178 <assert_contents> |
150 <has_text text="GCF_000013305.1"/> | 179 <has_text text="GCF_000013305.1"/> |
151 </assert_contents> | 180 </assert_contents> |
152 </output> | 181 </output> |
153 <output_collection name="sequence_report" type="list"> | |
154 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
155 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
156 </output_collection> | |
157 <output_collection name="genomic_gtf" type="list"> | 182 <output_collection name="genomic_gtf" type="list"> |
158 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> | 183 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> |
159 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> | 184 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> |
160 </output_collection> | 185 </output_collection> |
161 <output_collection name="genomic_cds" type="list"> | 186 <output_collection name="genomic_cds" type="list"> |
170 <param name="text_or_file" value="file"></param> | 195 <param name="text_or_file" value="file"></param> |
171 <param name="inputfile" value="accessions.txt"></param> | 196 <param name="inputfile" value="accessions.txt"></param> |
172 </conditional> | 197 </conditional> |
173 </conditional> | 198 </conditional> |
174 <param name="include_gbff" value="true"/> | 199 <param name="include_gbff" value="true"/> |
175 <param name="exclude_seq" value="false"/> | 200 <param name="exclude_protein" value="true"/> |
176 <param name="exclude_gff3" value="true"/> | 201 <param name="exclude_rna" value="true"/> |
202 <param name="exclude_seq" value="true"/> | |
203 <param name="exclude_genomic_cds" value="true"/> | |
177 <param name="uncompressed" value="true"/> | 204 <param name="uncompressed" value="true"/> |
178 <param name="released_before" value="01/02/2007"></param> | 205 <param name="released_before" value="01/02/2007"></param> |
179 <output name="genome_data_report"> | 206 <output name="genome_data_report"> |
180 <assert_contents> | 207 <assert_contents> |
181 <has_text text="SAMN02604181"/> | 208 <has_text text="SAMN02604181"/> |
182 </assert_contents> | 209 </assert_contents> |
183 </output> | 210 </output> |
184 <output_collection name="sequence_report" type="list"> | |
185 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
186 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
187 </output_collection> | |
188 <output_collection name="genomic_gff" type="list"> | 211 <output_collection name="genomic_gff" type="list"> |
189 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> | 212 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> |
190 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> | 213 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> |
191 </output_collection> | 214 </output_collection> |
192 <output_collection name="genomic_gbff" type="list"> | 215 <output_collection name="genomic_gbff" type="list"> |
193 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> | 216 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> |
194 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> | 217 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> |
218 </output_collection> | |
219 </test> | |
220 <test expect_num_outputs="2"> | |
221 <conditional name="query|subcommand"> | |
222 <param name="download_by" value="accession"></param> | |
223 <conditional name="text_or_file"> | |
224 <param name="text_or_file" value="text"></param> | |
225 <param name="accession" value="GCF_000001405.40"></param> | |
226 </conditional> | |
227 </conditional> | |
228 <param name="exclude_protein" value="true"/> | |
229 <param name="exclude_rna" value="true"/> | |
230 <param name="exclude_seq" value="true"/> | |
231 <param name="exclude_genomic_cds" value="true"/> | |
232 <param name="exclude_gff3" value="true"/> | |
233 <param name="uncompressed" value="true"/> | |
234 <param name="released_before" value="01/02/2007"></param> | |
235 <output_collection name="sequence_report"> | |
236 <element name="GCF_000001405.40" file="GCF_000001405.40.seq.rpt.jsonl" compare="contains"/> | |
195 </output_collection> | 237 </output_collection> |
196 </test> | 238 </test> |
197 </tests> | 239 </tests> |
198 <help> | 240 <help> |
199 <![CDATA[ | 241 <![CDATA[ |
201 | 243 |
202 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. | 244 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. |
203 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. | 245 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. |
204 | 246 |
205 The default genome dataset includes the following files (if available): | 247 The default genome dataset includes the following files (if available): |
248 * data_report.jsonl (genome assembly and annotation metadata, not always available) | |
206 * genomic.fna (genomic sequences) | 249 * genomic.fna (genomic sequences) |
207 * rna.fna (transcript sequences) | 250 * rna.fna (transcript sequences) |
208 * protein.faa (protein sequences) | 251 * protein.faa (protein sequences) |
209 * genomic.gff (genome annotation in gff3 format) | 252 * genomic.gff (genome annotation in gff3 format) |
210 * data_report.jsonl (data report with genome assembly and annotation metadata) | |
211 * dataset_catalog.json (a list of files and file types included in the dataset) | 253 * dataset_catalog.json (a list of files and file types included in the dataset) |
212 ]]> | 254 ]]> |
213 </help> | 255 </help> |
214 | 256 |
215 </tool> | 257 </tool> |