comparison datasets_genome.xml @ 14:75acf92c8fd5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 390b8177036c256e7e870dfe67c6de7d6417ad39
author iuc
date Thu, 21 Sep 2023 23:02:58 +0000
parents 1e188c9610c3
children 198c75abbf55
comparison
equal deleted inserted replaced
13:1e188c9610c3 14:75acf92c8fd5
160 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> 160 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
161 <filter>file_choices['include'] and "gbff" in file_choices['include']</filter> 161 <filter>file_choices['include'] and "gbff" in file_choices['include']</filter>
162 </collection> 162 </collection>
163 </outputs> 163 </outputs>
164 <tests> 164 <tests>
165 <!-- Note: All but one test use the non-default decompress="true"
166
167 this is because (at 11/22) Galaxy can not apply text assertions on the content
168 of compressed files https://github.com/galaxyproject/galaxy/pull/15085
169
170 So with decompress="true" more powerful assertions are possible.
171 A single test checks the default, ie decompress="false".
172 -->
173 <test expect_num_outputs="3"> 165 <test expect_num_outputs="3">
174 <conditional name="query|subcommand"> 166 <conditional name="query|subcommand">
175 <param name="download_by" value="taxon"/> 167 <param name="download_by" value="taxon"/>
176 <param name="taxon_positional" value="human"/> 168 <param name="taxon_positional" value="human"/>
177 </conditional> 169 </conditional>
183 <param name="include" value="rna,gff3"/> 175 <param name="include" value="rna,gff3"/>
184 </section> 176 </section>
185 <output name="genome_data_report"> 177 <output name="genome_data_report">
186 <assert_contents> 178 <assert_contents>
187 <has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/> 179 <has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/>
188 <has_n_lines n="144"/> 180 <has_n_lines n="142"/>
189 <has_n_columns n="4"/> 181 <has_n_columns n="4"/>
190 </assert_contents> 182 </assert_contents>
191 </output> 183 </output>
192 <output_collection name="rna_fasta" type="list" count="1"> 184 <output_collection name="rna_fasta" type="list" count="1">
193 <element name="GCF_000306695.2" decompress="true"> 185 <element name="GCF_000306695.2" decompress="true">
199 <output_collection name="genomic_gff" type="list"> 191 <output_collection name="genomic_gff" type="list">
200 <element name="GCF_000306695.2"> 192 <element name="GCF_000306695.2">
201 <assert_contents> 193 <assert_contents>
202 <has_n_lines min="1000000"/> 194 <has_n_lines min="1000000"/>
203 <has_line line="##gff-version 3"/> 195 <has_line line="##gff-version 3"/>
204 <!-- TODO this will only work when the galaxy python packages for 22.05 have been released 196 <has_n_columns n="9" comment="#"/>
205 <has_n_columns n="9" comment="#"/> -->
206 </assert_contents> 197 </assert_contents>
207 </element> 198 </element>
208 </output_collection> 199 </output_collection>
209 <assert_command> 200 <assert_command>
210 <has_text text="gunzip"/> 201 <has_text text="gunzip"/>
220 <param name="released_before" value="01/01/2018"/> 211 <param name="released_before" value="01/01/2018"/>
221 <section name="file_choices"> 212 <section name="file_choices">
222 <param name="include" value="genome"/> 213 <param name="include" value="genome"/>
223 <param name="decompress" value="true"/> 214 <param name="decompress" value="true"/>
224 </section> 215 </section>
225 <output_collection name="genome_fasta" type="list:list" count="14"> 216 <output_collection name="genome_fasta" type="list:list" count="12">
226 <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/> 217 <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/>
227 <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/> 218 <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/>
228 <expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/>
229 <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/> 219 <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/>
230 <expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/> 220 <expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/>
231 <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/> 221 <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/>
232 <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/> 222 <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/>
233 <!-- TODO chromosomes argument (or data) seems not reliable https://github.com/ncbi/datasets/issues/188-->
234 <expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/>
235 <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/> 223 <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/>
236 <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/> 224 <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/>
237 <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/> 225 <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/>
238 <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/> 226 <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/>
239 <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> 227 <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
240 <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> 228 <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
229 <!-- According to https://github.com/ncbi/datasets/issues/188, the following should not be included among the returned results anymore 09/2023 -->
230 <!--
231 <expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/>
232 <expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/>
233 -->
241 </output_collection> 234 </output_collection>
242 <output name="genome_data_report"> 235 <output name="genome_data_report">
243 <assert_contents> 236 <assert_contents>
244 <has_text text="Homo sapiens"/> 237 <has_text text="Homo sapiens"/>
245 <has_n_columns n="4"/> 238 <has_n_columns n="4"/>
246 </assert_contents> 239 </assert_contents>
247 </output> 240 </output>
248 </test> 241 </test>
249 <!-- same as previous test but assembly_source (refseq which removes some of the genomes) --> 242 <!-- same as previous test but assembly_source=refseq, which removes all of the genomes -->
250 <test expect_num_outputs="2"> 243 <test expect_failure="true">
251 <conditional name="query|subcommand"> 244 <conditional name="query|subcommand">
252 <param name="download_by" value="taxon"/> 245 <param name="download_by" value="taxon"/>
253 <param name="taxon_positional" value="human"/> 246 <param name="taxon_positional" value="human"/>
254 </conditional> 247 </conditional>
255 <param name="chromosomes" value="21"/> 248 <param name="chromosomes" value="21"/>
258 <param name="released_before" value="01/01/2018"/> 251 <param name="released_before" value="01/01/2018"/>
259 <section name="file_choices"> 252 <section name="file_choices">
260 <param name="include" value="genome"/> 253 <param name="include" value="genome"/>
261 <param name="decompress" value="true"/> 254 <param name="decompress" value="true"/>
262 </section> 255 </section>
256 <assert_stderr>
257 <has_text text="No assemblies found that match selection"/>
258 </assert_stderr>
259 <!-- In the current state of the NCBI tool/DB, no output to check.
260 But the returned results seem to change from time to time and it might
261 be necessary to re-enable this code block if the test fails in the future. -->
262 <!--
263 <output_collection name="genome_fasta" type="list:list" count="2"> 263 <output_collection name="genome_fasta" type="list:list" count="2">
264 <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> 264 <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
265 <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> 265 <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
266 </output_collection> 266 </output_collection>
267 <output name="genome_data_report"> 267 <output name="genome_data_report">
268 <assert_contents> 268 <assert_contents>
269 <has_text text="Homo sapiens"/> 269 <has_text text="Homo sapiens"/>
270 <has_n_lines n="5"/> 270 <has_n_lines n="5"/>
271 <has_n_columns n="4"/> 271 <has_n_columns n="4"/>
272 </assert_contents> 272 </assert_contents>
273 </output> 273 </output> -->
274 </test> 274 </test>
275 <test expect_num_outputs="4"> 275 <test expect_num_outputs="4">
276 <conditional name="query|subcommand"> 276 <conditional name="query|subcommand">
277 <param name="download_by" value="accession"/> 277 <param name="download_by" value="accession"/>
278 <conditional name="text_or_file"> 278 <conditional name="text_or_file">
311 <output_collection name="genomic_gtf" type="list"> 311 <output_collection name="genomic_gtf" type="list">
312 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> 312 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/>
313 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> 313 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/>
314 </output_collection> 314 </output_collection>
315 <output_collection name="genomic_cds" type="list"> 315 <output_collection name="genomic_cds" type="list">
316 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains" decompress="true"/> 316 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains"/>
317 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains" decompress="true"/> 317 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains"/>
318 </output_collection> 318 </output_collection>
319 </test> 319 </test>
320 <test expect_num_outputs="4"> 320 <test expect_num_outputs="4">
321 <conditional name="query|subcommand"> 321 <conditional name="query|subcommand">
322 <param name="download_by" value="accession"/> 322 <param name="download_by" value="accession"/>
347 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> 347 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/>
348 </output_collection> 348 </output_collection>
349 </test> 349 </test>
350 350
351 <!-- should not fail https://github.com/ncbi/datasets/issues/194 --> 351 <!-- should not fail https://github.com/ncbi/datasets/issues/194 -->
352 <test expect_num_outputs="2" expect_failure="true"> 352 <test expect_num_outputs="2"> <!-- expect_failure="true"> -->
353 <conditional name="query|subcommand"> 353 <conditional name="query|subcommand">
354 <param name="download_by" value="accession"/> 354 <param name="download_by" value="accession"/>
355 <conditional name="text_or_file"> 355 <conditional name="text_or_file">
356 <param name="text_or_file" value="text"/> 356 <param name="text_or_file" value="text"/>
357 <param name="accession" value="GCF_000001405"/> 357 <param name="accession" value="GCF_000001405"/>
419 </element> 419 </element>
420 </output_collection> 420 </output_collection>
421 <output_collection name="protein_fasta" type="list" count="1"> 421 <output_collection name="protein_fasta" type="list" count="1">
422 <element name="GCF_000146045.2" ftype="fasta.gz"> 422 <element name="GCF_000146045.2" ftype="fasta.gz">
423 <assert_contents> 423 <assert_contents>
424 <has_size value="1844838"/> 424 <has_size value="1845038"/>
425 </assert_contents> 425 </assert_contents>
426 </element> 426 </element>
427 </output_collection> 427 </output_collection>
428 <output_collection name="rna_fasta" type="list" count="1"> 428 <output_collection name="rna_fasta" type="list" count="1">
429 <element name="GCF_000146045.2" ftype="fasta.gz"> 429 <element name="GCF_000146045.2" ftype="fasta.gz">
430 <assert_contents> 430 <assert_contents>
431 <has_size value="2784534"/> 431 <has_size value="2784899"/>
432 </assert_contents> 432 </assert_contents>
433 </element> 433 </element>
434 </output_collection> 434 </output_collection>
435 </test> 435 </test>
436 <test expect_num_outputs="3"> 436 <test expect_num_outputs="3">