ncbi_datasets: datasets_genome.xml comparison

comparison datasets_genome.xml @ 12:d78faac2c6ef draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9

author	iuc
date	Sat, 03 Dec 2022 13:29:32 +0000
parents	ac24fff14f23
children	1e188c9610c3

comparison

equal deleted inserted replaced

-:ac24fff14f23
+:d78faac2c6ef
 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
 <description>download genome sequence, annotation and metadata</description>
+<expand macro="bio_tools"/>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="requirements"></expand>
+<expand macro="version_command"/>
 <command><![CDATA[
 #import re
 @SETUP_CERTIFICATES@
 datasets download genome $query.subcommand.download_by
 #if $query.subcommand.download_by == 'accession':
 ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery
 && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \;
 ## unzip all compressed (non-fasta) files (jsonl files are just named .gz)
+## note "not decompress" means that the datasets are provided compressed (datasets rehydrate is called with --gzip)
+##      in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression
 && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \;
-#if $file_choices.decompress
+#if not $file_choices.decompress
 && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \;
 #end if
 #if "seq-report" in $file_choices.include
 && find ncbi_dataset -name sequence_report.jsonl -exec sh -c 'dataformat tsv genome-seq --inputfile {} > \$(dirname {})/\$(basename {} .jsonl).tsv' \;
 of compressed files https://github.com/galaxyproject/galaxy/pull/15085
 So with decompress="true" more powerful assertions are possible.
 A single test checks the default, ie decompress="false".
 -->
-<test expect_num_outputs="1">
+<test expect_num_outputs="3">
 <conditional name="query|subcommand">
 <param name="download_by" value="taxon"/>
 <param name="taxon_positional" value="human"/>
 </conditional>
 <param name="chromosomes" value="21"/>
 <param name="released_before" value="01/01/2018"/>
 <section name="file_choices">
-<param name="include" value=""/>
+<!-- include a sequence (which should be downloaded as fasta.gz)
+and one non-sequence (which should be decompressed) output -->
+<param name="include" value="rna,gff3"/>
 </section>
 <output name="genome_data_report">
 <assert_contents>
 <has_text text="Assembly Accession&#009;Assembly Name&#009;Assembly Submitter&#009;Organism Name"/>
 <has_n_lines n="144"/>
 <has_n_columns n="4"/>
 </assert_contents>
 </output>
+<output_collection name="rna_fasta" type="list" count="1">
+<element name="GCF_000306695.2" decompress="true">
+<assert_contents>
+<has_text text=">"/>
+</assert_contents>
+</element>
+</output_collection>
+<output_collection name="genomic_gff" type="list">
+<element name="GCF_000306695.2">
+<assert_contents>
+<has_n_lines min="1000000"/>
+<has_line line="##gff-version 3"/>
+<!-- TODO this will only work when the galaxy python packages for 22.05 have been released
+<has_n_columns n="9" comment="#"/> -->
+</assert_contents>
+</element>
+</output_collection>
+<assert_command>
+<has_text text="gunzip"/>
+</assert_command>
 </test>
 <test expect_num_outputs="2">
 <conditional name="query|subcommand">
 <param name="download_by" value="taxon"/>
 <param name="taxon_positional" value="human"/>

Mercurial > repos > iuc > ncbi_datasets

comparison datasets_genome.xml @ 12:d78faac2c6ef draft