Mercurial > repos > iuc > ncbi_datasets
changeset 25:bc18e25d4204 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_datasets commit 5563c48ba342dbd3ef5dedf7254bfd5cb586ba65
| author | iuc |
|---|---|
| date | Wed, 14 Jan 2026 15:05:01 +0000 |
| parents | 94e32337ba54 |
| children | |
| files | datasets_genome.xml macros.xml |
| diffstat | 2 files changed, 37 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets_genome.xml Fri Dec 26 17:17:02 2025 +0000 +++ b/datasets_genome.xml Wed Jan 14 15:05:01 2026 +0000 @@ -65,20 +65,19 @@ ## rehydrate && datasets rehydrate --directory ./ - #if not $file_choices.decompress - --gzip - #end if + --gzip --max-workers \${NCBI_DATASETS_MAX_WORKERS:-10} ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) - ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called we --gzip) + ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called with --gzip) ## in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; - #if not $file_choices.decompress - && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; + && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; + #if $file_choices.decompress + && find ncbi_dataset -name "*fasta.gz" -exec gunzip {} \; #end if #if "seq-report" in $file_choices.include @@ -174,38 +173,43 @@ </collection> </outputs> <tests> + <!-- download sequence and non-sequence data to test if unzipping works + sequence should be downloaded as gz and non-sequence unzipped + + restrict download size for testing by using release data filtering + --> <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> </conditional> <section name="filters"> - <param name="chromosomes" value="21"/> - <param name="released_before" value="01/01/2018"/> + <param name="released_after" value="08/31/2004"/> + <param name="released_before" value="01/01/2005"/> </section> <section name="file_choices"> - <!-- include a sequence (which should be downloaded as fasta.gz) - and one non-sequence (which should be decompressed) output --> <param name="include" value="rna,gff3"/> </section> <output name="genome_data_report"> <assert_contents> <has_text text="Assembly Accession	Assembly Name	Assembly Submitter	Organism Name"/> - <has_n_lines min="140"/> + <!-- no idea why the report contains 2 entries, but only one is downloaded + https://github.com/ncbi/datasets/issues/553 --> + <has_n_lines n="3"/> <has_n_columns n="4"/> </assert_contents> </output> - <output_collection name="rna_fasta" type="list"> - <element name="GCF_000306695.2" decompress="true"> + <output_collection name="rna_fasta" type="list" count="1"> + <element name="GCF_000002135.2" decompress="true" ftype="fasta.gz"> <assert_contents> <has_text text=">"/> </assert_contents> </element> </output_collection> - <output_collection name="genomic_gff" type="list"> - <element name="GCF_000306695.2"> + <output_collection name="genomic_gff" type="list" count="1"> + <element name="GCF_000002135.2" ftype="gff3"> <assert_contents> - <has_n_lines min="1000000"/> + <has_n_lines min="40000"/> <has_line line="##gff-version 3"/> <has_n_columns n="9" comment="#"/> </assert_contents> @@ -485,11 +489,20 @@ <param name="taxon_positional" value="4932"/> <param name="tax_exact_match" value="true"/> </conditional> + <section name="filters"> + <param name="released_before" value="11/01/2012"/> + </section> + <section name="file_choices"> + <param name="include" value="seq-report"/> + <param name="decompress" value="true"/> + </section> <output name="genome_data_report"> <assert_contents> + <has_n_lines n="2"/> <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/> </assert_contents> </output> + </test> <!-- test search filter --> <test expect_num_outputs="1"> @@ -498,6 +511,9 @@ <param name="taxon_positional" value="Streptococcus"/> </conditional> <section name="filters"> + <param name="released_before" value="01/01/2010"/> + </section> + <section name="filters"> <repeat name="search"> <param name="search" value="pyogenes"/> </repeat> @@ -507,7 +523,8 @@ </section> <output name="genome_data_report"> <assert_contents> - <has_text text="pyogenes"/> + <has_n_lines n="21"/> + <has_text text="pyogenes" n="20"/> </assert_contents> </output> </test>
--- a/macros.xml Fri Dec 26 17:17:02 2025 +0000 +++ b/macros.xml Wed Jan 14 15:05:01 2026 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">18.13.0</token> + <token name="@TOOL_VERSION@">18.14.0</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">23.0</token> <token name="@LICENSE@">MIT</token> @@ -11,7 +11,7 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> - <requirement type="package" version="2025.8.3">ca-certificates</requirement> + <requirement type="package" version="2026.1.4">ca-certificates</requirement> <!-- Removed line below because it was causing "skipping: [..] need PK compat. v4.5 (can do v2.1)" --> <!-- <requirement type="package" version="6.0">unzip</requirement> --> </requirements> @@ -380,7 +380,7 @@ </xml> <xml name="released_options" token_released_what="genomes" token_before_or_after="before"> <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"> - <validator type="regex" message="enter a date in the form MM/DD/YYYY">[0-9]{2}/[0-9]{2}/[0-9]{4}</validator> + <!-- <validator type="regex" message="enter a date in the form MM/DD/YYYY">[0-9]{2}/[0-9]{2}/[0-9]{4}</validator> --> </param> </xml> <token name="@RELEASED_BEFORE@">#if $filters.released_before:
