Mercurial > repos > iuc > ncbi_datasets
diff datasets_genome.xml @ 12:d78faac2c6ef draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9
author | iuc |
---|---|
date | Sat, 03 Dec 2022 13:29:32 +0000 |
parents | ac24fff14f23 |
children | 1e188c9610c3 |
line wrap: on
line diff
--- a/datasets_genome.xml Fri Dec 02 10:52:48 2022 +0000 +++ b/datasets_genome.xml Sat Dec 03 13:29:32 2022 +0000 @@ -1,9 +1,11 @@ <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>download genome sequence, annotation and metadata</description> + <expand macro="bio_tools"/> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"></expand> + <expand macro="version_command"/> <command><![CDATA[ #import re @SETUP_CERTIFICATES@ @@ -63,8 +65,10 @@ && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) + ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called with --gzip) + ## in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; - #if $file_choices.decompress + #if not $file_choices.decompress && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; #end if @@ -166,7 +170,7 @@ So with decompress="true" more powerful assertions are possible. A single test checks the default, i.e. decompress="false". 
--> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> @@ -174,7 +178,9 @@ <param name="chromosomes" value="21"/> <param name="released_before" value="01/01/2018"/> <section name="file_choices"> - <param name="include" value=""/> + <!-- include a sequence (which should be downloaded as fasta.gz) + and one non-sequence (which should be decompressed) output --> + <param name="include" value="rna,gff3"/> </section> <output name="genome_data_report"> <assert_contents> @@ -183,6 +189,26 @@ <has_n_columns n="4"/> </assert_contents> </output> + <output_collection name="rna_fasta" type="list" count="1"> + <element name="GCF_000306695.2" decompress="true"> + <assert_contents> + <has_text text=">"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="genomic_gff" type="list"> + <element name="GCF_000306695.2"> + <assert_contents> + <has_n_lines min="1000000"/> + <has_line line="##gff-version 3"/> + <!-- TODO this will only work when the galaxy python packages for 22.05 have been released + <has_n_columns n="9" comment="#"/> --> + </assert_contents> + </element> + </output_collection> + <assert_command> + <has_text text="gunzip"/> + </assert_command> </test> <test expect_num_outputs="2"> <conditional name="query|subcommand">