Mercurial > repos > iuc > ncbi_datasets

--- a/datasets_genome.xml	Wed Aug 24 13:00:58 2022 +0000
+++ b/datasets_genome.xml	Mon Nov 21 11:40:05 2022 +0000
@@ -15,25 +15,28 @@
     #end if
 #else:
     '$query.subcommand.taxon'
+    $query.subcommand.tax_exact_match
 #end if
 $filters.reference
 $filters.annotated
 #if $filters.assembly_level:
---assembly-level $filters.assembly_level
+    --assembly-level $filters.assembly_level
 #end if
+--assembly-version $filters.assembly_version
 #if $filters.assembly_source:
---assembly-source $filters.assembly_source
+    --assembly-source $filters.assembly_source
 #end if
 #if $filters.chromosomes:
---chromosomes '$filters.chromosomes'
+    --chromosomes '$filters.chromosomes'
 #end if
-@EXCLUDES_GENOME@
-@INCLUDES_GENOME@
+$filters.exclude_atypical
+@INCLUDE@
 @RELEASED_BEFORE@
-@RELEASED_SINCE@
+@RELEASED_AFTER@
 #for search_term in $filters.search:
     --search '$filters.search_term'
 #end for
+--no-progressbar
 #if $uncompressed
 && 7z x -y ncbi_dataset.zip
 #else
@@ -51,26 +54,32 @@
                     <expand macro="text_or_file"/>
                 </when>
                 <when value="taxon">
-                    <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."></param>
+                    <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>
+                    <param argument="--tax-exact-match" type="boolean" truevalue="--tax-exact-match" falsevalue="" label="Exclude sub-species when a species-level taxon is specified"/>
                 </when>
             </conditional>
         </section>
         <section name="filters" title="Filters and Limit">
-            <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/>
-            <expand macro="annotation"></expand>
-            <expand macro="assembly_level"></expand>
-            <expand macro="assembly_source"></expand>
-            <expand macro="chromosomes"></expand>
-            <expand macro="released_options"></expand>
-            <expand macro="released_options" before_or_after="since"></expand>
+            <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/>
+            <expand macro="annotation"/>
+            <expand macro="assembly_level"/>
+            <param argument="--assembly-version" type="select" label="Assembly version(s)">
+                <option value="latest">Latest</option>
+                <option value="all">All</option>
+            </param>
+            <!-- TODO add test for assembly source: according to CLI doc args are RefSeq, GenBank, All and not refseq / genbank-->
+            <expand macro="assembly_source"/>
+            <expand macro="chromosomes"/>
+            <param argument="--exclude-atypical" type="boolean" truevalue="--exclude-atypical" falsevalue="" label="Exclude atypical assemblies"/>
+            <expand macro="released_options"/>
+            <expand macro="released_options" before_or_after="after"/>

             <repeat name="search" title="Add search terms">
                 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>
             </repeat>
         </section>
         <section name="file_choices" title="File Choices" expanded="true">
-            <expand macro="excludes_genome"></expand>
-            <expand macro="includes_genome"></expand>
+            <expand macro="include"/>
         </section>
         <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/>
     </inputs>
@@ -85,53 +94,49 @@
             <filter>uncompressed</filter>
         </data>
         <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed</filter>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "seq-report" in file_choices['include']</filter>
         </collection>
         <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list:list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/(?!rna|cds_from)(?P&lt;identifier_1&gt;.*?)\.(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and not file_choices['exclude_seq']</filter>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)/(?!rna|cds_from)(?P&lt;identifier_1&gt;.*?)(_genomic)?\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "genome" in file_choices['include']</filter>
+        </collection>
+        <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "rna" in file_choices['include']</filter>
+        </collection>
+        <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "protein" in file_choices['include']</filter>
         </collection>
         <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "cds" in file_choices['include']</filter>
         </collection>
         <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and not file_choices['exclude_gff3']</filter>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "gff3" in file_choices['include']</filter>
         </collection>
-        <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and not file_choices['exclude_rna']</filter>
-        </collection>
-        <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and not file_choices['exclude_protein']</filter>
+        <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "gtf" in file_choices['include']</filter>
         </collection>
         <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['include_gbff']</filter>
-        </collection>
-        <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['include_gtf']</filter>
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>uncompressed and file_choices['include'] and "gbff" in file_choices['include']</filter>
         </collection>
     </outputs>
     <tests>
         <test expect_num_outputs="2">
             <conditional name="query|subcommand">
-                <param name="download_by" value="taxon"></param>
-                <param name="text_or_file" value="text"></param>
-                <param name="taxon" value="human"></param>
+                <param name="download_by" value="taxon"/>
+                <param name="text_or_file" value="text"/>
+                <param name="taxon" value="human"/>
             </conditional>
-            <param name="chromosomes" value="21"></param>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="true"/>
-            <param name="exclude_genomic_cds" value="true"/>
-            <param name="exclude_gff3" value="true"/>
+            <param name="chromosomes" value="21"/>
+            <param name="include" value=""/>
             <param name="uncompressed" value="false"/>
-            <param name="released_before" value="01/01/2018"></param>
+            <param name="released_before" value="01/01/2018"/>
             <output name="archive_contents">
                 <assert_contents>
                     <has_text text="ncbi_dataset/data/dataset_catalog.json"/>
@@ -140,40 +145,72 @@
         </test>
         <test expect_num_outputs="2">
             <conditional name="query|subcommand">
-                <param name="download_by" value="taxon"></param>
-                <param name="text_or_file" value="text"></param>
-                <param name="taxon" value="human"></param>
+                <param name="download_by" value="taxon"/>
+                <param name="text_or_file" value="text"/>
+                <param name="taxon" value="human"/>
             </conditional>
-            <param name="chromosomes" value="21"></param>
-            <param name="uncompressed" value="false"/>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="true"/>
-            <param name="exclude_genomic_cds" value="true"/>
-            <param name="exclude_gff3" value="true"/>
-            <param name="assembly_level" value="chromosome"/>
-            <param name="released_before" value="01/01/2018"></param>
-            <output name="archive_contents">
+            <param name="chromosomes" value="21"/>
+            <param name="include" value="genome"/>
+            <param name="uncompressed" value="true"/>
+            <param name="assembly_level" value="chromosome,complete"/>
+            <param name="released_before" value="01/01/2018"/>
+            <output_collection name="genome_fasta" type="list:list" count="14">
+                <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/>
+                <!-- TODO chromosomes argument (or data) seems not reliable https://github.com/ncbi/datasets/issues/188-->
+                <expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/>
+                <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
+            </output_collection>
+            <output name="genome_data_report">
                 <assert_contents>
-                    <has_text text="ncbi_dataset/data/dataset_catalog.json"/>
+                    <has_text text="Homo sapiens"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- same as previous test but with assembly_source set to refseq (which removes some of the genomes) -->
+        <test expect_num_outputs="2">
+            <conditional name="query|subcommand">
+                <param name="download_by" value="taxon"/>
+                <param name="text_or_file" value="text"/>
+                <param name="taxon" value="human"/>
+            </conditional>
+            <param name="chromosomes" value="21"/>
+            <param name="include" value="genome"/>
+            <param name="uncompressed" value="true"/>
+            <param name="assembly_level" value="chromosome,complete"/>
+            <param name="assembly_source" value="refseq"/>
+            <param name="released_before" value="01/01/2018"/>
+            <output_collection name="genome_fasta" type="list:list" count="2">
+                <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/>
+                <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/>
+            </output_collection>
+            <output name="genome_data_report">
+                <assert_contents>
+                    <has_text text="Homo sapiens"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="4">
             <conditional name="query|subcommand">
-                <param name="download_by" value="accession"></param>
+                <param name="download_by" value="accession"/>
                 <conditional name="text_or_file">
-                    <param name="text_or_file" value="text"></param>
-                    <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param>
+                    <param name="text_or_file" value="text"/>
+                    <param name="accession" value="GCF_000013305.1 GCF_000007445.1"/>
                 </conditional>
             </conditional>
+            <param name="include" value="seq-report,gtf,cds"/>
             <param name="uncompressed" value="true"/>
-            <param name="released_before" value="01/01/2007"></param>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="true"/>
-            <param name="exclude_gff3" value="true"/>
-            <param name="include_gtf" value="true"/>
+            <param name="released_before" value="01/01/2007"/>
             <output name="genome_data_report">
                 <assert_contents>
                     <has_text text="GCF_000013305.1"/>
@@ -190,19 +227,15 @@
         </test>
         <test expect_num_outputs="4">
             <conditional name="query|subcommand">
-                <param name="download_by" value="accession"></param>
+                <param name="download_by" value="accession"/>
                 <conditional name="text_or_file">
-                    <param name="text_or_file" value="file"></param>
-                    <param name="inputfile" value="accessions.txt"></param>
+                    <param name="text_or_file" value="file"/>
+                    <param name="inputfile" value="accessions.txt"/>
                 </conditional>
             </conditional>
-            <param name="include_gbff" value="true"/>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="true"/>
-            <param name="exclude_genomic_cds" value="true"/>
+            <param name="include" value="seq-report,gbff,gff3"/>
             <param name="uncompressed" value="true"/>
-            <param name="released_before" value="01/02/2007"></param>
+            <param name="released_before" value="01/01/2007"/>
             <output name="genome_data_report">
                 <assert_contents>
                    <has_text text="SAMN02604181"/>
@@ -219,21 +252,33 @@
         </test>
         <test expect_num_outputs="2">
             <conditional name="query|subcommand">
-                <param name="download_by" value="accession"></param>
+                <param name="download_by" value="accession"/>
                 <conditional name="text_or_file">
-                    <param name="text_or_file" value="text"></param>
-                    <param name="accession" value="GCF_000001405.40"></param>
+                    <param name="text_or_file" value="text"/>
+                    <param name="accession" value="GCF_000001405"/>
                 </conditional>
             </conditional>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="true"/>
-            <param name="exclude_genomic_cds" value="true"/>
-            <param name="exclude_gff3" value="true"/>
+            <param name="include" value="seq-report"/>
             <param name="uncompressed" value="true"/>
-            <param name="released_before" value="01/02/2007"></param>
-            <output_collection name="sequence_report">
-                <element name="GCF_000001405.40">
+            <param name="released_before" value="01/01/2015"/>
+            <param name="assembly_version" value="all"/>
+            <output_collection name="sequence_report" count="4">
+                <element name="GCF_000001405.25">
+                    <assert_contents>
+                        <has_text text="assignedMoleculeLocationType"/>
+                    </assert_contents>
+                 </element>
+                <element name="GCF_000001405.26">
+                    <assert_contents>
+                        <has_text text="assignedMoleculeLocationType"/>
+                    </assert_contents>
+                 </element>
+                <element name="GCF_000001405.27">
+                    <assert_contents>
+                        <has_text text="assignedMoleculeLocationType"/>
+                    </assert_contents>
+                 </element>
+                <element name="GCF_000001405.28">
                     <assert_contents>
                         <has_text text="assignedMoleculeLocationType"/>
                     </assert_contents>
@@ -242,59 +287,51 @@
         </test>
         <test expect_num_outputs="5">
             <conditional name="query|subcommand">
-                <param name="download_by" value="accession"></param>
+                <param name="download_by" value="accession"/>
                 <conditional name="text_or_file">
-                    <param name="text_or_file" value="text"></param>
-                    <param name="accession" value="GCF_000146045.2"></param>
+                    <param name="text_or_file" value="text"/>
+                    <param name="accession" value="GCF_000146045.2"/>
                 </conditional>
             </conditional>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="false"/>
-            <param name="exclude_seq" value="false"/>
-            <param name="exclude_genomic_cds" value="false"/>
-            <param name="exclude_gff3" value="true"/>
+            <param name="include" value="seq-report,genome,rna,cds"/>
             <param name="uncompressed" value="true"/>
             <output_collection name="genome_fasta" type="list:list" count="1">
-                <element name="GCF_000146045.2">
-                    <element name="chrI">
-                        <assert_contents>
-                            <has_text text=">NC_001133.9"/>
-                        </assert_contents>
-                    </element>
-                </element>
+                <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>
             </output_collection>
         </test>
         <test expect_num_outputs="3">
             <conditional name="query|subcommand">
-                <param name="download_by" value="accession"></param>
+                <param name="download_by" value="accession"/>
                 <conditional name="text_or_file">
-                    <param name="text_or_file" value="text"></param>
-                    <param name="accession" value="GCF_000146045.2 GCF_000002945.1"></param>
+                    <param name="text_or_file" value="text"/>
+                    <param name="accession" value="GCF_000146045.2 GCF_000002945.1"/>
                 </conditional>
             </conditional>
-            <param name="exclude_protein" value="true"/>
-            <param name="exclude_rna" value="true"/>
-            <param name="exclude_seq" value="false"/>
-            <param name="exclude_genomic_cds" value="true"/>
-            <param name="exclude_gff3" value="true"/>
+            <param name="include" value="seq-report,genome"/>
             <param name="uncompressed" value="true"/>
             <output_collection name="genome_fasta" type="list:list" count="2">
-                <element name="GCF_000002945.1">
-                    <element name="chrI">
-                        <assert_contents>
-                            <has_text text=">NC_003424.3"/>
-                        </assert_contents>
-                    </element>
-                </element>
-                <element name="GCF_000146045.2">
-                    <element name="chrI">
-                        <assert_contents>
-                            <has_text text=">NC_001133.9"/>
-                        </assert_contents>
-                    </element>
-                </element>
+                <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/>
+                <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/>
             </output_collection>
         </test>
+        <!-- tax_exact_match does not seem to be able to filter out strains
+             https://github.com/ncbi/datasets/issues/187
+             hence we set expect_test_failure="true" -->
+        <test expect_num_outputs="1" expect_test_failure="true">
+            <conditional name="query|subcommand">
+                <param name="download_by" value="taxon"/>
+                <param name="text_or_file" value="text"/>
+                <param name="taxon" value="4932"/>
+                <param name="tax_exact_match" value="true"/>
+            </conditional>
+            <param name="include" value=""/>
+            <param name="uncompressed" value="true"/>
+            <output name="genome_data_report">
+                <assert_contents>
+                   <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 <![CDATA[
--- a/macros.xml	Wed Aug 24 13:00:58 2022 +0000
+++ b/macros.xml	Mon Nov 21 11:40:05 2022 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">13.35.0</token>
+    <token name="@TOOL_VERSION@">14.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">21.01</token>
     <token name="@LICENSE@">MIT</token>
@@ -11,28 +11,25 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement>
-            <requirement type="package" version="2022.6.15">ca-certificates</requirement>
+            <requirement type="package" version="2022.9.24">ca-certificates</requirement>
             <requirement type="package" version="16.02">p7zip</requirement>
         </requirements>
     </xml>
     <xml name="annotation">
         <param argument="--annotated" type="boolean" truevalue="--annotated" falsevalue="" label="Only include genomes with annotation ?"/>
     </xml>
-    <xml name="dehydrated">
-        <param argument="--dehydrated" type="boolean" truevalue="--dehydrated" falsevalue="" label="Download a dehydrated zip archive including the data report and locations of data files ?" help="Use the rehydrate tools to retrieve data files"/>
-    </xml>
     <xml name="assembly_level">
         <param argument="--assembly-level" type="select" label="Restrict assemblies to a comma-separated list of one or more of these" multiple="true" optional="true">
             <option value="chromosome">Chromosome</option>
-            <option value="complete_genome">Complete Genome</option>
+            <option value="complete">Complete Genome</option>
             <option value="contig">Contig</option>
             <option value="scaffold">Scaffold</option>
         </param>
     </xml>
     <xml name="assembly_source">
-        <param argument="--assembly-source" type="select" optional="true">
+        <param argument="--assembly-source" type="select" optional="true" label="Assembly source" help="Default (nothing selected) is both RefSeq and GenBank">
             <option value="refseq">RefSeq</option>
-            <option value="genabnk">GenBank</option>
+            <option value="genbank">GenBank</option>
         </param>
     </xml>
     <xml name="text_or_file" token_what="accession" token_what_extended="NCBI Assembly accession" token_help="Can be NCBI Assembly or BioProject accession">
@@ -62,58 +59,47 @@
             </sanitizer>
         </param>
     </xml>
-    <xml name="include" token_include_what="gbff" token_include_label="Include GenBank flat file sequence and annotation, if available">
-        <param argument="--include-@INCLUDE_WHAT@" type="boolean" truevalue="--include-@INCLUDE_WHAT@" falsevalue="" label="@INCLUDE_LABEL@" />
-    </xml>
-    <xml name="includes_genome">
-        <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation, if available"/>
-        <expand macro="include" include_what="gtf" include_label="Include gtf annotation file, if available"/>
-    </xml>
-    <xml name="exclude" token_exclude_what="gff3" token_exclude_label="Exclude gff3 annotation file" token_checked="false">
-        <param argument="--exclude-@EXCLUDE_WHAT@" type="boolean" truevalue="--exclude-@EXCLUDE_WHAT@" falsevalue="" label="@EXCLUDE_LABEL@" checked="@CHECKED@"/>
-    </xml>
-    <xml name="excludes_genome">
-        <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/>
-        <expand macro="exclude" exclude_what="gff3" exclude_label="Exclude gff3 annotation file"/>
-        <expand macro="exclude" exclude_what="genomic-cds" exclude_label="Exclude cds from genomic sequence file"/>
-        <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/>
-        <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/>
-    </xml>
-    <xml name="excludes_gene">
-        <expand macro="exclude" exclude_what="gene" exclude_label="Exclude gene sequence file"/>
-        <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/>
-        <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/>
+    <xml name="include">
+        <param argument="--include" type="select" multiple="true" optional="true" label="Include these data files" help="Deselect all options to pass '--include none'">
+            <option value="genome" selected="true">genomic sequence (genome)</option>
+            <option value="rna">transcript (rna)</option>
+            <option value="protein">amino acid sequences (protein)</option>
+            <option value="cds">nucleotide coding sequences (cds)</option>
+            <option value="gff3">general feature file (gff3)</option>
+            <option value="gtf">gene transfer format (gtf)</option>
+            <option value="gbff">GenBank flat file (gbff)</option>
+            <option value="seq-report">sequence report file (seq-report)</option>
+        </param>
     </xml>
-    <xml name="excludes_virus_protein">
-        <yield/>
-        <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/>
-        <expand macro="exclude" exclude_what="pdb" exclude_label="Exclude protein structure files (pdb)"/>
-        <expand macro="exclude" exclude_what="gpff" exclude_label="Exclude protein sequence and annotation in GenPept flat file"/>
-        <expand macro="exclude" exclude_what="cds" exclude_label="Exclude CDS sequence file"/>
-    </xml>
-    <xml name="excludes_virus_genome">
-        <expand macro="excludes_virus_protein">
-            <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/>
-        </expand>
-    </xml>
-    <token name="@EXCLUDES_GENOME@">$file_choices.exclude_gff3 $file_choices.exclude_genomic_cds $file_choices.exclude_protein $file_choices.exclude_rna $file_choices.exclude_seq</token>
-    <token name="@EXCLUDES_GENE@">$exclude_gene $exclude_protein $exclude_rna</token>
-    <token name="@EXCLUDES_VIRUS_PROTEIN@">$exclude_protein $exclude_pdb $exclude_gpff $exclude_cds</token>
-    <token name="@EXCLUDES_VIRUS_GENOME@">$exclude_seq @EXCLUDES_VIRUS_PROTEIN@</token>
-    <xml name="includes_virus_genome">
-        <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation"/>
-    </xml>
-    <token name="@INCLUDES_GENOME@">$file_choices.include_gbff $file_choices.include_gtf</token>
-    <token name="@INCLUDES_VIRUS_GENOME@">$include_gbff</token>
+    <token name="@INCLUDE@"><![CDATA[
+        --include
+        #if $file_choices.include
+            #echo ",".join($file_choices.include)
+        #else
+            none
+        #end if
+    ]]></token>
     <xml name="released_options" token_released_what="genomes" token_before_or_after="before">
-        <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"></param>
+        <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)">
+            <validator type="regex" message="enter a date in the form MM/DD/YYYY">^[0-9]{2}/[0-9]{2}/[0-9]{4}$</validator>
+        </param>
     </xml>
     <token name="@RELEASED_BEFORE@">#if $filters.released_before:
 --released-before '$filters.released_before'
 #end if
     </token>
-    <token name="@RELEASED_SINCE@">#if $filters.released_since:
---released-since '$filters.released_since'
+    <token name="@RELEASED_AFTER@">#if $filters.released_after:
+--released-after '$filters.released_after'
 #end if
     </token>
+
+    <xml name="genome_fasta_assert" tokens="el1,el2,expression" token_expression_n="1">
+        <element name="@EL1@">
+            <element name="@EL2@">
+                <assert_contents>
+                    <has_text_matching expression="@EXPRESSION@" n="@EXPRESSION_N@"/>
+                </assert_contents>
+            </element>
+        </element>
+    </xml>
 </macros>