Mercurial > repos > iuc > ncbi_datasets
changeset 10:a3395b1d871b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 3d012f702b54172f30a49543bf7e5fff2dd71f30
author | iuc |
---|---|
date | Mon, 21 Nov 2022 11:40:05 +0000 |
parents | 18eed8fa7f23 |
children | ac24fff14f23 |
files | datasets_genome.xml macros.xml |
diffstat | 2 files changed, 203 insertions(+), 180 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets_genome.xml Wed Aug 24 13:00:58 2022 +0000 +++ b/datasets_genome.xml Mon Nov 21 11:40:05 2022 +0000 @@ -15,25 +15,28 @@ #end if #else: '$query.subcommand.taxon' + $query.subcommand.tax_exact_match #end if $filters.reference $filters.annotated #if $filters.assembly_level: ---assembly-level $filters.assembly_level + --assembly-level $filters.assembly_level #end if +--assembly-version $filters.assembly_version #if $filters.assembly_source: ---assembly-source $filters.assembly_source + --assembly-source $filters.assembly_source #end if #if $filters.chromosomes: ---chromosomes '$filters.chromosomes' + --chromosomes '$filters.chromosomes' #end if -@EXCLUDES_GENOME@ -@INCLUDES_GENOME@ +$filters.exclude_atypical +@INCLUDE@ @RELEASED_BEFORE@ -@RELEASED_SINCE@ +@RELEASED_AFTER@ #for search_term in $filters.search: --search '$filters.search_term' #end for +--no-progressbar #if $uncompressed && 7z x -y ncbi_dataset.zip #else @@ -51,26 +54,32 @@ <expand macro="text_or_file"/> </when> <when value="taxon"> - <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."></param> + <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/> + <param argument="--tax-exact-match" type="boolean" truevalue="--tax-exact-match" falsevalue="" label="Exclude sub-species when a species-level taxon is specified"/> </when> </conditional> </section> <section name="filters" title="Filters and Limit"> - <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/> - <expand macro="annotation"></expand> - <expand macro="assembly_level"></expand> - <expand macro="assembly_source"></expand> - <expand macro="chromosomes"></expand> - <expand macro="released_options"></expand> - <expand macro="released_options" before_or_after="since"></expand> + <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/> + <expand macro="annotation"/> + <expand macro="assembly_level"/> + <param argument="--assembly-version" type="select" label="Assembly version(s)"> + <option value="latest">Latest</option> + <option value="all">All</option> + </param> + <!-- TODO add test for assembly source: according to CLI doc args are RefSeq, GenBank, All and not refseq / genbank--> + <expand macro="assembly_source"/> + <expand macro="chromosomes"/> + <param argument="--exclude-atypical" type="boolean" truevalue="--exclude-atypical" falsevalue="" label="Exclude atypical assemblies"/> + <expand macro="released_options"/> + <expand macro="released_options" before_or_after="after"/> <repeat name="search" title="Add search terms"> <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> </repeat> </section> <section name="file_choices" title="File Choices" expanded="true"> - <expand macro="excludes_genome"></expand> - <expand macro="includes_genome"></expand> + <expand macro="include"/> </section> <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> </inputs> @@ -85,53 +94,49 @@ <filter>uncompressed</filter> </data> <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed</filter> + <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "seq-report" in file_choices['include']</filter> </collection> <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list:list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/(?!rna|cds_from)(?P<identifier_1>.*?)\.(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and not file_choices['exclude_seq']</filter> + <discover_datasets pattern="(?P<identifier_0>.*?)/(?!rna|cds_from)(?P<identifier_1>.*?)(_genomic)?.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "genome" in file_choices['include']</filter> + </collection> + <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> + <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "rna" in file_choices['include']</filter> + </collection> + <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> + <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "protein" in file_choices['include']</filter> </collection> <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter> + <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "cds" in file_choices['include']</filter> </collection> <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and not file_choices['exclude_gff3']</filter> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "gff3" in file_choices['include']</filter> </collection> - <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and not file_choices['exclude_rna']</filter> - </collection> - <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and not file_choices['exclude_protein']</filter> + <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list"> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "gtf" in file_choices['include']</filter> </collection> <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['include_gbff']</filter> - </collection> - <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list"> - <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['include_gtf']</filter> + <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/> + <filter>uncompressed and file_choices['include'] and "gbff" in file_choices['include']</filter> </collection> </outputs> <tests> <test expect_num_outputs="2"> <conditional name="query|subcommand"> - <param name="download_by" value="taxon"></param> - <param name="text_or_file" value="text"></param> - <param name="taxon" value="human"></param> + <param name="download_by" value="taxon"/> + <param name="text_or_file" value="text"/> + <param name="taxon" value="human"/> </conditional> - <param name="chromosomes" value="21"></param> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="true"/> - <param name="exclude_genomic_cds" value="true"/> - <param name="exclude_gff3" value="true"/> + <param name="chromosomes" value="21"/> + <param name="include" value=""/> <param name="uncompressed" value="false"/> - <param name="released_before" value="01/01/2018"></param> + <param name="released_before" value="01/01/2018"/> <output name="archive_contents"> <assert_contents> <has_text text="ncbi_dataset/data/dataset_catalog.json"/> @@ -140,40 +145,72 @@ </test> <test expect_num_outputs="2"> <conditional name="query|subcommand"> - <param name="download_by" value="taxon"></param> - <param name="text_or_file" value="text"></param> - <param name="taxon" value="human"></param> + <param name="download_by" value="taxon"/> + <param name="text_or_file" value="text"/> + <param name="taxon" value="human"/> </conditional> - <param name="chromosomes" value="21"></param> - <param name="uncompressed" value="false"/> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="true"/> - <param name="exclude_genomic_cds" value="true"/> - <param name="exclude_gff3" value="true"/> - <param name="assembly_level" value="chromosome"/> - <param name="released_before" value="01/01/2018"></param> - <output name="archive_contents"> + <param name="chromosomes" value="21"/> + <param name="include" value="genome"/> + <param name="uncompressed" value="true"/> + <param name="assembly_level" value="chromosome,complete"/> + <param name="released_before" value="01/01/2018"/> + <output_collection name="genome_fasta" type="list:list" count="14"> + <expand macro="genome_fasta_assert" el1="GCA_000002115.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000002125.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000002135.3" el2="GCA_000002135.3_CRA_TCAGchr7v2" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000212995.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000252825.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000306695.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_000365445.1" el2="chr21" expression=">"/> + <!-- TODO chromosomes argument (or data) seems not reliable https://github.com/ncbi/datasets/issues/188--> + <expand macro="genome_fasta_assert" el1="GCA_000442335.2" el2="GCA_000442335.2_LinearCen1.1_normalized" expression=">" expression_n="25"/> + <expand macro="genome_fasta_assert" el1="GCA_001292825.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_001524155.4" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_001712695.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCA_022833125.2" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> + </output_collection> + <output name="genome_data_report"> <assert_contents> - <has_text text="ncbi_dataset/data/dataset_catalog.json"/> + <has_text text="Homo sapiens"/> + </assert_contents> + </output> + </test> + <!-- same as precious test but assembly_source (refseq which removes some of the genomes) --> + <test expect_num_outputs="2"> + <conditional name="query|subcommand"> + <param name="download_by" value="taxon"/> + <param name="text_or_file" value="text"/> + <param name="taxon" value="human"/> + </conditional> + <param name="chromosomes" value="21"/> + <param name="include" value="genome"/> + <param name="uncompressed" value="true"/> + <param name="assembly_level" value="chromosome,complete"/> + <param name="assembly_source" value="refseq"/> + <param name="released_before" value="01/01/2018"/> + <output_collection name="genome_fasta" type="list:list" count="2"> + <expand macro="genome_fasta_assert" el1="GCF_000002125.1" el2="chr21" expression=">"/> + <expand macro="genome_fasta_assert" el1="GCF_000306695.2" el2="chr21" expression=">"/> + </output_collection> + <output name="genome_data_report"> + <assert_contents> + <has_text text="Homo sapiens"/> </assert_contents> </output> </test> <test expect_num_outputs="4"> <conditional name="query|subcommand"> - <param name="download_by" value="accession"></param> + <param name="download_by" value="accession"/> <conditional name="text_or_file"> - <param name="text_or_file" value="text"></param> - <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> + <param name="text_or_file" value="text"/> + <param name="accession" value="GCF_000013305.1 GCF_000007445.1"/> </conditional> </conditional> + <param name="include" value="seq-report,gtf,cds"/> <param name="uncompressed" value="true"/> - <param name="released_before" value="01/01/2007"></param> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="true"/> - <param name="exclude_gff3" value="true"/> - <param name="include_gtf" value="true"/> + <param name="released_before" value="01/01/2007"/> <output name="genome_data_report"> <assert_contents> <has_text text="GCF_000013305.1"/> @@ -190,19 +227,15 @@ </test> <test expect_num_outputs="4"> <conditional name="query|subcommand"> - <param name="download_by" value="accession"></param> + <param name="download_by" value="accession"/> <conditional name="text_or_file"> - <param name="text_or_file" value="file"></param> - <param name="inputfile" value="accessions.txt"></param> + <param name="text_or_file" value="file"/> + <param name="inputfile" value="accessions.txt"/> </conditional> </conditional> - <param name="include_gbff" value="true"/> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="true"/> - <param name="exclude_genomic_cds" value="true"/> + <param name="include" value="seq-report,gbff,gff3"/> <param name="uncompressed" value="true"/> - <param name="released_before" value="01/02/2007"></param> + <param name="released_before" value="01/01/2007"/> <output name="genome_data_report"> <assert_contents> <has_text text="SAMN02604181"/> @@ -219,21 +252,33 @@ </test> <test expect_num_outputs="2"> <conditional name="query|subcommand"> - <param name="download_by" value="accession"></param> + <param name="download_by" value="accession"/> <conditional name="text_or_file"> - <param name="text_or_file" value="text"></param> - <param name="accession" value="GCF_000001405.40"></param> + <param name="text_or_file" value="text"/> + <param name="accession" value="GCF_000001405"/> </conditional> </conditional> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="true"/> - <param name="exclude_genomic_cds" value="true"/> - <param name="exclude_gff3" value="true"/> + <param name="include" value="seq-report"/> <param name="uncompressed" value="true"/> - <param name="released_before" value="01/02/2007"></param> - <output_collection name="sequence_report"> - <element name="GCF_000001405.40"> + <param name="released_before" value="01/01/2015"/> + <param name="assembly_version" value="all"/> + <output_collection name="sequence_report" count="4"> + <element name="GCF_000001405.25"> + <assert_contents> + <has_text text="assignedMoleculeLocationType"/> + </assert_contents> + </element> + <element name="GCF_000001405.26"> + <assert_contents> + <has_text text="assignedMoleculeLocationType"/> + </assert_contents> + </element> + <element name="GCF_000001405.27"> + <assert_contents> + <has_text text="assignedMoleculeLocationType"/> + </assert_contents> + </element> + <element name="GCF_000001405.28"> <assert_contents> <has_text text="assignedMoleculeLocationType"/> </assert_contents> @@ -242,59 +287,51 @@ </test> <test expect_num_outputs="5"> <conditional name="query|subcommand"> - <param name="download_by" value="accession"></param> + <param name="download_by" value="accession"/> <conditional name="text_or_file"> - <param name="text_or_file" value="text"></param> - <param name="accession" value="GCF_000146045.2"></param> + <param name="text_or_file" value="text"/> + <param name="accession" value="GCF_000146045.2"/> </conditional> </conditional> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="false"/> - <param name="exclude_seq" value="false"/> - <param name="exclude_genomic_cds" value="false"/> - <param name="exclude_gff3" value="true"/> + <param name="include" value="seq-report,genome,rna,cds"/> <param name="uncompressed" value="true"/> <output_collection name="genome_fasta" type="list:list" count="1"> - <element name="GCF_000146045.2"> - <element name="chrI"> - <assert_contents> - <has_text text=">NC_001133.9"/> - </assert_contents> - </element> - </element> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> </output_collection> </test> <test expect_num_outputs="3"> <conditional name="query|subcommand"> - <param name="download_by" value="accession"></param> + <param name="download_by" value="accession"/> <conditional name="text_or_file"> - <param name="text_or_file" value="text"></param> - <param name="accession" value="GCF_000146045.2 GCF_000002945.1"></param> + <param name="text_or_file" value="text"/> + <param name="accession" value="GCF_000146045.2 GCF_000002945.1"/> </conditional> </conditional> - <param name="exclude_protein" value="true"/> - <param name="exclude_rna" value="true"/> - <param name="exclude_seq" value="false"/> - <param name="exclude_genomic_cds" value="true"/> - <param name="exclude_gff3" value="true"/> + <param name="include" value="seq-report,genome"/> <param name="uncompressed" value="true"/> <output_collection name="genome_fasta" type="list:list" count="2"> - <element name="GCF_000002945.1"> - <element name="chrI"> - <assert_contents> - <has_text text=">NC_003424.3"/> - </assert_contents> - </element> - </element> - <element name="GCF_000146045.2"> - <element name="chrI"> - <assert_contents> - <has_text text=">NC_001133.9"/> - </assert_contents> - </element> - </element> + <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> </output_collection> </test> + <!-- tax_exact_match seems not able to filter out strains + https://github.com/ncbi/datasets/issues/187 + hence we set expect_test_failure="true"--> + <test expect_num_outputs="1" expect_test_failure="true"> + <conditional name="query|subcommand"> + <param name="download_by" value="taxon"/> + <param name="text_or_file" value="text"/> + <param name="taxon" value="4932"/> + <param name="tax_exact_match" value="true"/> + </conditional> + <param name="include" value=""/> + <param name="uncompressed" value="true"/> + <output name="genome_data_report"> + <assert_contents> + <has_text text="Saccharomyces cerevisiae ZTW1" negate="true"/> + </assert_contents> + </output> + </test> </tests> <help> <![CDATA[
--- a/macros.xml Wed Aug 24 13:00:58 2022 +0000 +++ b/macros.xml Mon Nov 21 11:40:05 2022 +0000 @@ -1,5 +1,5 @@ <macros> - <token name="@TOOL_VERSION@">13.35.0</token> + <token name="@TOOL_VERSION@">14.3</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">21.01</token> <token name="@LICENSE@">MIT</token> @@ -11,28 +11,25 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ncbi-datasets-cli</requirement> - <requirement type="package" version="2022.6.15">ca-certificates</requirement> + <requirement type="package" version="2022.9.24">ca-certificates</requirement> <requirement type="package" version="16.02">p7zip</requirement> </requirements> </xml> <xml name="annotation"> <param argument="--annotated" type="boolean" truevalue="--annotated" falsevalue="" label="Only include genomes with annotation ?"/> </xml> - <xml name="dehydrated"> - <param argument="--dehydrated" type="boolean" truevalue="--dehydrated" falsevalue="" label="Download a dehydrated zip archive including the data report and locations of data files ?" help="Use the rehydrate tools to retrieve data files"/> - </xml> <xml name="assembly_level"> <param argument="--assembly-level" type="select" label="Restrict assemblies to a comma-separated list of one or more of these" multiple="true" optional="true"> <option value="chromosome">Chromosome</option> - <option value="complete_genome">Complete Genome</option> + <option value="complete">Complete Genome</option> <option value="contig">Contig</option> <option value="scaffold">Scaffold</option> </param> </xml> <xml name="assembly_source"> - <param argument="--assembly-source" type="select" optional="true"> + <param argument="--assembly-source" type="select" optional="true" label="Assembly source" help="Default (nothing selected) is both " > <option value="refseq">RefSeq</option> - <option value="genabnk">GenBank</option> + <option value="genbank">GenBank</option> </param> </xml> <xml name="text_or_file" token_what="accession" token_what_extended="NCBI Assembly accession" token_help="Can be NCBI Assembly or BioProject accession"> @@ -62,58 +59,47 @@ </sanitizer> </param> </xml> - <xml name="include" token_include_what="gbff" token_include_label="Include GenBank flat file sequence and annotation, if available"> - <param argument="--include-@INCLUDE_WHAT@" type="boolean" truevalue="--include-@INCLUDE_WHAT@" falsevalue="" label="@INCLUDE_LABEL@" /> - </xml> - <xml name="includes_genome"> - <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation, if available"/> - <expand macro="include" include_what="gtf" include_label="Include gtf annotation file, if available"/> - </xml> - <xml name="exclude" token_exclude_what="gff3" token_exclude_label="Exclude gff3 annotation file" token_checked="false"> - <param argument="--exclude-@EXCLUDE_WHAT@" type="boolean" truevalue="--exclude-@EXCLUDE_WHAT@" falsevalue="" label="@EXCLUDE_LABEL@" checked="@CHECKED@"/> - </xml> - <xml name="excludes_genome"> - <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/> - <expand macro="exclude" exclude_what="gff3" exclude_label="Exclude gff3 annotation file"/> - <expand macro="exclude" exclude_what="genomic-cds" exclude_label="Exclude cds from genomic sequence file"/> - <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/> - <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/> - </xml> - <xml name="excludes_gene"> - <expand macro="exclude" exclude_what="gene" exclude_label="Exclude gene sequence file"/> - <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/> - <expand macro="exclude" exclude_what="rna" exclude_label="Exclude transcript sequence file"/> + <xml name="include"> + <param argument="--include" type="select" multiple="true" optional="true"> + <option value="genome" selected="true">genomic sequence (genome)</option> + <option value="rna">transcript (rna)</option> + <option value="protein">amnio acid sequences (protein)</option> + <option value="cds">nucleotide coding sequences (cds)</option> + <option value="gff3">general feature file (gff3)</option> + <option value="gtf">gene transfer format (gtf)</option> + <option value="gbff">GenBank flat file (gbff)</option> + <option value="seq-report">sequence report file (seq-report)</option> + </param> </xml> - <xml name="excludes_virus_protein"> - <yield/> - <expand macro="exclude" exclude_what="protein" exclude_label="Exclude protein sequence file"/> - <expand macro="exclude" exclude_what="pdb" exclude_label="Exclude protein structure files (pdb)"/> - <expand macro="exclude" exclude_what="gpff" exclude_label="Exclude protein sequence and annotation in GenPept flat file"/> - <expand macro="exclude" exclude_what="cds" exclude_label="Exclude CDS sequence file"/> - </xml> - <xml name="excludes_virus_genome"> - <expand macro="excludes_virus_protein"> - <expand macro="exclude" exclude_what="seq" exclude_label="Exclude genomic sequence file"/> - </expand> - </xml> - <token name="@EXCLUDES_GENOME@">$file_choices.exclude_gff3 $file_choices.exclude_genomic_cds $file_choices.exclude_protein $file_choices.exclude_rna $file_choices.exclude_seq</token> - <token name="@EXCLUDES_GENE@">$exclude_gene $exclude_protein $exclude_rna</token> - <token name="@EXCLUDES_VIRUS_PROTEIN@">$exclude_protein $exclude_pdb $exclude_gpff $exclude_cds</token> - <token name="@EXCLUDES_VIRUS_GENOME@">$exclude_seq @EXCLUDES_VIRUS_PROTEIN@</token> - <xml name="includes_virus_genome"> - <expand macro="include" include_what="gbff" include_label="Include GenBank flat file sequence and annotation"/> - </xml> - <token name="@INCLUDES_GENOME@">$file_choices.include_gbff $file_choices.include_gtf</token> - <token name="@INCLUDES_VIRUS_GENOME@">$include_gbff</token> + <token name="@INCLUDE@"><![CDATA[ + --include + #if $file_choices.include + #echo ",".join($file_choices.include) + #else + none + #end if + ]]></token> <xml name="released_options" token_released_what="genomes" token_before_or_after="before"> - <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"></param> + <param argument="--released-@BEFORE_OR_AFTER@" type="text" optional="true" label="Only include @RELEASED_WHAT@ that have been released @BEFORE_OR_AFTER@ a specified date (MM/DD/YYYY)"> + <validator type="regex" message="enter a date in the form MM/DD/YYYY">[0-9]{2}/[0-9]{2}/[0-9]{4}</validator> + </param> </xml> <token name="@RELEASED_BEFORE@">#if $filters.released_before: --released-before '$filters.released_before' #end if </token> - <token name="@RELEASED_SINCE@">#if $filters.released_since: ---released-since '$filters.released_since' + <token name="@RELEASED_AFTER@">#if $filters.released_after: +--released-after '$filters.released_after' #end if </token> + + <xml name="genome_fasta_assert" tokens="el1,el2,expression" token_expression_n="1"> + <element name="@EL1@"> + <element name="@EL2@"> + <assert_contents> + <has_text_matching expression="@EXPRESSION@" n="@EXPRESSION_N@"/> + </assert_contents> + </element> + </element> + </xml> </macros>