Mercurial > repos > iuc > ncbi_datasets
diff datasets_genome.xml @ 5:6c829a430475 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit a58a3198ea1b60b6aa9567c6c65f00f8361794f6"
author | iuc |
---|---|
date | Wed, 04 May 2022 13:15:43 +0000 |
parents | 1a7773882d2c |
children | 5a2656cc84cb |
line wrap: on
line diff
--- a/datasets_genome.xml Mon Mar 07 12:05:56 2022 +0000 +++ b/datasets_genome.xml Wed May 04 13:15:43 2022 +0000 @@ -19,10 +19,10 @@ $filters.reference $filters.annotated #if $filters.assembly_level: ---assembly_level $filters.assembly_level +--assembly-level $filters.assembly_level #end if #if $filters.assembly_source: ---assembly_source $filters.assembly_source +--assembly-source $filters.assembly_source #end if #if $filters.chromosomes: --chromosomes '$filters.chromosomes' @@ -68,7 +68,7 @@ <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> </repeat> </section> - <section name="file_choices" title="File Choices"> + <section name="file_choices" title="File Choices" expanded="true"> <expand macro="excludes_genome"></expand> <expand macro="includes_genome"></expand> </section> @@ -90,23 +90,23 @@ </collection> <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['exclude_seq']</filter> + <filter>uncompressed and not file_choices['exclude_seq']</filter> </collection> <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['exclude_genomic_cds']</filter> + <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter> </collection> <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" 
ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['exclude_gff3']</filter> + <filter>uncompressed and not file_choices['exclude_gff3']</filter> </collection> <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['exclude_rna']</filter> + <filter>uncompressed and not file_choices['exclude_rna']</filter> </collection> <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> - <filter>uncompressed and file_choices['exclude_protein']</filter> + <filter>uncompressed and not file_choices['exclude_protein']</filter> </collection> <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> @@ -125,6 +125,11 @@ <param name="taxon" value="human"></param> </conditional> <param name="chromosomes" value="21"></param> + <param name="exclude_protein" value="true"/> + <param name="exclude_rna" value="true"/> + <param name="exclude_seq" value="true"/> + <param name="exclude_genomic_cds" value="true"/> + <param name="exclude_gff3" value="true"/> <param name="uncompressed" value="false"/> <param name="released_before" value="01/01/2018"></param> <output name="archive_contents"> @@ -133,7 +138,28 @@ </assert_contents> </output> </test> - <test expect_num_outputs="5"> + <test expect_num_outputs="2"> + <conditional name="query|subcommand"> + <param name="download_by" 
value="taxon"></param> + <param name="text_or_file" value="text"></param> + <param name="taxon" value="human"></param> + </conditional> + <param name="chromosomes" value="21"></param> + <param name="uncompressed" value="false"/> + <param name="exclude_protein" value="true"/> + <param name="exclude_rna" value="true"/> + <param name="exclude_seq" value="true"/> + <param name="exclude_genomic_cds" value="true"/> + <param name="exclude_gff3" value="true"/> + <param name="assembly_level" value="chromosome"/> + <param name="released_before" value="01/01/2018"></param> + <output name="archive_contents"> + <assert_contents> + <has_text text="ncbi_dataset/data/dataset_catalog.json"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> <conditional name="query|subcommand"> <param name="download_by" value="accession"></param> <conditional name="text_or_file"> @@ -143,17 +169,16 @@ </conditional> <param name="uncompressed" value="true"/> <param name="released_before" value="01/01/2007"></param> - <param name="exclude_genomic_cds" value="true"/> + <param name="exclude_protein" value="true"/> + <param name="exclude_rna" value="true"/> + <param name="exclude_seq" value="true"/> + <param name="exclude_gff3" value="true"/> <param name="include_gtf" value="true"/> <output name="genome_data_report"> <assert_contents> <has_text text="GCF_000013305.1"/> </assert_contents> </output> - <output_collection name="sequence_report" type="list"> - <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> - <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> - </output_collection> <output_collection name="genomic_gtf" type="list"> <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> @@ -172,8 +197,10 @@ </conditional> </conditional> <param 
name="include_gbff" value="true"/> - <param name="exclude_seq" value="false"/> - <param name="exclude_gff3" value="true"/> + <param name="exclude_protein" value="true"/> + <param name="exclude_rna" value="true"/> + <param name="exclude_seq" value="true"/> + <param name="exclude_genomic_cds" value="true"/> <param name="uncompressed" value="true"/> <param name="released_before" value="01/02/2007"></param> <output name="genome_data_report"> @@ -181,10 +208,6 @@ <has_text text="SAMN02604181"/> </assert_contents> </output> - <output_collection name="sequence_report" type="list"> - <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> - <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> - </output_collection> <output_collection name="genomic_gff" type="list"> <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> @@ -194,6 +217,25 @@ <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> </output_collection> </test> + <test expect_num_outputs="2"> + <conditional name="query|subcommand"> + <param name="download_by" value="accession"></param> + <conditional name="text_or_file"> + <param name="text_or_file" value="text"></param> + <param name="accession" value="GCF_000001405.40"></param> + </conditional> + </conditional> + <param name="exclude_protein" value="true"/> + <param name="exclude_rna" value="true"/> + <param name="exclude_seq" value="true"/> + <param name="exclude_genomic_cds" value="true"/> + <param name="exclude_gff3" value="true"/> + <param name="uncompressed" value="true"/> + <param name="released_before" value="01/02/2007"></param> + <output_collection name="sequence_report"> + <element name="GCF_000001405.40" file="GCF_000001405.40.seq.rpt.jsonl" compare="contains"/> + 
</output_collection> + </test> </tests> <help> <![CDATA[ @@ -203,11 +245,11 @@ Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. The default genome dataset includes the following files (if available): + * data_report.jsonl (genome assembly and annotation metadata, not always available) * genomic.fna (genomic sequences) * rna.fna (transcript sequences) * protein.faa (protein sequences) * genomic.gff (genome annotation in gff3 format) - * data_report.jsonl (data report with genome assembly and annotation metadata) * dataset_catalog.json (a list of files and file types included in the dataset) ]]> </help>