diff datasets_genome.xml @ 5:6c829a430475 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit a58a3198ea1b60b6aa9567c6c65f00f8361794f6"
author iuc
date Wed, 04 May 2022 13:15:43 +0000
parents 1a7773882d2c
children 5a2656cc84cb
line wrap: on
line diff
--- a/datasets_genome.xml	Mon Mar 07 12:05:56 2022 +0000
+++ b/datasets_genome.xml	Wed May 04 13:15:43 2022 +0000
@@ -19,10 +19,10 @@
 $filters.reference
 $filters.annotated
 #if $filters.assembly_level:
---assembly_level $filters.assembly_level
+--assembly-level $filters.assembly_level
 #end if
 #if $filters.assembly_source:
---assembly_source $filters.assembly_source
+--assembly-source $filters.assembly_source
 #end if
 #if $filters.chromosomes:
 --chromosomes '$filters.chromosomes'
@@ -68,7 +68,7 @@
                 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>
             </repeat>
         </section>
-        <section name="file_choices" title="File Choices">
+        <section name="file_choices" title="File Choices" expanded="true">
             <expand macro="excludes_genome"></expand>
             <expand macro="includes_genome"></expand>
         </section>
@@ -90,23 +90,23 @@
         </collection>
         <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/.*(?&lt;!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['exclude_seq']</filter>
+            <filter>uncompressed and not file_choices['exclude_seq']</filter>
         </collection>
         <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['exclude_genomic_cds']</filter>
+            <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter>
         </collection>
         <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['exclude_gff3']</filter>
+            <filter>uncompressed and not file_choices['exclude_gff3']</filter>
         </collection>
         <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['exclude_rna']</filter>
+            <filter>uncompressed and not file_choices['exclude_rna']</filter>
         </collection>
         <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
-            <filter>uncompressed and file_choices['exclude_protein']</filter>
+            <filter>uncompressed and not file_choices['exclude_protein']</filter>
         </collection>
         <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list">
             <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
@@ -125,6 +125,11 @@
                 <param name="taxon" value="human"></param>
             </conditional>
             <param name="chromosomes" value="21"></param>
+            <param name="exclude_protein" value="true"/>
+            <param name="exclude_rna" value="true"/>
+            <param name="exclude_seq" value="true"/>
+            <param name="exclude_genomic_cds" value="true"/>
+            <param name="exclude_gff3" value="true"/>
             <param name="uncompressed" value="false"/>
             <param name="released_before" value="01/01/2018"></param>
             <output name="archive_contents">
@@ -133,7 +138,28 @@
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="5">
+        <test expect_num_outputs="2">
+            <conditional name="query|subcommand">
+                <param name="download_by" value="taxon"></param>
+                <param name="text_or_file" value="text"></param>
+                <param name="taxon" value="human"></param>
+            </conditional>
+            <param name="chromosomes" value="21"></param>
+            <param name="uncompressed" value="false"/>
+            <param name="exclude_protein" value="true"/>
+            <param name="exclude_rna" value="true"/>
+            <param name="exclude_seq" value="true"/>
+            <param name="exclude_genomic_cds" value="true"/>
+            <param name="exclude_gff3" value="true"/>
+            <param name="assembly_level" value="chromosome"/>
+            <param name="released_before" value="01/01/2018"></param>
+            <output name="archive_contents">
+                <assert_contents>
+                    <has_text text="ncbi_dataset/data/dataset_catalog.json"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4">
             <conditional name="query|subcommand">
                 <param name="download_by" value="accession"></param>
                 <conditional name="text_or_file">
@@ -143,17 +169,16 @@
             </conditional>
             <param name="uncompressed" value="true"/>
             <param name="released_before" value="01/01/2007"></param>
-            <param name="exclude_genomic_cds" value="true"/>
+            <param name="exclude_protein" value="true"/>
+            <param name="exclude_rna" value="true"/>
+            <param name="exclude_seq" value="true"/>
+            <param name="exclude_gff3" value="true"/>
             <param name="include_gtf" value="true"/>
             <output name="genome_data_report">
                 <assert_contents>
                     <has_text text="GCF_000013305.1"/>
                 </assert_contents>
             </output>
-            <output_collection name="sequence_report" type="list">
-                <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>
-                <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>
-            </output_collection>
             <output_collection name="genomic_gtf" type="list">
                 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/>
                 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/>
@@ -172,8 +197,10 @@
                 </conditional>
             </conditional>
             <param name="include_gbff" value="true"/>
-            <param name="exclude_seq" value="false"/>
-            <param name="exclude_gff3" value="true"/>
+            <param name="exclude_protein" value="true"/>
+            <param name="exclude_rna" value="true"/>
+            <param name="exclude_seq" value="true"/>
+            <param name="exclude_genomic_cds" value="true"/>
             <param name="uncompressed" value="true"/>
             <param name="released_before" value="01/02/2007"></param>
             <output name="genome_data_report">
@@ -181,10 +208,6 @@
                    <has_text text="SAMN02604181"/>
                 </assert_contents>
             </output>
-            <output_collection name="sequence_report" type="list">
-                <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>
-                <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>
-            </output_collection>
             <output_collection name="genomic_gff" type="list">
                 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/>
                 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/>
@@ -194,6 +217,25 @@
                 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/>
             </output_collection>
         </test>
+        <test expect_num_outputs="2">
+            <conditional name="query|subcommand">
+                <param name="download_by" value="accession"></param>
+                <conditional name="text_or_file">
+                    <param name="text_or_file" value="text"></param>
+                    <param name="accession" value="GCF_000001405.40"></param>
+                </conditional>
+            </conditional>
+            <param name="exclude_protein" value="true"/>
+            <param name="exclude_rna" value="true"/>
+            <param name="exclude_seq" value="true"/>
+            <param name="exclude_genomic_cds" value="true"/>
+            <param name="exclude_gff3" value="true"/>
+            <param name="uncompressed" value="true"/>
+            <param name="released_before" value="01/02/2007"></param>
+            <output_collection name="sequence_report">
+                <element name="GCF_000001405.40" file="GCF_000001405.40.seq.rpt.jsonl" compare="contains"/>
+            </output_collection>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -203,11 +245,11 @@
 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file.
 
 The default genome dataset includes the following files (if available):
+ * data_report.jsonl (genome assembly and annotation metadata, not always available)
  * genomic.fna (genomic sequences)
  * rna.fna (transcript sequences)
  * protein.faa (protein sequences)
  * genomic.gff (genome annotation in gff3 format)
- * data_report.jsonl (data report with genome assembly and annotation metadata)
  * dataset_catalog.json (a list of files and file types included in the dataset)
 ]]>
     </help>