comparison datasets_genome.xml @ 5:6c829a430475 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit a58a3198ea1b60b6aa9567c6c65f00f8361794f6"
author iuc
date Wed, 04 May 2022 13:15:43 +0000
parents 1a7773882d2c
children 5a2656cc84cb
comparison
equal deleted inserted replaced
4:c47a0a93ffd2 5:6c829a430475
17 '$query.subcommand.taxon' 17 '$query.subcommand.taxon'
18 #end if 18 #end if
19 $filters.reference 19 $filters.reference
20 $filters.annotated 20 $filters.annotated
21 #if $filters.assembly_level: 21 #if $filters.assembly_level:
22 --assembly_level $filters.assembly_level 22 --assembly-level $filters.assembly_level
23 #end if 23 #end if
24 #if $filters.assembly_source: 24 #if $filters.assembly_source:
25 --assembly_source $filters.assembly_source 25 --assembly-source $filters.assembly_source
26 #end if 26 #end if
27 #if $filters.chromosomes: 27 #if $filters.chromosomes:
28 --chromosomes '$filters.chromosomes' 28 --chromosomes '$filters.chromosomes'
29 #end if 29 #end if
30 @EXCLUDES_GENOME@ 30 @EXCLUDES_GENOME@
66 66
67 <repeat name="search" title="Add search terms"> 67 <repeat name="search" title="Add search terms">
68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> 68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>
69 </repeat> 69 </repeat>
70 </section> 70 </section>
71 <section name="file_choices" title="File Choices"> 71 <section name="file_choices" title="File Choices" expanded="true">
72 <expand macro="excludes_genome"></expand> 72 <expand macro="excludes_genome"></expand>
73 <expand macro="includes_genome"></expand> 73 <expand macro="includes_genome"></expand>
74 </section> 74 </section>
75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> 75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/>
76 </inputs> 76 </inputs>
88 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 88 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
89 <filter>uncompressed</filter> 89 <filter>uncompressed</filter>
90 </collection> 90 </collection>
91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> 91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list">
92 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/.*(?&lt;!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 92 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/.*(?&lt;!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
93 <filter>uncompressed and file_choices['exclude_seq']</filter> 93 <filter>uncompressed and not file_choices['exclude_seq']</filter>
94 </collection> 94 </collection>
95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> 95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list">
96 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 96 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
97 <filter>uncompressed and file_choices['exclude_genomic_cds']</filter> 97 <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter>
98 </collection> 98 </collection>
99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> 99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list">
100 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 100 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
101 <filter>uncompressed and file_choices['exclude_gff3']</filter> 101 <filter>uncompressed and not file_choices['exclude_gff3']</filter>
102 </collection> 102 </collection>
103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> 103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list">
104 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 104 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
105 <filter>uncompressed and file_choices['exclude_rna']</filter> 105 <filter>uncompressed and not file_choices['exclude_rna']</filter>
106 </collection> 106 </collection>
107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> 107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list">
108 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 108 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
109 <filter>uncompressed and file_choices['exclude_protein']</filter> 109 <filter>uncompressed and not file_choices['exclude_protein']</filter>
110 </collection> 110 </collection>
111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> 111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list">
112 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> 112 <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets>
113 <filter>uncompressed and file_choices['include_gbff']</filter> 113 <filter>uncompressed and file_choices['include_gbff']</filter>
114 </collection> 114 </collection>
123 <param name="download_by" value="taxon"></param> 123 <param name="download_by" value="taxon"></param>
124 <param name="text_or_file" value="text"></param> 124 <param name="text_or_file" value="text"></param>
125 <param name="taxon" value="human"></param> 125 <param name="taxon" value="human"></param>
126 </conditional> 126 </conditional>
127 <param name="chromosomes" value="21"></param> 127 <param name="chromosomes" value="21"></param>
128 <param name="exclude_protein" value="true"/>
129 <param name="exclude_rna" value="true"/>
130 <param name="exclude_seq" value="true"/>
131 <param name="exclude_genomic_cds" value="true"/>
132 <param name="exclude_gff3" value="true"/>
128 <param name="uncompressed" value="false"/> 133 <param name="uncompressed" value="false"/>
129 <param name="released_before" value="01/01/2018"></param> 134 <param name="released_before" value="01/01/2018"></param>
130 <output name="archive_contents"> 135 <output name="archive_contents">
131 <assert_contents> 136 <assert_contents>
132 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> 137 <has_text text="ncbi_dataset/data/dataset_catalog.json"/>
133 </assert_contents> 138 </assert_contents>
134 </output> 139 </output>
135 </test> 140 </test>
136 <test expect_num_outputs="5"> 141 <test expect_num_outputs="2">
142 <conditional name="query|subcommand">
143 <param name="download_by" value="taxon"></param>
144 <param name="text_or_file" value="text"></param>
145 <param name="taxon" value="human"></param>
146 </conditional>
147 <param name="chromosomes" value="21"></param>
148 <param name="uncompressed" value="false"/>
149 <param name="exclude_protein" value="true"/>
150 <param name="exclude_rna" value="true"/>
151 <param name="exclude_seq" value="true"/>
152 <param name="exclude_genomic_cds" value="true"/>
153 <param name="exclude_gff3" value="true"/>
154 <param name="assembly_level" value="chromosome"/>
155 <param name="released_before" value="01/01/2018"></param>
156 <output name="archive_contents">
157 <assert_contents>
158 <has_text text="ncbi_dataset/data/dataset_catalog.json"/>
159 </assert_contents>
160 </output>
161 </test>
162 <test expect_num_outputs="4">
137 <conditional name="query|subcommand"> 163 <conditional name="query|subcommand">
138 <param name="download_by" value="accession"></param> 164 <param name="download_by" value="accession"></param>
139 <conditional name="text_or_file"> 165 <conditional name="text_or_file">
140 <param name="text_or_file" value="text"></param> 166 <param name="text_or_file" value="text"></param>
141 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> 167 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param>
142 </conditional> 168 </conditional>
143 </conditional> 169 </conditional>
144 <param name="uncompressed" value="true"/> 170 <param name="uncompressed" value="true"/>
145 <param name="released_before" value="01/01/2007"></param> 171 <param name="released_before" value="01/01/2007"></param>
146 <param name="exclude_genomic_cds" value="true"/> 172 <param name="exclude_protein" value="true"/>
173 <param name="exclude_rna" value="true"/>
174 <param name="exclude_seq" value="true"/>
175 <param name="exclude_gff3" value="true"/>
147 <param name="include_gtf" value="true"/> 176 <param name="include_gtf" value="true"/>
148 <output name="genome_data_report"> 177 <output name="genome_data_report">
149 <assert_contents> 178 <assert_contents>
150 <has_text text="GCF_000013305.1"/> 179 <has_text text="GCF_000013305.1"/>
151 </assert_contents> 180 </assert_contents>
152 </output> 181 </output>
153 <output_collection name="sequence_report" type="list">
154 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>
155 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>
156 </output_collection>
157 <output_collection name="genomic_gtf" type="list"> 182 <output_collection name="genomic_gtf" type="list">
158 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> 183 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/>
159 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> 184 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/>
160 </output_collection> 185 </output_collection>
161 <output_collection name="genomic_cds" type="list"> 186 <output_collection name="genomic_cds" type="list">
170 <param name="text_or_file" value="file"></param> 195 <param name="text_or_file" value="file"></param>
171 <param name="inputfile" value="accessions.txt"></param> 196 <param name="inputfile" value="accessions.txt"></param>
172 </conditional> 197 </conditional>
173 </conditional> 198 </conditional>
174 <param name="include_gbff" value="true"/> 199 <param name="include_gbff" value="true"/>
175 <param name="exclude_seq" value="false"/> 200 <param name="exclude_protein" value="true"/>
176 <param name="exclude_gff3" value="true"/> 201 <param name="exclude_rna" value="true"/>
202 <param name="exclude_seq" value="true"/>
203 <param name="exclude_genomic_cds" value="true"/>
177 <param name="uncompressed" value="true"/> 204 <param name="uncompressed" value="true"/>
178 <param name="released_before" value="01/02/2007"></param> 205 <param name="released_before" value="01/02/2007"></param>
179 <output name="genome_data_report"> 206 <output name="genome_data_report">
180 <assert_contents> 207 <assert_contents>
181 <has_text text="SAMN02604181"/> 208 <has_text text="SAMN02604181"/>
182 </assert_contents> 209 </assert_contents>
183 </output> 210 </output>
184 <output_collection name="sequence_report" type="list">
185 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/>
186 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/>
187 </output_collection>
188 <output_collection name="genomic_gff" type="list"> 211 <output_collection name="genomic_gff" type="list">
189 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> 212 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/>
190 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> 213 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/>
191 </output_collection> 214 </output_collection>
192 <output_collection name="genomic_gbff" type="list"> 215 <output_collection name="genomic_gbff" type="list">
193 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> 216 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/>
194 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> 217 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/>
218 </output_collection>
219 </test>
220 <test expect_num_outputs="2">
221 <conditional name="query|subcommand">
222 <param name="download_by" value="accession"></param>
223 <conditional name="text_or_file">
224 <param name="text_or_file" value="text"></param>
225 <param name="accession" value="GCF_000001405.40"></param>
226 </conditional>
227 </conditional>
228 <param name="exclude_protein" value="true"/>
229 <param name="exclude_rna" value="true"/>
230 <param name="exclude_seq" value="true"/>
231 <param name="exclude_genomic_cds" value="true"/>
232 <param name="exclude_gff3" value="true"/>
233 <param name="uncompressed" value="true"/>
234 <param name="released_before" value="01/02/2007"></param>
235 <output_collection name="sequence_report">
236 <element name="GCF_000001405.40" file="GCF_000001405.40.seq.rpt.jsonl" compare="contains"/>
195 </output_collection> 237 </output_collection>
196 </test> 238 </test>
197 </tests> 239 </tests>
198 <help> 240 <help>
199 <![CDATA[ 241 <![CDATA[
201 243
202 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. 244 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report.
203 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. 245 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file.
204 246
205 The default genome dataset includes the following files (if available): 247 The default genome dataset includes the following files (if available):
248 * data_report.jsonl (genome assembly and annotation metadata, not always available)
206 * genomic.fna (genomic sequences) 249 * genomic.fna (genomic sequences)
207 * rna.fna (transcript sequences) 250 * rna.fna (transcript sequences)
208 * protein.faa (protein sequences) 251 * protein.faa (protein sequences)
209 * genomic.gff (genome annotation in gff3 format) 252 * genomic.gff (genome annotation in gff3 format)
210 * data_report.jsonl (data report with genome assembly and annotation metadata)
211 * dataset_catalog.json (a list of files and file types included in the dataset) 253 * dataset_catalog.json (a list of files and file types included in the dataset)
212 ]]> 254 ]]>
213 </help> 255 </help>
214 256
215 </tool> 257 </tool>