Mercurial > repos > iuc > ncbi_datasets
comparison datasets_genome.xml @ 0:1a7773882d2c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 800d16f3bd40266d8734f4572988cb2b306b4fd3"
author | iuc |
---|---|
date | Thu, 27 Jan 2022 08:20:47 +0000 |
parents | |
children | 6c829a430475 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1a7773882d2c |
---|---|
1 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@"> | |
2 <description>download genome sequence, annotation and metadata</description> <!-- NOTE(review): @PROFILE@, @LICENSE@ and @TOOL_VERSION@ are presumably tokens defined in the imported macros.xml; confirm --> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"></expand> | |
7 <command><![CDATA[ | |
8 @SETUP_CERTIFICATES@ | |
9 datasets download genome $query.subcommand.download_by | |
10 #if $query.subcommand.download_by == 'accession': | |
11 #if $query.subcommand.text_or_file.text_or_file == 'text': | |
12 #echo " ".join(f"'{x}'" for x in $query.subcommand.text_or_file.accession.split(' ') if x) | |
13 #else | |
14 --inputfile '$query.subcommand.text_or_file.inputfile' | |
15 #end if | |
16 #else: | |
17 '$query.subcommand.taxon' | |
18 #end if | |
19 $filters.reference | |
20 $filters.annotated | |
21 #if $filters.assembly_level: | |
22 --assembly_level $filters.assembly_level | |
23 #end if | |
24 #if $filters.assembly_source: | |
25 --assembly_source $filters.assembly_source | |
26 #end if | |
27 #if $filters.chromosomes: | |
28 --chromosomes '$filters.chromosomes' | |
29 #end if | |
30 @EXCLUDES_GENOME@ | |
31 @INCLUDES_GENOME@ | |
32 @RELEASED_BEFORE@ | |
33 @RELEASED_SINCE@ | |
## Emit one --search option per repeat instance; each instance carries its own 'search' value (param declared with argument="--search"). | |
34 #for search_term in $filters.search: | |
35 --search '$search_term.search' | |
36 #end for | |
37 #if $uncompressed | |
38 && unzip ncbi_dataset.zip | |
39 #else | |
40 && unzip -l ncbi_dataset.zip > ncbi_dataset.txt | |
41 #end if | |
42 ]]></command> | |
43 <inputs> | |
44 <section name="query" title="Query" expanded="true"> | |
45 <conditional name="subcommand"> | |
46 <param name="download_by" type="select" label="Choose how to find genomes to download"> | |
47 <option value="accession">Download by NCBI assembly or BioProject accession</option> | |
48 <option value="taxon">Download by taxon</option> | |
49 </param> | |
50 <when value="accession"> | |
51 <expand macro="text_or_file"/> | |
52 </when> | |
53 <when value="taxon"> | |
54 <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."></param> | |
55 </when> | |
56 </conditional> | |
57 </section> | |
58 <section name="filters" title="Filters and Limit"> | |
59 <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/> | |
60 <expand macro="annotation"></expand> | |
61 <expand macro="assembly_level"></expand> | |
62 <expand macro="assembly_source"></expand> | |
63 <expand macro="chromosomes"></expand> | |
64 <expand macro="released_options"></expand> | |
65 <expand macro="released_options" before_or_after="since"></expand> | |
66 | |
67 <repeat name="search" title="Add search terms"> | |
68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> | |
69 </repeat> | |
70 </section> | |
71 <section name="file_choices" title="File Choices"> | |
72 <expand macro="excludes_genome"></expand> | |
73 <expand macro="includes_genome"></expand> | |
74 </section> | |
75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> | |
76 </inputs> | |
77 <outputs> | |
78 <data name="compressed_archive" format="zip" label="Compressed Archive" from_work_dir="ncbi_dataset.zip"> | |
79 <filter>not uncompressed</filter> | |
80 </data> | |
81 <data name="archive_contents" format="txt" label="Archive Contents" from_work_dir="ncbi_dataset.txt"> | |
82 <filter>not uncompressed</filter> | |
83 </data> | |
84 <data name="genome_data_report" format="json" label="NCBI Genome Datasets: Data Report" from_work_dir="ncbi_dataset/data/assembly_data_report.jsonl"> | |
85 <filter>uncompressed</filter> | |
86 </data> | |
87 <collection name="sequence_report" label="NCBI Genome Datasets: Sequence Data Report" type="list"> | |
88 <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
89 <filter>uncompressed</filter> | |
90 </collection> | |
91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> <!-- Collects *fna files found under ncbi_dataset/data, keyed by the path part before the last matched slash. NOTE(review): the (?<!cds_from) lookbehind is evaluated right before the optional (chr|unplaced|_genomic) group, so names such as cds_from_genomic.fna and rna.fna also appear to satisfy this pattern; confirm against a real archive. --> | |
92 <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
93 <filter>uncompressed and file_choices['exclude_seq']</filter> | |
94 </collection> | |
95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> | |
96 <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
97 <filter>uncompressed and file_choices['exclude_genomic_cds']</filter> | |
98 </collection> | |
99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> | |
100 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
101 <filter>uncompressed and file_choices['exclude_gff3']</filter> | |
102 </collection> | |
103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> | |
104 <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
105 <filter>uncompressed and file_choices['exclude_rna']</filter> | |
106 </collection> | |
107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> | |
108 <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
109 <filter>uncompressed and file_choices['exclude_protein']</filter> | |
110 </collection> | |
111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> | |
112 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
113 <filter>uncompressed and file_choices['include_gbff']</filter> | |
114 </collection> | |
115 <collection name="genomic_gtf" label="NCBI Genome Datasets: gtf" type="list"> | |
116 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | |
117 <filter>uncompressed and file_choices['include_gtf']</filter> | |
118 </collection> | |
119 </outputs> | |
120 <tests> | |
121 <test expect_num_outputs="2"> <!-- Taxon query (human, chromosome 21) with the archive kept compressed: expects the zip plus its content listing. --> | |
122 <conditional name="query|subcommand"> | |
123 <param name="download_by" value="taxon"></param> | |
125 <param name="taxon" value="human"></param> | |
126 </conditional> | |
127 <param name="chromosomes" value="21"></param> | |
128 <param name="uncompressed" value="false"/> | |
129 <param name="released_before" value="01/01/2018"></param> | |
130 <output name="archive_contents"> | |
131 <assert_contents> | |
132 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> | |
133 </assert_contents> | |
134 </output> | |
135 </test> | |
136 <test expect_num_outputs="5"> | |
137 <conditional name="query|subcommand"> | |
138 <param name="download_by" value="accession"></param> | |
139 <conditional name="text_or_file"> | |
140 <param name="text_or_file" value="text"></param> | |
141 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> | |
142 </conditional> | |
143 </conditional> | |
144 <param name="uncompressed" value="true"/> | |
145 <param name="released_before" value="01/01/2007"></param> | |
146 <param name="exclude_genomic_cds" value="true"/> | |
147 <param name="include_gtf" value="true"/> | |
148 <output name="genome_data_report"> | |
149 <assert_contents> | |
150 <has_text text="GCF_000013305.1"/> | |
151 </assert_contents> | |
152 </output> | |
153 <output_collection name="sequence_report" type="list"> | |
154 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
155 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
156 </output_collection> | |
157 <output_collection name="genomic_gtf" type="list"> | |
158 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> | |
159 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> | |
160 </output_collection> | |
161 <output_collection name="genomic_cds" type="list"> | |
162 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.cds" compare="contains"/> | |
163 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.genomic.cds" compare="contains"/> | |
164 </output_collection> | |
165 </test> | |
166 <test expect_num_outputs="4"> | |
167 <conditional name="query|subcommand"> | |
168 <param name="download_by" value="accession"></param> | |
169 <conditional name="text_or_file"> | |
170 <param name="text_or_file" value="file"></param> | |
171 <param name="inputfile" value="accessions.txt"></param> | |
172 </conditional> | |
173 </conditional> | |
174 <param name="include_gbff" value="true"/> | |
175 <param name="exclude_seq" value="false"/> | |
176 <param name="exclude_gff3" value="true"/> | |
177 <param name="uncompressed" value="true"/> | |
178 <param name="released_before" value="01/02/2007"></param> | |
179 <output name="genome_data_report"> | |
180 <assert_contents> | |
181 <has_text text="SAMN02604181"/> | |
182 </assert_contents> | |
183 </output> | |
184 <output_collection name="sequence_report" type="list"> | |
185 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
186 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
187 </output_collection> | |
188 <output_collection name="genomic_gff" type="list"> | |
189 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> | |
190 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> | |
191 </output_collection> | |
192 <output_collection name="genomic_gbff" type="list"> | |
193 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> | |
194 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> | |
195 </output_collection> | |
196 </test> | |
197 </tests> | |
198 <help> | |
199 <![CDATA[ | |
200 **Download Genome Datasets from NCBI** | |
201 | |
202 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. | |
203 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. | |
204 | |
205 The default genome dataset includes the following files (if available): | |
206 * genomic.fna (genomic sequences) | |
207 * rna.fna (transcript sequences) | |
208 * protein.faa (protein sequences) | |
209 * genomic.gff (genome annotation in gff3 format) | |
210 * assembly_data_report.jsonl (data report with genome assembly and annotation metadata) | |
211 * dataset_catalog.json (a list of files and file types included in the dataset) | |
212 ]]> | |
213 </help> | |
214 | |
215 </tool> |