Mercurial > repos > iuc > ncbi_datasets
comparison datasets_genome.xml @ 12:d78faac2c6ef draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9
author | iuc |
---|---|
date | Sat, 03 Dec 2022 13:29:32 +0000 |
parents | ac24fff14f23 |
children | 1e188c9610c3 |
comparison
equal
deleted
inserted
replaced
11:ac24fff14f23 | 12:d78faac2c6ef |
---|---|
1 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | 1 <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> |
2 <description>download genome sequence, annotation and metadata</description> | 2 <description>download genome sequence, annotation and metadata</description> |
3 <expand macro="bio_tools"/> | |
3 <macros> | 4 <macros> |
4 <import>macros.xml</import> | 5 <import>macros.xml</import> |
5 </macros> | 6 </macros> |
6 <expand macro="requirements"></expand> | 7 <expand macro="requirements"></expand> |
8 <expand macro="version_command"/> | |
7 <command><![CDATA[ | 9 <command><![CDATA[ |
8 #import re | 10 #import re |
9 @SETUP_CERTIFICATES@ | 11 @SETUP_CERTIFICATES@ |
10 datasets download genome $query.subcommand.download_by | 12 datasets download genome $query.subcommand.download_by |
11 #if $query.subcommand.download_by == 'accession': | 13 #if $query.subcommand.download_by == 'accession': |
61 | 63 |
62 ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery | 64 ## rename all faa, fna (resp faa.gz, fna.gz) to fasta (resp fasta.gz) to allow discovery |
63 && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; | 65 && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; |
64 | 66 |
65 ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) | 67 ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) |
68 ## note "not decompress" means that the datasets are provided compressed (datasets rehydrate is called with --gzip) | |
69 ## in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression | |
66 && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; | 70 && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; |
67 #if $file_choices.decompress | 71 #if not $file_choices.decompress |
68 && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; | 72 && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; |
69 #end if | 73 #end if |
70 | 74 |
71 #if "seq-report" in $file_choices.include | 75 #if "seq-report" in $file_choices.include |
72 && find ncbi_dataset -name sequence_report.jsonl -exec sh -c 'dataformat tsv genome-seq --inputfile {} > \$(dirname {})/\$(basename {} .jsonl).tsv' \; | 76 && find ncbi_dataset -name sequence_report.jsonl -exec sh -c 'dataformat tsv genome-seq --inputfile {} > \$(dirname {})/\$(basename {} .jsonl).tsv' \; |
164 of compressed files https://github.com/galaxyproject/galaxy/pull/15085 | 168 of compressed files https://github.com/galaxyproject/galaxy/pull/15085 |
165 | 169 |
166 So with decompress="true" more powerful assertions are possible. | 170 So with decompress="true" more powerful assertions are possible. |
167 A single test checks the default, ie decompress="false". | 171 A single test checks the default, ie decompress="false". |
168 --> | 172 --> |
169 <test expect_num_outputs="1"> | 173 <test expect_num_outputs="3"> |
170 <conditional name="query|subcommand"> | 174 <conditional name="query|subcommand"> |
171 <param name="download_by" value="taxon"/> | 175 <param name="download_by" value="taxon"/> |
172 <param name="taxon_positional" value="human"/> | 176 <param name="taxon_positional" value="human"/> |
173 </conditional> | 177 </conditional> |
174 <param name="chromosomes" value="21"/> | 178 <param name="chromosomes" value="21"/> |
175 <param name="released_before" value="01/01/2018"/> | 179 <param name="released_before" value="01/01/2018"/> |
176 <section name="file_choices"> | 180 <section name="file_choices"> |
177 <param name="include" value=""/> | 181 <!-- include a sequence (which should be downloaded as fasta.gz) |
182 and one non-sequence (which should be decompressed) output --> | |
183 <param name="include" value="rna,gff3"/> | |
178 </section> | 184 </section> |
179 <output name="genome_data_report"> | 185 <output name="genome_data_report"> |
180 <assert_contents> | 186 <assert_contents> |
181 <has_text text="Assembly Accession	Assembly Name	Assembly Submitter	Organism Name"/> | 187 <has_text text="Assembly Accession	Assembly Name	Assembly Submitter	Organism Name"/> |
182 <has_n_lines n="144"/> | 188 <has_n_lines n="144"/> |
183 <has_n_columns n="4"/> | 189 <has_n_columns n="4"/> |
184 </assert_contents> | 190 </assert_contents> |
185 </output> | 191 </output> |
192 <output_collection name="rna_fasta" type="list" count="1"> | |
193 <element name="GCF_000306695.2" decompress="true"> | |
194 <assert_contents> | |
195 <has_text text=">"/> | |
196 </assert_contents> | |
197 </element> | |
198 </output_collection> | |
199 <output_collection name="genomic_gff" type="list"> | |
200 <element name="GCF_000306695.2"> | |
201 <assert_contents> | |
202 <has_n_lines min="1000000"/> | |
203 <has_line line="##gff-version 3"/> | |
204 <!-- TODO this will only work when the galaxy python packages for 22.05 have been released | |
205 <has_n_columns n="9" comment="#"/> --> | |
206 </assert_contents> | |
207 </element> | |
208 </output_collection> | |
209 <assert_command> | |
210 <has_text text="gunzip"/> | |
211 </assert_command> | |
186 </test> | 212 </test> |
187 <test expect_num_outputs="2"> | 213 <test expect_num_outputs="2"> |
188 <conditional name="query|subcommand"> | 214 <conditional name="query|subcommand"> |
189 <param name="download_by" value="taxon"/> | 215 <param name="download_by" value="taxon"/> |
190 <param name="taxon_positional" value="human"/> | 216 <param name="taxon_positional" value="human"/> |