Mercurial > repos > iuc > ncbi_datasets
changeset 12:d78faac2c6ef draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit affffdbe7237a2c0ba5793c0e7dd11cebb8413a9
author | iuc |
---|---|
date | Sat, 03 Dec 2022 13:29:32 +0000 |
parents | ac24fff14f23 |
children | 1e188c9610c3 |
files | datasets_gene.xml datasets_genome.xml macros.xml |
diffstat | 3 files changed, 46 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/datasets_gene.xml Fri Dec 02 10:52:48 2022 +0000 +++ b/datasets_gene.xml Sat Dec 03 13:29:32 2022 +0000 @@ -1,9 +1,11 @@ <tool id="datasets_download_gene" name="NCBI Datasets Gene" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>download gene sequences and metadata</description> + <expand macro="bio_tools"/> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"></expand> + <expand macro="version_command"/> <command><![CDATA[ #import re @SETUP_CERTIFICATES@ @@ -286,7 +288,7 @@ <assert_contents> <has_text text="baboon"/> <has_text text="glyceraldehyde-3-phosphate dehydrogenase"/> - <has_n_lines n="31"/> + <has_n_lines min="30"/> <has_n_columns n="8"/> </assert_contents> </output> @@ -350,7 +352,7 @@ <assert_contents> <has_text text="house mouse"/> <has_text text="XR_004936704.1"/> - <has_n_lines n="137"/> + <has_n_lines min="130"/> <has_n_columns n="38"/> </assert_contents> </output> @@ -384,7 +386,7 @@ <assert_contents> <has_text text="rat"/> <has_text text="Brca1"/> - <has_n_lines n="38"/> + <has_n_lines min="30"/> <has_n_columns n="8"/> </assert_contents> </output> @@ -430,7 +432,7 @@ <output name="gene_data_report"> <assert_contents> <has_text text="human"/> - <has_n_lines n="823"/> + <has_n_lines min="800"/> <has_n_columns n="8"/> </assert_contents> </output> @@ -488,7 +490,7 @@ <output name="gene_data_report"> <assert_contents> <has_text text="human"/> - <has_n_lines n="72533"/> + <has_n_lines min="72000"/> <has_n_columns n="8"/> </assert_contents> </output> @@ -513,7 +515,7 @@ <output name="gene_data_report"> <assert_contents> <has_text text="human"/> - <has_n_lines n="72533"/> + <has_n_lines min="72000"/> <has_n_columns n="8"/> </assert_contents> </output>
--- a/datasets_genome.xml Fri Dec 02 10:52:48 2022 +0000 +++ b/datasets_genome.xml Sat Dec 03 13:29:32 2022 +0000 @@ -1,9 +1,11 @@ <tool id="datasets_download_genome" name="NCBI Datasets Genomes" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>download genome sequence, annotation and metadata</description> + <expand macro="bio_tools"/> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"></expand> + <expand macro="version_command"/> <command><![CDATA[ #import re @SETUP_CERTIFICATES@ @@ -63,8 +65,10 @@ && find ncbi_dataset \( -name "*.faa" -o -name "*.fna" -o -name "*.faa.gz" -o -name "*.fna.gz" \) -exec sh -c 'mv {} \$(echo {} | sed "s/.f[an]a\(.gz\)\?\$/.fasta\1/")' \; ## unzip all compressed (non-fasta) files (jsonl files are just named .gz) + ## note "not decompress" means that the datasets are provided uncompressed (datasets rehydrate is called we --gzip) + ## in this case we need to decompress all datasets that don't have a Galaxy datatype allowing for compression && find ncbi_dataset -name "*.jsonl.gz" -exec sh -c 'mv {} \$(dirname {})/\$(basename {} .gz)' \; - #if $file_choices.decompress + #if not $file_choices.decompress && find ncbi_dataset \( -name "*.gz" ! -name "*fasta.gz" \) -exec gunzip {} \; #end if @@ -166,7 +170,7 @@ So with decompress="true" more powerful assertions are possible. A single test checks the default, i.e. decompress="false". 
--> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="human"/> @@ -174,7 +178,9 @@ <param name="chromosomes" value="21"/> <param name="released_before" value="01/01/2018"/> <section name="file_choices"> - <param name="include" value=""/> + <!-- include a sequence (which should be downloaded as fasta.gz) + and one non-sequence (which should be decompressed) output --> + <param name="include" value="rna,gff3"/> </section> <output name="genome_data_report"> <assert_contents> @@ -183,6 +189,26 @@ <has_n_columns n="4"/> </assert_contents> </output> + <output_collection name="rna_fasta" type="list" count="1"> + <element name="GCF_000306695.2" decompress="true"> + <assert_contents> + <has_text text=">"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="genomic_gff" type="list"> + <element name="GCF_000306695.2"> + <assert_contents> + <has_n_lines min="1000000"/> + <has_line line="##gff-version 3"/> + <!-- TODO this will only work when the galaxy python packages for 22.05 have been released + <has_n_columns n="9" comment="#"/> --> + </assert_contents> + </element> + </output_collection> + <assert_command> + <has_text text="gunzip"/> + </assert_command> </test> <test expect_num_outputs="2"> <conditional name="query|subcommand">
--- a/macros.xml Fri Dec 02 10:52:48 2022 +0000 +++ b/macros.xml Sat Dec 03 13:29:32 2022 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">14.4</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">21.01</token> <token name="@LICENSE@">MIT</token> <token name="@PROFILE_AND_LICENSE@">profile="@PROFILE@" license="@LICENSE@"</token> @@ -15,6 +15,14 @@ <requirement type="package" version="16.02">p7zip</requirement> </requirements> </xml> + <xml name="bio_tools"> + <xrefs> + <xref type="bio.tools">ncbi_datasets</xref> + </xrefs> + </xml> + <xml name="version_command"><![CDATA[ + datasets --version | cut -d" " -f 3 + ]]></xml> <xml name="annotation"> <param argument="--annotated" type="boolean" truevalue="--annotated" falsevalue="" label="Only include genomes with annotation ?"/> </xml>