Mercurial > repos > iuc > data_manager_build_kraken2_database
changeset 6:9002633b4737 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5d74f13e59c4a2862c108ac1a08c067b0cfb2d97
author | iuc |
---|---|
date | Fri, 24 Jun 2022 12:44:33 +0000 |
parents | 2f27f3b86827 |
children | ed1518ce2237 |
files | data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml |
diffstat | 2 files changed, 220 insertions(+), 80 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py Mon Nov 08 15:40:34 2021 +0000 +++ b/data_manager/kraken2_build_database.py Fri Jun 24 12:44:33 2022 +0000 @@ -54,9 +54,17 @@ class StandardPrebuiltSizes(Enum): - full = 'full' - gb_16 = '16' - gb_8 = '8' + viral = "viral" + minusb = "minusb" + standard = "standard" + standard_08gb = "standard_08gb" + standard_16gb = "standard_16gb" + pluspf = "pluspf" + pluspf_08gb = "pluspf_08gb" + pluspf_16gb = "pluspf_16gb" + pluspfp = "pluspfp" + pluspfp_08gb = "pluspfp_08gb" + pluspfp_16gb = "pluspfp_16gb" def __str__(self): return self.value @@ -122,36 +130,47 @@ return data_table_entry -def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") + prebuild_name = { + 'viral': "Viral", + 'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)", + 'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)", + 'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)", + 'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)", + 'pluspf': "PlusPF (Standard plus protozoa and fungi)", + 'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)", + 'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)", + 'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)", + 'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)", + 'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)" + } + database_value = "_".join([ now, "standard_prebuilt", - standard_prebuilt_size + prebuilt_db, + prebuilt_date ]) database_name = " ".join([ - "Standard (Prebuilt)", - standard_prebuilt_size, - "(Downloaded:", + "Prebuilt Refseq indexes: ", + prebuild_name[prebuilt_db], + "(Version: ", + prebuilt_date, + "- Downloaded:", now + ")" ]) database_path = database_value - size_to_url_str = { - 'full': '', - '16': '_16gb', - '8': '_8gb', - } # we may need to let the user choose the date when new DBs are posted. date_url_str = prebuilt_date.replace('-', '') - standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size] # download the pre-built database try: - download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str) + download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str) src = urlopen(download_url) except URLError as e: print('url: ' + download_url, file=sys.stderr) @@ -369,7 +388,7 @@ parser.add_argument('--threads', dest='threads', default=1, help='threads') parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') - parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)') + parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') @@ -408,7 +427,7 @@ ) elif str(args.database_type) == 'standard_prebuilt': data_manager_output = kraken2_build_standard_prebuilt( - str(args.standard_prebuilt_size), + str(args.prebuilt_db), str(args.prebuilt_date), target_directory )
--- a/data_manager/kraken2_build_database.xml Mon Nov 08 15:40:34 2021 +0000 +++ b/data_manager/kraken2_build_database.xml Fri Jun 24 12:44:33 2022 +0000 @@ -1,7 +1,9 @@ <?xml version="1.0"?> -<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@" profile="18.09"> +<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <macros> - <token name="@TOOL_VERSION@">2.1.1</token> + <token name="@TOOL_VERSION@">2.1.2</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">21.01</token> <xml name="common_params"> <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" /> <param name="minimizer_len" type="integer" value="31" label="Minimizer length" /> @@ -9,6 +11,39 @@ <param name="load_factor" type="float" value="0.7" min="0" max="1" label="Load factor" help="Proportion of the hash table to be populated" /> <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" /> </xml> + <xml name="viral"> + <option value="viral">Viral (viral; ~50 GB)</option> + </xml> + <xml name="minusb"> + <option value="minusb">MinusB (archaea, viral, plasmid, human, UniVec_Core; ~8 GB)</option> + </xml> + <xml name="standard"> + <option value="standard">Standard-Full (archaea, bacteria, viral, plasmid, human, UniVec_Core; ~50 GB)</option> + </xml> + <xml name="standard_08gb"> + <option value="standard_08gb">Standard-8 (Standard with DB capped at 8 GB)</option> + </xml> + <xml name="standard_16gb"> + <option value="standard_16gb">Standard-16 (Standard with DB capped at 16 GB)</option> + </xml> + <xml name="pluspf"> + <option value="pluspf">PlusPF (Standard plus protozoa and fungi; ~50 GB)</option> + </xml> + <xml name="pluspf_08gb"> + <option value="pluspf_08gb">PlusPF-8 (PlusPF with DB capped at 8 GB; ~7.5 GB)</option> + </xml> + <xml name="pluspf_16gb"> + <option value="pluspf_16gb">PlusPF-16 (PlusPF with DB capped at 16 GB; ~15 GB)</option> + </xml> + <xml name="pluspfp"> + <option value="pluspfp">PlusPFP (Standard plus protozoa, fungi and plant; ~129 GB)</option> + </xml> + <xml name="pluspfp_08gb"> + <option value="pluspfp_08gb">PlusPFP-8 (PlusPFP with DB capped at 8 GB; ~7.5 GB)</option> + </xml> + <xml name="pluspfp_16gb"> + <option value="pluspfp_16gb">PlusPFP-16 (PlusPFP with DB capped at 16 GB; ~15 GB)</option> + </xml> </macros> <description>database builder</description> <requirements> @@ -16,53 +51,48 @@ <requirement type="package" version="3.7">python</requirement> </requirements> <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command> - <command detect_errors="exit_code"> - <![CDATA[ - python '$__tool_directory__/kraken2_build_database.py' - '${out_file}' - --database-type ${database_type.database_type} - #if $database_type.database_type == "minikraken" - --minikraken2-version ${database_type.minikraken2_version} - #else if $database_type.database_type == "standard_prebuilt" - --standard-prebuilt-size ${database_type.standard_prebuilt_size} - #if $database_type.prebuilt_date_custom != "" - --prebuilt-date ${database_type.prebuilt_date_custom} - #else - --prebuilt-date ${database_type.prebuilt_date} - #end if - #else if $database_type.database_type == "standard_local_build" - --threads \${GALAXY_SLOTS:-1} - --kmer-len ${database_type.kmer_len} - --minimizer-len ${database_type.minimizer_len} - --minimizer-spaces ${database_type.minimizer_spaces} - --load-factor ${database_type.load_factor} - ${database_type.clean} - #else if $database_type.database_type == "special" - --threads \${GALAXY_SLOTS:-1} - --special-database-type ${database_type.special_database_type} - --kmer-len ${database_type.kmer_len} - --minimizer-len ${database_type.minimizer_len} - --minimizer-spaces ${database_type.minimizer_spaces} - --load-factor ${database_type.load_factor} - ${database_type.clean} - #else if $database_type.database_type == "custom" - --threads \${GALAXY_SLOTS:-1} - ${database_type.skip_maps} - --custom-fasta ${database_type.custom_fasta} - --custom-database-name ${database_type.custom_database_name} - --kmer-len ${database_type.kmer_len} - --minimizer-len ${database_type.minimizer_len} - --minimizer-spaces ${database_type.minimizer_spaces} - --load-factor ${database_type.load_factor} - ${database_type.clean} - #end if - ]]> + <command detect_errors="exit_code"><![CDATA[ +python '$__tool_directory__/kraken2_build_database.py' + '$out_file' + --database-type '$database_type.database_type' +#if $database_type.database_type == "standard_local_build" + --threads \${GALAXY_SLOTS:-1} + --kmer-len $database_type.kmer_len + --minimizer-len $database_type.minimizer_len + --minimizer-spaces $database_type.minimizer_spaces + --load-factor $database_type.load_factor + $database_type.clean +#else if $database_type.database_type == "standard_prebuilt" + --prebuilt-db '$database_type.prebuild.prebuilt_db' + --prebuilt-date '$database_type.prebuild.prebuilt_date' +#else if $database_type.database_type == "minikraken" + --minikraken2-version '$database_type.minikraken2_version' +#else if $database_type.database_type == "special" + --threads \${GALAXY_SLOTS:-1} + --special-database-type '$database_type.special_database_type' + --kmer-len $database_type.kmer_len + --minimizer-len $database_type.minimizer_len + --minimizer-spaces $database_type.minimizer_spaces + --load-factor $database_type.load_factor + $database_type.clean +#else if $database_type.database_type == "custom" + --threads \${GALAXY_SLOTS:-1} + --custom-fasta '$database_type.custom_fasta' + --custom-database-name '$database_type.custom_database_name' + $database_type.skip_maps + --kmer-len $database_type.kmer_len + --minimizer-len $database_type.minimizer_len + --minimizer-spaces $database_type.minimizer_spaces + --load-factor $database_type.load_factor + $database_type.clean +#end if +]]> </command> <inputs> <conditional name="database_type"> <param name="database_type" type="select" multiple="false" label="Database Type"> <option value="standard_local_build">Standard, Local Build</option> - <option value="standard_prebuilt">Standard, Pre-Built</option> + <option value="standard_prebuilt">Pre-Built Refseq indexes</option> <option value="minikraken">MiniKraken</option> <option value="special">Special</option> <option value="custom">Custom</option> @@ -71,16 +101,75 @@ <expand macro="common_params" /> </when> <when value="standard_prebuilt"> - <param name="standard_prebuilt_size" type="select" multiple="false" label="Select size of prebuilt database to download"> - <option value="full">Standard-Full (~50 GB)</option> - <option value="16">Standard-16 (~16 GB)</option> - <option value="8">Standard-8 (~8 GB)</option> - </param> - <param name="prebuilt_date" type="select" multiple="false" optional="true" label="Select database build date"> - <option value="2021-05-17">May 17, 2021</option> - <option value="2020-12-02">December 2, 2020</option> - </param> - <param name="prebuilt_date_custom" type="text" label="Custom date (YYYY-MM-DD)" help="Any text here will overwrite the selected date above." /> + <conditional name="prebuild"> + <param name="prebuilt_date" type="select" label="Select index build date"> + <option value="2022-06-07">June 7, 2022</option> + <option value="2021-05-17">May 17, 2021</option> + <option value="2021-01-27">January 27, 2021</option> + <option value="2020-12-02">December 2, 2020</option> + <option value="2020-09-19">September 19, 2020</option> + </param> + <when value="2022-06-07"> + <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> + <expand macro="viral"/> + <expand macro="minusb"/> + <expand macro="standard"/> + <expand macro="standard_08gb"/> + <expand macro="standard_16gb"/> + <expand macro="pluspf"/> + <expand macro="pluspf_08gb"/> + <expand macro="pluspf_16gb"/> + <expand macro="pluspfp"/> + <expand macro="pluspfp_08gb"/> + <expand macro="pluspfp_16gb"/> + </param> + </when> + <when value="2021-05-17"> + <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> + <expand macro="viral"/> + <expand macro="minusb"/> + <expand macro="standard"/> + <expand macro="standard_08gb"/> + <expand macro="standard_16gb"/> + <expand macro="pluspf"/> + <expand macro="pluspf_08gb"/> + <expand macro="pluspf_16gb"/> + <expand macro="pluspfp_08gb"/> + <expand macro="pluspfp_16gb"/> + </param> + </when> + <when value="2021-01-27"> + <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> + <expand macro="pluspf"/> + <expand macro="pluspf_08gb"/> + <expand macro="pluspf_16gb"/> + <expand macro="pluspfp_08gb"/> + <expand macro="pluspfp_16gb"/> + </param> + </when> + <when value="2020-12-02"> + <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download"> + <expand macro="viral"/> + <expand macro="minusb"/> + <expand macro="standard"/> + <expand macro="standard_08gb"/> + <expand macro="standard_16gb"/> + </param> + </when> + <when value="2020-09-19"> + <param name="prebuilt_db" type="select" multiple="false" label="Select a prebuilt Refseq indexes to download"> + <expand macro="minusb"/> + <expand macro="standard"/> + <expand macro="standard_08gb"/> + <expand macro="standard_16gb"/> + <expand macro="pluspf"/> + <expand macro="pluspf_08gb"/> + <expand macro="pluspf_16gb"/> + <expand macro="pluspfp_08gb"/> + <expand macro="pluspfp_16gb"/> + </param> + </when> + </conditional> </when> <when value="minikraken"> <param name="minikraken2_version" type="select" multiple="false" label="Select MiniKraken2 database version to download"> @@ -97,9 +186,9 @@ <expand macro="common_params" /> </when> <when value="custom"> - <param name="custom_fasta" type="data" format="fasta" multiple="False" optional="true" label="Select history item" /> + <param name="custom_fasta" type="data" format="fasta" multiple="False" label="Select history item" /> <param name="custom_database_name" type="text" label="Name for this database" /> - <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> + <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> <expand macro="common_params" /> </when> </conditional> @@ -108,16 +197,48 @@ <data name="out_file" format="data_manager_json" /> </outputs> <tests> + <test expect_num_outputs="1"> + <conditional name="database_type"> + <param name="database_type" value="custom" /> + <param name="custom_fasta" value="adapter.fa" /> + <param name="custom_database_name" value="custom_database" /> + <param name="skip_maps" value="true" /> + <param name="kmer_len" value="35" /> + <param name="minimizer_spaces" value="6"/> + <param name="load_factor" value="0.7" /> + <param name="clean" truevalue="--clean"/> + </conditional> + <output name="out_file"> + <assert_contents> + <has_text text="kraken2_databases"/> + <has_text text="path"/> + <has_text text="custom_database"/> + </assert_contents> + </output> + </test> <test> - <param name="database_type" value="custom" /> - <param name="custom_fasta" value="adapter.fa" /> - <param name="custom_database_name" value="database" /> - <param name="skip_maps" value="true" /> - <output name="out_file" value="kraken2_custom_data_manager.json" /> + <conditional name="database_type"> + <param name="database_type" value="standard_prebuilt" /> + <conditional name="prebuild"> + <param name="prebuilt_date" value="2022-06-07"/> + <param name="prebuilt_db" value="standard_08gb"/> + </conditional> + </conditional> + <output name="out_file"> + <assert_contents> + <has_text text="kraken2_databases"/> + <has_text text="path"/> + <has_text text="Standard-8"/> + <has_text text="_standard_prebuilt_standard_08gb_2022-06-07"/> + <has_text text="Version"/> + <has_text text="Downloaded"/> + </assert_contents> + </output> </test> </tests> - <help> - </help> + <help><![CDATA[ +Build Kraken2 databases or download `prebuilt Kraken2 RefSeq indexes <https://benlangmead.github.io/aws-indexes/k2>`__ + ]]></help> <citations> <citation type="doi">10.1186/gb-2014-15-3-r46</citation> </citations>