Mercurial > repos > iuc > data_manager_build_kraken2_database
changeset 5:2f27f3b86827 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 648fe4911ce49173697f314d70e63e0de95b7e66"
author | iuc |
---|---|
date | Mon, 08 Nov 2021 15:40:34 +0000 |
parents | 0eebe086fd58 |
children | 9002633b4737 |
files | data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml data_manager_conf.xml |
diffstat | 3 files changed, 115 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py Mon Nov 23 20:49:52 2020 +0000 +++ b/data_manager/kraken2_build_database.py Mon Nov 08 15:40:34 2021 +0000 @@ -16,15 +16,18 @@ try: # Python3 from urllib.request import urlopen + from urllib.error import URLError except ImportError: from urllib2 import urlopen + from urllib2 import URLError DATA_TABLE_NAME = "kraken2_databases" class KrakenDatabaseTypes(Enum): - standard = 'standard' + standard_local_build = 'standard_local_build' + standard_prebuilt = 'standard_prebuilt' minikraken = 'minikraken' special = 'special' custom = 'custom' @@ -50,6 +53,15 @@ return self.value +class StandardPrebuiltSizes(Enum): + full = 'full' + gb_16 = '16' + gb_8 = '8' + + def __str__(self): + return self.value + + def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -63,7 +75,7 @@ ]) database_name = " ".join([ - "Standard", + "Standard (Local Build)", "(Created:", now + ",", "kmer-len=" + str(kraken2_args["kmer_len"]) + ",", @@ -110,6 +122,66 @@ return data_table_entry +def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME): + + now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") + + database_value = "_".join([ + now, + "standard_prebuilt", + standard_prebuilt_size + ]) + + database_name = " ".join([ + "Standard (Prebuilt)", + standard_prebuilt_size, + "(Downloaded:", + now + ")" + ]) + + database_path = database_value + + size_to_url_str = { + 'full': '', + '16': '_16gb', + '8': '_8gb', + } + # we may need to let the user choose the date when new DBs are posted. + date_url_str = prebuilt_date.replace('-', '') + standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size] + # download the pre-built database + try: + download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str) + src = urlopen(download_url) + except URLError as e: + print('url: ' + download_url, file=sys.stderr) + print(e, file=sys.stderr) + exit(1) + + with open('tmp_data.tar.gz', 'wb') as dst: + shutil.copyfileobj(src, dst) + # unpack the downloaded archive to the target directory + with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: + for member in fh.getmembers(): + if member.isreg(): + member.name = os.path.basename(member.name) + fh.extract(member, os.path.join(target_directory, database_path)) + + data_table_entry = { + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } + } + + return data_table_entry + + def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -131,10 +203,14 @@ database_path = database_value # download the minikraken2 data - src = urlopen( - 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' - % minikraken2_version - ) + try: + download_url = 'https://genome-idx.s3.amazonaws.com/kraken/minikraken2_%s_8GB_201904.tgz' % minikraken2_version + src = urlopen(download_url) + except URLError as e: + print('url: ' + download_url, file=sys.stderr) + print(e, file=sys.stderr) + exit(1) + with open('tmp_data.tar.gz', 'wb') as dst: shutil.copyfileobj(src, dst) # unpack the downloaded archive to the target directory @@ -293,6 +369,8 @@ parser.add_argument('--threads', dest='threads', default=1, help='threads') parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') + parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)') + parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') @@ -315,7 +393,7 @@ data_manager_output = {} - if str(args.database_type) == 'standard': + if str(args.database_type) == 'standard_local_build': kraken2_args = { "kmer_len": args.kmer_len, "minimizer_len": args.minimizer_len, @@ -328,6 +406,12 @@ kraken2_args, target_directory, ) + elif str(args.database_type) == 'standard_prebuilt': + data_manager_output = kraken2_build_standard_prebuilt( + str(args.standard_prebuilt_size), + str(args.prebuilt_date), + target_directory + ) elif str(args.database_type) == 'minikraken': data_manager_output = kraken2_build_minikraken( str(args.minikraken2_version),
--- a/data_manager/kraken2_build_database.xml Mon Nov 23 20:49:52 2020 +0000 +++ b/data_manager/kraken2_build_database.xml Mon Nov 08 15:40:34 2021 +0000 @@ -23,7 +23,14 @@ --database-type ${database_type.database_type} #if $database_type.database_type == "minikraken" --minikraken2-version ${database_type.minikraken2_version} - #else if $database_type.database_type == "standard" + #else if $database_type.database_type == "standard_prebuilt" + --standard-prebuilt-size ${database_type.standard_prebuilt_size} + #if $database_type.prebuilt_date_custom != "" + --prebuilt-date ${database_type.prebuilt_date_custom} + #else + --prebuilt-date ${database_type.prebuilt_date} + #end if + #else if $database_type.database_type == "standard_local_build" --threads \${GALAXY_SLOTS:-1} --kmer-len ${database_type.kmer_len} --minimizer-len ${database_type.minimizer_len} @@ -54,14 +61,27 @@ <inputs> <conditional name="database_type"> <param name="database_type" type="select" multiple="false" label="Database Type"> - <option value="standard">Standard</option> + <option value="standard_local_build">Standard, Local Build</option> + <option value="standard_prebuilt">Standard, Pre-Built</option> <option value="minikraken">MiniKraken</option> <option value="special">Special</option> <option value="custom">Custom</option> </param> - <when value="standard"> + <when value="standard_local_build"> <expand macro="common_params" /> </when> + <when value="standard_prebuilt"> + <param name="standard_prebuilt_size" type="select" multiple="false" label="Select size of prebuilt database to download"> + <option value="full">Standard-Full (~50 GB)</option> + <option value="16">Standard-16 (~16 GB)</option> + <option value="8">Standard-8 (~8 GB)</option> + </param> + <param name="prebuilt_date" type="select" multiple="false" optional="true" label="Select database build date"> + <option value="2021-05-17">May 17, 2021</option> + <option value="2020-12-02">December 2, 2020</option> + </param> + <param name="prebuilt_date_custom" type="text" label="Custom date (YYYY-MM-DD)" help="Any text here will overwrite the selected date above." /> + </when> <when value="minikraken"> <param name="minikraken2_version" type="select" multiple="false" label="Select MiniKraken2 database version to download"> <option value="v2">Version 2</option>
--- a/data_manager_conf.xml Mon Nov 23 20:49:52 2020 +0000 +++ b/data_manager_conf.xml Mon Nov 08 15:40:34 2021 +0000 @@ -1,5 +1,5 @@ <data_managers> - <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="2.0.8_beta+galaxy1"> + <data_manager tool_file="data_manager/kraken2_build_database.xml" id="kraken2_build_database" version="2.1.1+galaxy0"> <data_table name="kraken2_databases"> <output> <column name="value"/>