# HG changeset patch # User dfornika # Date 1558719548 14400 # Node ID f005b6efd096e8e44253149874fa53aafd0d2a15 # Parent 4c9f9d6098ebcc08a952f80d60873e6fb7e01cfd planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty diff -r 4c9f9d6098eb -r f005b6efd096 data_manager/kraken2_build_database.py --- a/data_manager/kraken2_build_database.py Mon May 06 19:42:14 2019 -0400 +++ b/data_manager/kraken2_build_database.py Fri May 24 13:39:08 2019 -0400 @@ -50,7 +50,7 @@ return self.value -def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") database_value = "_".join([ @@ -92,15 +92,21 @@ subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_path, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -118,6 +124,8 @@ now + ")" ]) + database_path = database_value + # download the minikraken2 data src = urlopen( 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' @@ -127,18 +135,27 @@ shutil.copyfileobj(src, dst) # unpack the downloaded archive to the target directory with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: - fh.extractall(target_directory) + for member in fh.getmembers(): + if member.isreg(): + member.name = os.path.basename(member.name) + fh.extract(member, os.path.join(target_directory, database_path)) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_value, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") @@ -187,22 +204,31 @@ subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": database_value, - "name": database_name, - "path": database_path, + 'data_tables': { + data_table_name: [ + { + "value": database_value, + "name": database_name, + "path": database_path, + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_entry) + return data_table_entry -def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): +def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): args = [ '--threads', str(kraken2_args["threads"]), '--download-taxonomy', - '--db', custom_database_name + '--db', custom_database_name, ] + if kraken2_args['skip_maps']: + args.append('--skip-maps') + subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) args = [ @@ -230,22 +256,21 @@ '--db', custom_database_name ] - subprocess.check_call(['kraken2-build'] + args, target_directory) + subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) data_table_entry = { - "value": custom_database_name, - "name": custom_database_name, - "path": custom_database_name + 'data_tables': { + data_table_name: [ + { + "value": custom_database_name, + "name": custom_database_name, + "path": custom_database_name + } + ] + } } - _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) - - -def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) - return data_manager_dict + return data_table_entry def main(): @@ -259,7 +284,8 @@ parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') - parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' ) + parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') + parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') args = parser.parse_args() data_manager_input = json.loads(open(args.data_manager_json).read()) @@ -276,7 +302,6 @@ data_manager_output = {} - print(args.database_type) if str(args.database_type) == 'standard': kraken2_args = { "kmer_len": args.kmer_len, @@ -284,14 +309,12 @@ "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_standard( - data_manager_output, + data_manager_output = kraken2_build_standard( kraken2_args, target_directory, ) elif str(args.database_type) == 'minikraken': - kraken2_build_minikraken( - data_manager_output, + data_manager_output = kraken2_build_minikraken( str(args.minikraken2_version), target_directory ) @@ -303,21 +326,20 @@ "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_special( - data_manager_output, + data_manager_output = kraken2_build_special( kraken2_args, target_directory, ) elif str(args.database_type) == 'custom': kraken2_args = { "custom_fasta": args.custom_fasta, + "skip_maps": args.skip_maps, "kmer_len": args.kmer_len, "minimizer_len": args.minimizer_len, "minimizer_spaces": args.minimizer_spaces, "threads": args.threads, } - kraken2_build_custom( - data_manager_output, + data_manager_output = kraken2_build_custom( kraken2_args, args.custom_database_name, target_directory, diff -r 4c9f9d6098eb -r f005b6efd096 data_manager/kraken2_build_database.xml --- a/data_manager/kraken2_build_database.xml Mon May 06 19:42:14 2019 -0400 +++ b/data_manager/kraken2_build_database.xml Fri May 24 13:39:08 2019 -0400 @@ -1,5 +1,5 @@ - + @@ -32,6 +32,7 @@ --minimizer-spaces ${database_type.minimizer_spaces} #else if $database_type.database_type == "custom" --threads \${GALAXY_SLOTS:-1} + ${database_type.skip_maps} --custom-fasta ${database_type.custom_fasta} --custom-database-name ${database_type.custom_database_name} --kmer-len ${database_type.kmer_len} @@ -68,6 +69,7 @@ + @@ -76,6 +78,15 @@ + + + + + + + + + diff -r 4c9f9d6098eb -r f005b6efd096 data_manager_conf.xml --- a/data_manager_conf.xml Mon May 06 19:42:14 2019 -0400 +++ b/data_manager_conf.xml Fri May 24 13:39:08 2019 -0400 @@ -1,18 +1,18 @@ - - - - - - - - ${path} - kraken2_databases/${path} - + + + + + + + + ${path} + kraken2_databases/${path} + ${GALAXY_DATA_MANAGER_DATA_PATH}/kraken2_databases/${path} - abspath - - - + abspath + + + diff -r 4c9f9d6098eb -r f005b6efd096 test-data/adapter.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/adapter.fa Fri May 24 13:39:08 2019 -0400 @@ -0,0 +1,2 @@ +>sequence16|kraken:taxid|32630 Adapter sequence +CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA diff -r 4c9f9d6098eb -r f005b6efd096 test-data/adapter.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/adapter.fastq Fri May 24 13:39:08 2019 -0400 @@ -0,0 +1,4 @@ +@sequence16 +CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII diff -r 4c9f9d6098eb -r f005b6efd096 test-data/kraken2_custom_data_manager.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kraken2_custom_data_manager.json Fri May 24 13:39:08 2019 -0400 @@ -0,0 +1,1 @@ +{"data_tables": {"kraken2_databases": [{"path": "database", "name": "database", "value": "database"}]}} \ No newline at end of file