Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_minikraken.py @ 12:039a65ff445d draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 70494d589aa1990618ebc7a895e91477d41c3203-dirty
author | dfornika |
---|---|
date | Tue, 05 Mar 2019 15:11:05 -0500 |
parents | 1dc93ae264e6 |
children | 1f55643715e1 |
comparison
Legend: equal | deleted | inserted | replaced
11:1dc93ae264e6 | 12:039a65ff445d |
---|---|
19 return_code = proc.wait() | 19 return_code = proc.wait() |
20 if return_code: | 20 if return_code: |
21 print("Error building database.", file=sys.stderr) | 21 print("Error building database.", file=sys.stderr) |
22 sys.exit( return_code ) | 22 sys.exit( return_code ) |
23 | 23 |
24 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | 24 def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): |
25 | 25 |
26 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 26 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
27 | |
28 special_database_names = { | |
29 "rdp": "RDP", | |
30 "greengenes": "Greengenes", | |
31 "silva": "Silva", | |
32 } | |
33 | 27 |
34 database_value = "_".join([ | 28 database_value = "_".join([ |
35 now, | 29 now, |
36 kraken2_args["special"], | 30 "minikraken2", |
37 "kmer-len", str(kraken2_args["kmer_len"]), | 31 minikraken2_version, |
38 "minimizer-len", str(kraken2_args["minimizer_len"]), | 32 "8GB", |
39 "minimizer-spaces", str(kraken2_args["minimizer_spaces"]), | |
40 ]) | 33 ]) |
41 | 34 |
42 database_name = " ".join([ | 35 database_name = " ".join([ |
43 special_database_names[kraken2_args["special"]], | 36 "Minikraken2", |
37 minikraken2_version, | |
44 "(Created:", | 38 "(Created:", |
45 now + "," | 39 now + ")" |
46 "kmer-len=" + str(kraken2_args["kmer_len"]) + ",", | |
47 "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",", | |
48 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ")", | |
49 ]) | 40 ]) |
50 | 41 |
51 database_path = database_value | 42 database_path = database_value |
52 | 43 |
53 args = [ | 44 args = [ |
54 '--threads', str(kraken2_args["threads"]), | 45 'https://ccb.jhu.edu/software/kraken2/dl/minikraken2_' + minikraken2_version + '_8GB.tgz' |
55 '--special', kraken2_args["special"], | |
56 '--kmer-len', str(kraken2_args["kmer_len"]), | |
57 '--minimizer-len', str(kraken2_args["minimizer_len"]), | |
58 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), | |
59 '--db', database_path | |
60 ] | 46 ] |
61 | 47 |
62 run(['kraken2-build'] + args, target_directory) | 48 run(['wget'] + args, target_directory) |
63 | 49 |
64 args = [ | 50 args = [ |
65 '--threads', str(kraken2_args["threads"]), | 51 '-xvzf', 'minikraken2_' + minikraken2_version + '_8GB.tgz' |
66 '--clean', | |
67 '--db', database_path | |
68 ] | 52 ] |
69 | 53 |
70 run(['kraken2-build'] + args, target_directory) | 54 run(['tar'] + args, target_directory) |
71 | 55 |
72 data_table_entry = { | 56 data_table_entry = { |
73 "value": database_value, | 57 "value": database_value, |
74 "name": database_name, | 58 "name": database_name, |
75 "path": database_path, | 59 "path": database_path, |
86 | 70 |
87 | 71 |
88 def main(): | 72 def main(): |
89 parser = argparse.ArgumentParser() | 73 parser = argparse.ArgumentParser() |
90 parser.add_argument('data_manager_json') | 74 parser.add_argument('data_manager_json') |
91 parser.add_argument( '-b', '--db-type', dest='db_type', help='database type (one of: silva, rdp, greengenes)' ) | 75 parser.add_argument( '-v', '--minikraken2-version', dest='minikraken2_version', default="v2", help='MiniKraken2 version (v1 or v2)' ) |
92 parser.add_argument( '-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length' ) | |
93 parser.add_argument( '-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length' ) | |
94 parser.add_argument( '-s', '--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces' ) | |
95 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) | 76 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) |
77 | |
96 args = parser.parse_args() | 78 args = parser.parse_args() |
97 | |
98 kraken2_args = { | |
99 "special": args.db_type, | |
100 "kmer_len": args.kmer_len, | |
101 "minimizer_len": args.minimizer_len, | |
102 "minimizer_spaces": args.minimizer_spaces, | |
103 "threads": args.threads, | |
104 } | |
105 | 79 |
106 data_manager_input = json.loads(open(args.data_manager_json).read()) | 80 data_manager_input = json.loads(open(args.data_manager_json).read()) |
107 | 81 |
108 target_directory = data_manager_input['output_data'][0]['extra_files_path'] | 82 target_directory = data_manager_input['output_data'][0]['extra_files_path'] |
109 | 83 |
115 else: | 89 else: |
116 raise | 90 raise |
117 | 91 |
118 data_manager_output = {} | 92 data_manager_output = {} |
119 | 93 |
120 kraken2_build_standard( | 94 kraken2_build_minikraken( |
121 data_manager_output, | 95 data_manager_output, |
122 kraken2_args, | 96 args.minikraken2_version, |
123 target_directory, | 97 target_directory, |
124 ) | 98 ) |
125 | 99 |
126 open(args.data_manager_json, 'wb').write(json.dumps(data_manager_output)) | 100 open(args.data_manager_json, 'wb').write(json.dumps(data_manager_output)) |
127 | 101 |