comparison data_manager/kraken2_build_minikraken.py @ 12:039a65ff445d draft

planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 70494d589aa1990618ebc7a895e91477d41c3203-dirty
author dfornika
date Tue, 05 Mar 2019 15:11:05 -0500
parents 1dc93ae264e6
children 1f55643715e1
comparison
equal deleted inserted replaced
11:1dc93ae264e6 12:039a65ff445d
19 return_code = proc.wait() 19 return_code = proc.wait()
20 if return_code: 20 if return_code:
21 print("Error building database.", file=sys.stderr) 21 print("Error building database.", file=sys.stderr)
22 sys.exit( return_code ) 22 sys.exit( return_code )
23 23
24 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): 24 def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
25 25
26 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 26 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
27
28 special_database_names = {
29 "rdp": "RDP",
30 "greengenes": "Greengenes",
31 "silva": "Silva",
32 }
33 27
34 database_value = "_".join([ 28 database_value = "_".join([
35 now, 29 now,
36 kraken2_args["special"], 30 "minikraken2",
37 "kmer-len", str(kraken2_args["kmer_len"]), 31 minikraken2_version,
38 "minimizer-len", str(kraken2_args["minimizer_len"]), 32 "8GB",
39 "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
40 ]) 33 ])
41 34
42 database_name = " ".join([ 35 database_name = " ".join([
43 special_database_names[kraken2_args["special"]], 36 "Minikraken2",
37 minikraken2_version,
44 "(Created:", 38 "(Created:",
45 now + "," 39 now + ")"
46 "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
47 "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
48 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ")",
49 ]) 40 ])
50 41
51 database_path = database_value 42 database_path = database_value
52 43
53 args = [ 44 args = [
54 '--threads', str(kraken2_args["threads"]), 45 'https://ccb.jhu.edu/software/kraken2/dl/minikraken2_' + minikraken2_version + '_8GB.tgz'
55 '--special', kraken2_args["special"],
56 '--kmer-len', str(kraken2_args["kmer_len"]),
57 '--minimizer-len', str(kraken2_args["minimizer_len"]),
58 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
59 '--db', database_path
60 ] 46 ]
61 47
62 run(['kraken2-build'] + args, target_directory) 48 run(['wget'] + args, target_directory)
63 49
64 args = [ 50 args = [
65 '--threads', str(kraken2_args["threads"]), 51 '-xvzf', 'minikraken2_' + minikraken2_version + '_8GB.tgz'
66 '--clean',
67 '--db', database_path
68 ] 52 ]
69 53
70 run(['kraken2-build'] + args, target_directory) 54 run(['tar'] + args, target_directory)
71 55
72 data_table_entry = { 56 data_table_entry = {
73 "value": database_value, 57 "value": database_value,
74 "name": database_name, 58 "name": database_name,
75 "path": database_path, 59 "path": database_path,
86 70
87 71
88 def main(): 72 def main():
89 parser = argparse.ArgumentParser() 73 parser = argparse.ArgumentParser()
90 parser.add_argument('data_manager_json') 74 parser.add_argument('data_manager_json')
91 parser.add_argument( '-b', '--db-type', dest='db_type', help='database type (one of: silva, rdp, greengenes)' ) 75 parser.add_argument( '-v', '--minikraken2-version', dest='minikraken2_version', default="v2", help='MiniKraken2 version (v1 or v2)' )
92 parser.add_argument( '-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length' )
93 parser.add_argument( '-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length' )
94 parser.add_argument( '-s', '--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces' )
95 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) 76 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' )
77
96 args = parser.parse_args() 78 args = parser.parse_args()
97
98 kraken2_args = {
99 "special": args.db_type,
100 "kmer_len": args.kmer_len,
101 "minimizer_len": args.minimizer_len,
102 "minimizer_spaces": args.minimizer_spaces,
103 "threads": args.threads,
104 }
105 79
106 data_manager_input = json.loads(open(args.data_manager_json).read()) 80 data_manager_input = json.loads(open(args.data_manager_json).read())
107 81
108 target_directory = data_manager_input['output_data'][0]['extra_files_path'] 82 target_directory = data_manager_input['output_data'][0]['extra_files_path']
109 83
115 else: 89 else:
116 raise 90 raise
117 91
118 data_manager_output = {} 92 data_manager_output = {}
119 93
120 kraken2_build_standard( 94 kraken2_build_minikraken(
121 data_manager_output, 95 data_manager_output,
122 kraken2_args, 96 args.minikraken2_version,
123 target_directory, 97 target_directory,
124 ) 98 )
125 99
126 open(args.data_manager_json, 'wb').write(json.dumps(data_manager_output)) 100 open(args.data_manager_json, 'wb').write(json.dumps(data_manager_output))
127 101