comparison data_manager/kraken2_build_database.py @ 18:f005b6efd096 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty
author dfornika
date Fri, 24 May 2019 13:39:08 -0400
parents 4c9f9d6098eb
children ffeb852407d6
comparison
equal deleted inserted replaced
17:4c9f9d6098eb 18:f005b6efd096
48 48
49 def __str__(self): 49 def __str__(self):
50 return self.value 50 return self.value
51 51
52 52
53 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): 53 def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
55 55
56 database_value = "_".join([ 56 database_value = "_".join([
57 now, 57 now,
58 "standard", 58 "standard",
90 ] 90 ]
91 91
92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
93 93
94 data_table_entry = { 94 data_table_entry = {
95 "value": database_value, 95 'data_tables': {
96 "name": database_name, 96 data_table_name: [
97 "path": database_path, 97 {
98 } 98 "value": database_value,
99 99 "name": database_name,
100 _add_data_table_entry(data_manager_dict, data_table_entry) 100 "path": database_path,
101 101 }
102 102 ]
103 def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): 103 }
104 }
105
106 return data_table_entry
107
108
109 def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
104 110
105 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 111 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
106 112
107 database_value = "_".join([ 113 database_value = "_".join([
108 now, 114 now,
115 "Minikraken2", 121 "Minikraken2",
116 minikraken2_version, 122 minikraken2_version,
117 "(Created:", 123 "(Created:",
118 now + ")" 124 now + ")"
119 ]) 125 ])
126
127 database_path = database_value
120 128
121 # download the minikraken2 data 129 # download the minikraken2 data
122 src = urlopen( 130 src = urlopen(
123 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' 131 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz'
124 % minikraken2_version 132 % minikraken2_version
125 ) 133 )
126 with open('tmp_data.tar.gz', 'wb') as dst: 134 with open('tmp_data.tar.gz', 'wb') as dst:
127 shutil.copyfileobj(src, dst) 135 shutil.copyfileobj(src, dst)
128 # unpack the downloaded archive to the target directory 136 # unpack the downloaded archive to the target directory
129 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: 137 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh:
130 fh.extractall(target_directory) 138 for member in fh.getmembers():
139 if member.isreg():
140 member.name = os.path.basename(member.name)
141 fh.extract(member, os.path.join(target_directory, database_path))
131 142
132 data_table_entry = { 143 data_table_entry = {
133 "value": database_value, 144 'data_tables': {
134 "name": database_name, 145 data_table_name: [
135 "path": database_value, 146 {
136 } 147 "value": database_value,
137 148 "name": database_name,
138 _add_data_table_entry(data_manager_dict, data_table_entry) 149 "path": database_path,
139 150 }
140 151 ]
141 def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): 152 }
153 }
154
155 return data_table_entry
156
157
158 def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
142 159
143 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") 160 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
144 161
145 special_database_names = { 162 special_database_names = {
146 "rdp": "RDP", 163 "rdp": "RDP",
185 ] 202 ]
186 203
187 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 204 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
188 205
189 data_table_entry = { 206 data_table_entry = {
190 "value": database_value, 207 'data_tables': {
191 "name": database_name, 208 data_table_name: [
192 "path": database_path, 209 {
193 } 210 "value": database_value,
194 211 "name": database_name,
195 _add_data_table_entry(data_manager_dict, data_table_entry) 212 "path": database_path,
196 213 }
197 214 ]
198 def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): 215 }
216 }
217
218 return data_table_entry
219
220
221 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME):
199 222
200 args = [ 223 args = [
201 '--threads', str(kraken2_args["threads"]), 224 '--threads', str(kraken2_args["threads"]),
202 '--download-taxonomy', 225 '--download-taxonomy',
203 '--db', custom_database_name 226 '--db', custom_database_name,
204 ] 227 ]
228
229 if kraken2_args['skip_maps']:
230 args.append('--skip-maps')
205 231
206 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 232 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
207 233
208 args = [ 234 args = [
209 '--threads', str(kraken2_args["threads"]), 235 '--threads', str(kraken2_args["threads"]),
228 '--threads', str(kraken2_args["threads"]), 254 '--threads', str(kraken2_args["threads"]),
229 '--clean', 255 '--clean',
230 '--db', custom_database_name 256 '--db', custom_database_name
231 ] 257 ]
232 258
233 subprocess.check_call(['kraken2-build'] + args, target_directory) 259 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
234 260
235 data_table_entry = { 261 data_table_entry = {
236 "value": custom_database_name, 262 'data_tables': {
237 "name": custom_database_name, 263 data_table_name: [
238 "path": custom_database_name 264 {
239 } 265 "value": custom_database_name,
240 266 "name": custom_database_name,
241 _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) 267 "path": custom_database_name
242 268 }
243 269 ]
244 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): 270 }
245 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) 271 }
246 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) 272
247 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) 273 return data_table_entry
248 return data_manager_dict
249 274
250 275
251 def main(): 276 def main():
252 parser = argparse.ArgumentParser() 277 parser = argparse.ArgumentParser()
253 parser.add_argument('data_manager_json') 278 parser.add_argument('data_manager_json')
257 parser.add_argument('--threads', dest='threads', default=1, help='threads') 282 parser.add_argument('--threads', dest='threads', default=1, help='threads')
258 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') 283 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
259 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') 284 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
260 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') 285 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
261 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') 286 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
262 parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' ) 287 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
288 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
263 args = parser.parse_args() 289 args = parser.parse_args()
264 290
265 data_manager_input = json.loads(open(args.data_manager_json).read()) 291 data_manager_input = json.loads(open(args.data_manager_json).read())
266 292
267 target_directory = data_manager_input['output_data'][0]['extra_files_path'] 293 target_directory = data_manager_input['output_data'][0]['extra_files_path']
274 else: 300 else:
275 raise 301 raise
276 302
277 data_manager_output = {} 303 data_manager_output = {}
278 304
279 print(args.database_type)
280 if str(args.database_type) == 'standard': 305 if str(args.database_type) == 'standard':
281 kraken2_args = { 306 kraken2_args = {
282 "kmer_len": args.kmer_len, 307 "kmer_len": args.kmer_len,
283 "minimizer_len": args.minimizer_len, 308 "minimizer_len": args.minimizer_len,
284 "minimizer_spaces": args.minimizer_spaces, 309 "minimizer_spaces": args.minimizer_spaces,
285 "threads": args.threads, 310 "threads": args.threads,
286 } 311 }
287 kraken2_build_standard( 312 data_manager_output = kraken2_build_standard(
288 data_manager_output,
289 kraken2_args, 313 kraken2_args,
290 target_directory, 314 target_directory,
291 ) 315 )
292 elif str(args.database_type) == 'minikraken': 316 elif str(args.database_type) == 'minikraken':
293 kraken2_build_minikraken( 317 data_manager_output = kraken2_build_minikraken(
294 data_manager_output,
295 str(args.minikraken2_version), 318 str(args.minikraken2_version),
296 target_directory 319 target_directory
297 ) 320 )
298 elif str(args.database_type) == 'special': 321 elif str(args.database_type) == 'special':
299 kraken2_args = { 322 kraken2_args = {
301 "kmer_len": args.kmer_len, 324 "kmer_len": args.kmer_len,
302 "minimizer_len": args.minimizer_len, 325 "minimizer_len": args.minimizer_len,
303 "minimizer_spaces": args.minimizer_spaces, 326 "minimizer_spaces": args.minimizer_spaces,
304 "threads": args.threads, 327 "threads": args.threads,
305 } 328 }
306 kraken2_build_special( 329 data_manager_output = kraken2_build_special(
307 data_manager_output,
308 kraken2_args, 330 kraken2_args,
309 target_directory, 331 target_directory,
310 ) 332 )
311 elif str(args.database_type) == 'custom': 333 elif str(args.database_type) == 'custom':
312 kraken2_args = { 334 kraken2_args = {
313 "custom_fasta": args.custom_fasta, 335 "custom_fasta": args.custom_fasta,
336 "skip_maps": args.skip_maps,
314 "kmer_len": args.kmer_len, 337 "kmer_len": args.kmer_len,
315 "minimizer_len": args.minimizer_len, 338 "minimizer_len": args.minimizer_len,
316 "minimizer_spaces": args.minimizer_spaces, 339 "minimizer_spaces": args.minimizer_spaces,
317 "threads": args.threads, 340 "threads": args.threads,
318 } 341 }
319 kraken2_build_custom( 342 data_manager_output = kraken2_build_custom(
320 data_manager_output,
321 kraken2_args, 343 kraken2_args,
322 args.custom_database_name, 344 args.custom_database_name,
323 target_directory, 345 target_directory,
324 ) 346 )
325 else: 347 else: