comparison data_manager/fetch_mothur_reference_data.py @ 1:aec831b54a5b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit 57f71aa633a43ab02bbf05acd0c6d7f406e01f1e"
author    iuc
date      Thu, 28 Nov 2019 15:47:32 -0500
parents   ab7a7e798c34
children  0e532fc0a0a6
0:ab7a7e798c34 (before) | 1:aec831b54a5b (after) |
226 system. | 226 system. |
227 | 227 |
228 Returns the name that the file is saved with. | 228 Returns the name that the file is saved with. |
229 | 229 |
230 """ | 230 """ |
231 print "Downloading %s" % url | 231 print("Downloading %s" % url) |
232 if not target: | 232 if not target: |
233 target = os.path.basename(url) | 233 target = os.path.basename(url) |
234 if wd: | 234 if wd: |
235 target = os.path.join(wd, target) | 235 target = os.path.join(wd, target) |
236 print "Saving to %s" % target | 236 print("Saving to %s" % target) |
237 open(target, 'wb').write(urllib2.urlopen(url).read()) | 237 open(target, 'wb').write(urllib2.urlopen(url).read()) |
238 return target | 238 return target |
239 | 239 |
240 | 240 |
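Both columns of this hunk still call urllib2.urlopen() on line 237: only the print statements were ported, and urllib2 does not exist under Python 3 (it was split into urllib.request and urllib.error). A minimal sketch of a Python 3 equivalent of this download helper, using the documented urlopen context-manager idiom; the name download_file is hypothetical, since the function's own name falls outside the hunk:

    import os
    from urllib.request import urlopen

    def download_file(url, target=None, wd=None):
        # Mirror the hunk above: name the local file after the URL unless
        # 'target' is given, optionally prefix the working directory,
        # then fetch the payload and save it.
        print("Downloading %s" % url)
        if not target:
            target = os.path.basename(url)
        if wd:
            target = os.path.join(wd, target)
        print("Saving to %s" % target)
        with urlopen(url) as response, open(target, 'wb') as fp:
            fp.write(response.read())
        return target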
241 def unpack_zip_archive(filen, wd=None): | 241 def unpack_zip_archive(filen, wd=None): |
252 Once all the files are extracted the ZIP archive | 252 Once all the files are extracted the ZIP archive |
253 file is deleted from the file system. | 253 file is deleted from the file system. |
254 | 254 |
255 """ | 255 """ |
256 if not zipfile.is_zipfile(filen): | 256 if not zipfile.is_zipfile(filen): |
257 print "%s: not ZIP formatted file" | 257 print("%s: not ZIP formatted file") |
258 return [filen] | 258 return [filen] |
259 file_list = [] | 259 file_list = [] |
260 z = zipfile.ZipFile(filen) | 260 z = zipfile.ZipFile(filen) |
261 for name in z.namelist(): | 261 for name in z.namelist(): |
262 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): | 262 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): |
263 print "Ignoring %s" % name | 263 print("Ignoring %s" % name) |
264 continue | 264 continue |
265 if wd: | 265 if wd: |
266 target = os.path.join(wd, name) | 266 target = os.path.join(wd, name) |
267 else: | 267 else: |
268 target = name | 268 target = name |
269 if name.endswith('/'): | 269 if name.endswith('/'): |
270 # Make directory | 270 # Make directory |
271 print "Creating dir %s" % target | 271 print("Creating dir %s" % target) |
272 try: | 272 try: |
273 os.makedirs(target) | 273 os.makedirs(target) |
274 except OSError: | 274 except OSError: |
275 pass | 275 pass |
276 else: | 276 else: |
277 # Extract file | 277 # Extract file |
278 print "Extracting %s" % name | 278 print("Extracting %s" % name) |
279 try: | 279 try: |
280 os.makedirs(os.path.dirname(target)) | 280 os.makedirs(os.path.dirname(target)) |
281 except OSError: | 281 except OSError: |
282 pass | 282 pass |
283 open(target, 'wb').write(z.read(name)) | 283 open(target, 'wb').write(z.read(name)) |
284 file_list.append(target) | 284 file_list.append(target) |
285 print "Removing %s" % filen | 285 print("Removing %s" % filen) |
286 os.remove(filen) | 286 os.remove(filen) |
287 return file_list | 287 return file_list |
288 | 288 |
289 | 289 |
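Two Python 3 hazards survive this hunk unchanged: line 262 calls bare reduce(), which Python 3 only provides as functools.reduce (the imports fall outside the hunk, so it may be imported there), and the message on line 257 never interpolates its %s placeholder in either revision, so the literal string "%s: not ZIP formatted file" is printed. The reduce() idiom also reads more directly as any(); a minimal equivalent sketch, assuming IGNORE_PATHS is an iterable of path prefixes as the calls suggest:

    # Equivalent to:
    #   reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False)
    def is_ignored(name, ignore_paths=IGNORE_PATHS):
        return any(name.startswith(prefix) for prefix in ignore_paths)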
290 def unpack_tar_archive(filen, wd=None): | 290 def unpack_tar_archive(filen, wd=None): |
303 file is deleted from the file system. | 303 file is deleted from the file system. |
304 | 304 |
305 """ | 305 """ |
306 file_list = [] | 306 file_list = [] |
307 if not tarfile.is_tarfile(filen): | 307 if not tarfile.is_tarfile(filen): |
308 print "%s: not TAR file" | 308 print("%s: not TAR file") |
309 return [filen] | 309 return [filen] |
310 t = tarfile.open(filen) | 310 t = tarfile.open(filen) |
311 for name in t.getnames(): | 311 for name in t.getnames(): |
312 # Check for unwanted files | 312 # Check for unwanted files |
313 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): | 313 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): |
314 print "Ignoring %s" % name | 314 print("Ignoring %s" % name) |
315 continue | 315 continue |
316 # Extract file | 316 # Extract file |
317 print "Extracting %s" % name | 317 print("Extracting %s" % name) |
318 t.extract(name, wd) | 318 t.extract(name, wd) |
319 if wd: | 319 if wd: |
320 target = os.path.join(wd, name) | 320 target = os.path.join(wd, name) |
321 else: | 321 else: |
322 target = name | 322 target = name |
323 file_list.append(target) | 323 file_list.append(target) |
324 print "Removing %s" % filen | 324 print("Removing %s" % filen) |
325 os.remove(filen) | 325 os.remove(filen) |
326 return file_list | 326 return file_list |
327 | 327 |
328 | 328 |
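The same two details recur here: bare reduce() on line 313 and a %s placeholder with no argument on line 308. One further point: t.extract(name, wd) passes wd straight through, and tarfile.TarFile.extract() expects a string path (its default is ''), so calling this helper without a working directory would fail inside os.path.join. A hedged sketch of the loop with both points addressed:

    import os
    import tarfile

    def unpack_tar_archive(filen, wd=None):
        # Extract every member not under IGNORE_PATHS and return the
        # paths of the extracted files; deleting the archive afterwards
        # is left to the caller in this sketch.
        file_list = []
        with tarfile.open(filen) as t:
            for name in t.getnames():
                if any(name.startswith(p) for p in IGNORE_PATHS):
                    print("Ignoring %s" % name)
                    continue
                t.extract(name, wd or '')
                file_list.append(os.path.join(wd, name) if wd else name)
        return file_list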
329 def unpack_archive(filen, wd=None): | 329 def unpack_archive(filen, wd=None): |
337 'wd' specifies the working directory to extract | 337 'wd' specifies the working directory to extract |
338 the files to, otherwise they are extracted to the | 338 the files to, otherwise they are extracted to the |
339 current working directory. | 339 current working directory. |
340 | 340 |
341 """ | 341 """ |
342 print "Unpack %s" % filen | 342 print("Unpack %s" % filen) |
343 ext = os.path.splitext(filen)[1] | 343 ext = os.path.splitext(filen)[1] |
344 print "Extension: %s" % ext | 344 print("Extension: %s" % ext) |
345 if ext == ".zip": | 345 if ext == ".zip": |
346 return unpack_zip_archive(filen, wd=wd) | 346 return unpack_zip_archive(filen, wd=wd) |
347 elif ext == ".tgz": | 347 elif ext == ".tgz": |
348 return unpack_tar_archive(filen, wd=wd) | 348 return unpack_tar_archive(filen, wd=wd) |
349 else: | 349 else: |
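os.path.splitext() returns only the final extension, so this dispatcher recognises '.zip' and '.tgz', but a '.tar.gz' or '.tar.bz2' archive yields ext values of '.gz' or '.bz2' and falls into the else branch elided from this hunk. A hedged sketch of a suffix-table dispatch that covers the compound names, assuming the fall-through returns the file as-is, the way both unpack helpers do for non-archives:

    def unpack_archive(filen, wd=None):
        # Longest suffixes first, so '.tar.gz' matches before '.gz' could.
        handlers = (
            ('.tar.bz2', unpack_tar_archive),
            ('.tar.gz', unpack_tar_archive),
            ('.tgz', unpack_tar_archive),
            ('.zip', unpack_zip_archive),
        )
        print("Unpack %s" % filen)
        for suffix, unpack in handlers:
            if filen.endswith(suffix):
                return unpack(filen, wd=wd)
        return [filen]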
380 """ | 380 """ |
381 ext = os.path.splitext(filen)[1] | 381 ext = os.path.splitext(filen)[1] |
382 try: | 382 try: |
383 return MOTHUR_FILE_TYPES[ext] | 383 return MOTHUR_FILE_TYPES[ext] |
384 except KeyError: | 384 except KeyError: |
385 print "WARNING: unknown file type for " + filen + ", skipping" | 385 print("WARNING: unknown file type for " + filen + ", skipping") |
386 return None | 386 return None |
387 | 387 |
388 | 388 |
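The try/except KeyError around the lookup is equivalent to dict.get() with a None sentinel; MOTHUR_FILE_TYPES (defined earlier in the file, outside this hunk) evidently maps file extensions to mothur data table types. A minimal equivalent sketch:

    def identify_type(filen):
        # None tells the caller to skip files whose extension is unknown.
        type_ = MOTHUR_FILE_TYPES.get(os.path.splitext(filen)[1])
        if type_ is None:
            print("WARNING: unknown file type for %s, skipping" % filen)
        return type_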
389 def get_name(filen): | 389 def get_name(filen): |
390 """Generate a descriptive name based on the file name | 390 """Generate a descriptive name based on the file name |
413 datasets: a list of dataset names corresponding to keys in | 413 datasets: a list of dataset names corresponding to keys in |
414 the MOTHUR_REFERENCE_DATA dictionary | 414 the MOTHUR_REFERENCE_DATA dictionary |
415 """ | 415 """ |
416 # Make working dir | 416 # Make working dir |
417 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) | 417 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) |
418 print "Working dir %s" % wd | 418 print("Working dir %s" % wd) |
419 # Iterate over all requested reference data URLs | 419 # Iterate over all requested reference data URLs |
420 for dataset in datasets: | 420 for dataset in datasets: |
421 print "Handling dataset '%s'" % dataset | 421 print("Handling dataset '%s'" % dataset) |
422 for name in MOTHUR_REFERENCE_DATA[dataset]: | 422 for name in MOTHUR_REFERENCE_DATA[dataset]: |
423 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): | 423 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): |
424 type_ = identify_type(f) | 424 type_ = identify_type(f) |
425 entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) | 425 entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) |
426 print "%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f)) | 426 print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f))) |
427 if type_ is not None: | 427 if type_ is not None: |
428 # Move to target dir | 428 # Move to target dir |
429 ref_data_file = os.path.basename(f) | 429 ref_data_file = os.path.basename(f) |
430 f1 = os.path.join(target_dir, ref_data_file) | 430 f1 = os.path.join(target_dir, ref_data_file) |
431 print "Moving %s to %s" % (f, f1) | 431 print("Moving %s to %s" % (f, f1)) |
432 os.rename(f, f1) | 432 os.rename(f, f1) |
433 # Add entry to data table | 433 # Add entry to data table |
434 table_name = "mothur_%s" % type_ | 434 table_name = "mothur_%s" % type_ |
435 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) | 435 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) |
436 # Remove working dir | 436 # Remove working dir |
437 print "Removing %s" % wd | 437 print("Removing %s" % wd) |
438 shutil.rmtree(wd) | 438 shutil.rmtree(wd) |
439 | 439 |
440 | 440 |
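A portability note on line 432: os.rename() raises OSError (EXDEV, "Invalid cross-device link") when source and destination sit on different filesystems. The working directory is created under os.getcwd() here, so the risk is target_dir living on another mount; shutil.move() is the drop-in alternative:

    # Same effect as os.rename(f, f1) on one filesystem, but survives
    # a cross-device move by copying and then deleting.
    shutil.move(f, f1)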
441 def files_from_filesystem_paths(paths): | 441 def files_from_filesystem_paths(paths): |
442 """Return list of file paths from arbitrary input paths | 442 """Return list of file paths from arbitrary input paths |
448 """ | 448 """ |
449 # Collect files to add | 449 # Collect files to add |
450 files = [] | 450 files = [] |
451 for path in paths: | 451 for path in paths: |
452 path = os.path.abspath(path) | 452 path = os.path.abspath(path) |
453 print "Examining '%s'..." % path | 453 print("Examining '%s'..." % path) |
454 if os.path.isfile(path): | 454 if os.path.isfile(path): |
455 # Store full path for file | 455 # Store full path for file |
456 files.append(path) | 456 files.append(path) |
457 elif os.path.isdir(path): | 457 elif os.path.isdir(path): |
458 # Descend into directory and collect the files | 458 # Descend into directory and collect the files |
459 for f in os.listdir(path): | 459 for f in os.listdir(path): |
460 files.extend(files_from_filesystem_paths((os.path.join(path, f), ))) | 460 files.extend(files_from_filesystem_paths((os.path.join(path, f), ))) |
461 else: | 461 else: |
462 print "Not a file or directory, ignored" | 462 print("Not a file or directory, ignored") |
463 return files | 463 return files |
464 | 464 |
465 | 465 |
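The directory branch recurses through a one-element tuple per entry; os.walk() collects the same flat file list without recursion. A minimal equivalent sketch:

    import os

    def files_from_filesystem_paths(paths):
        # Expand files and directories into a flat list of absolute
        # file paths; anything else is skipped.
        files = []
        for path in map(os.path.abspath, paths):
            if os.path.isfile(path):
                files.append(path)
            elif os.path.isdir(path):
                for dirpath, _, filenames in os.walk(path):
                    files.extend(os.path.join(dirpath, name) for name in filenames)
        return files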
466 def import_from_server(data_tables, target_dir, paths, description, link_to_data=False): | 466 def import_from_server(data_tables, target_dir, paths, description, link_to_data=False): |
467 """Import reference data from filesystem paths | 467 """Import reference data from filesystem paths |
487 files = files_from_filesystem_paths(paths) | 487 files = files_from_filesystem_paths(paths) |
488 # Handle each file individually | 488 # Handle each file individually |
489 for f in files: | 489 for f in files: |
490 type_ = identify_type(f) | 490 type_ = identify_type(f) |
491 if type_ is None: | 491 if type_ is None: |
492 print "%s: unrecognised type, skipped" % f | 492 print("%s: unrecognised type, skipped" % f) |
493 continue | 493 continue |
494 ref_data_file = os.path.basename(f) | 494 ref_data_file = os.path.basename(f) |
495 target_file = os.path.join(target_dir, ref_data_file) | 495 target_file = os.path.join(target_dir, ref_data_file) |
496 entry_name = "%s" % os.path.splitext(ref_data_file)[0] | 496 entry_name = "%s" % os.path.splitext(ref_data_file)[0] |
497 if description: | 497 if description: |
498 entry_name += " (%s)" % description | 498 entry_name += " (%s)" % description |
499 print "%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file) | 499 print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file)) |
500 # Link to or copy the data | 500 # Link to or copy the data |
501 if link_to_data: | 501 if link_to_data: |
502 os.symlink(f, target_file) | 502 os.symlink(f, target_file) |
503 else: | 503 else: |
504 shutil.copyfile(f, target_file) | 504 shutil.copyfile(f, target_file) |
506 table_name = "mothur_%s" % type_ | 506 table_name = "mothur_%s" % type_ |
507 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) | 507 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) |
508 | 508 |
509 | 509 |
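One behavioural asymmetry in the link-or-copy step: shutil.copyfile() overwrites an existing target_file, but os.symlink() raises OSError (FileExistsError under Python 3) if the link name already exists, e.g. on a re-run into the same target directory. A hedged guard that makes the two branches behave alike:

    if os.path.lexists(target_file):
        os.remove(target_file)   # clear a stale copy or symlink first
    if link_to_data:
        os.symlink(f, target_file)
    else:
        shutil.copyfile(f, target_file)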
510 if __name__ == "__main__": | 510 if __name__ == "__main__": |
511 print "Starting..." | 511 print("Starting...") |
512 | 512 |
513 # Read command line | 513 # Read command line |
514 parser = optparse.OptionParser() | 514 parser = optparse.OptionParser() |
515 parser.add_option('--source', action='store', dest='data_source') | 515 parser.add_option('--source', action='store', dest='data_source') |
516 parser.add_option('--datasets', action='store', dest='datasets', default='') | 516 parser.add_option('--datasets', action='store', dest='datasets', default='') |
517 parser.add_option('--paths', action='store', dest='paths', default=[]) | 517 parser.add_option('--paths', action='store', dest='paths', default=[]) |
518 parser.add_option('--description', action='store', dest='description', default='') | 518 parser.add_option('--description', action='store', dest='description', default='') |
519 parser.add_option('--link', action='store_true', dest='link_to_data') | 519 parser.add_option('--link', action='store_true', dest='link_to_data') |
520 options, args = parser.parse_args() | 520 options, args = parser.parse_args() |
521 print "options: %s" % options | 521 print("options: %s" % options) |
522 print "args : %s" % args | 522 print("args : %s" % args) |
523 | 523 |
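optparse has been deprecated since Python 2.7 in favour of argparse, and a Python 3 port is a natural point to switch. A hedged sketch of an equivalent parser; note that the original default=[] for --paths would make the later options.paths.replace() call fail with AttributeError if --paths were ever omitted, so the sketch uses a string default, and the JSON file becomes a declared positional argument instead of a leftover in args:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--source', dest='data_source')
    parser.add_argument('--datasets', default='')
    parser.add_argument('--paths', default='')
    parser.add_argument('--description', default='')
    parser.add_argument('--link', action='store_true', dest='link_to_data')
    parser.add_argument('jsonfile')
    options = parser.parse_args()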
524 # Check for JSON file | 524 # Check for JSON file |
525 if len(args) != 1: | 525 if len(args) != 1: |
526 sys.stderr.write("Need to supply JSON file name") | 526 sys.stderr.write("Need to supply JSON file name") |
527 sys.exit(1) | 527 sys.exit(1) |
530 | 530 |
531 # Read the input JSON | 531 # Read the input JSON |
532 params, target_dir = read_input_json(jsonfile) | 532 params, target_dir = read_input_json(jsonfile) |
533 | 533 |
534 # Make the target directory | 534 # Make the target directory |
535 print "Making %s" % target_dir | 535 print("Making %s" % target_dir) |
536 os.mkdir(target_dir) | 536 os.mkdir(target_dir) |
537 | 537 |
538 # Set up data tables dictionary | 538 # Set up data tables dictionary |
539 data_tables = create_data_tables_dict() | 539 data_tables = create_data_tables_dict() |
540 add_data_table(data_tables, 'mothur_lookup') | 540 add_data_table(data_tables, 'mothur_lookup') |
552 # Get list of paths (need to remove any escapes for '\n' and '\r' | 552 # Get list of paths (need to remove any escapes for '\n' and '\r' |
553 # that might have been inserted by Galaxy) | 553 # that might have been inserted by Galaxy) |
554 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split() | 554 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split() |
555 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) | 555 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) |
556 # Write output JSON | 556 # Write output JSON |
557 print "Outputting JSON" | 557 print("Outputting JSON") |
558 print str(json.dumps(data_tables)) | 558 print(json.dumps(data_tables)) |
559 open(jsonfile, 'wb').write(json.dumps(data_tables)) | 559 open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) |
560 print "Done." | 560 print("Done.") |
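For reference, the JSON written back through jsonfile follows Galaxy's standard data manager output shape, one list of entries per data table, assuming create_data_tables_dict() builds the conventional "data_tables" wrapper as the helper names suggest. The printed output for a single imported lookup file would resemble the following (the name and value are purely illustrative, not taken from a real run):

    {"data_tables": {"mothur_lookup": [{"name": "LookUp_Titanium (GS FLX Titanium)", "value": "LookUp_Titanium.pat"}]}}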