Mercurial > repos > iuc > data_manager_mothur_toolsuite
comparison data_manager/fetch_mothur_reference_data.py @ 1:aec831b54a5b draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit 57f71aa633a43ab02bbf05acd0c6d7f406e01f1e"
| author | iuc |
|---|---|
| date | Thu, 28 Nov 2019 15:47:32 -0500 |
| parents | ab7a7e798c34 |
| children | 0e532fc0a0a6 |
comparison legend: equal | deleted | inserted | replaced
| 0:ab7a7e798c34 | 1:aec831b54a5b |
|---|---|
| 226 system. | 226 system. |
| 227 | 227 |
| 228 Returns the name that the file is saved with. | 228 Returns the name that the file is saved with. |
| 229 | 229 |
| 230 """ | 230 """ |
| 231 print "Downloading %s" % url | 231 print("Downloading %s" % url) |
| 232 if not target: | 232 if not target: |
| 233 target = os.path.basename(url) | 233 target = os.path.basename(url) |
| 234 if wd: | 234 if wd: |
| 235 target = os.path.join(wd, target) | 235 target = os.path.join(wd, target) |
| 236 print "Saving to %s" % target | 236 print("Saving to %s" % target) |
| 237 open(target, 'wb').write(urllib2.urlopen(url).read()) | 237 open(target, 'wb').write(urllib2.urlopen(url).read()) |
| 238 return target | 238 return target |
| 239 | 239 |
| 240 | 240 |
| 241 def unpack_zip_archive(filen, wd=None): | 241 def unpack_zip_archive(filen, wd=None): |
| 252 Once all the files are extracted the ZIP archive | 252 Once all the files are extracted the ZIP archive |
| 253 file is deleted from the file system. | 253 file is deleted from the file system. |
| 254 | 254 |
| 255 """ | 255 """ |
| 256 if not zipfile.is_zipfile(filen): | 256 if not zipfile.is_zipfile(filen): |
| 257 print "%s: not ZIP formatted file" | 257 print("%s: not ZIP formatted file") |
| 258 return [filen] | 258 return [filen] |
| 259 file_list = [] | 259 file_list = [] |
| 260 z = zipfile.ZipFile(filen) | 260 z = zipfile.ZipFile(filen) |
| 261 for name in z.namelist(): | 261 for name in z.namelist(): |
| 262 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): | 262 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): |
| 263 print "Ignoring %s" % name | 263 print("Ignoring %s" % name) |
| 264 continue | 264 continue |
| 265 if wd: | 265 if wd: |
| 266 target = os.path.join(wd, name) | 266 target = os.path.join(wd, name) |
| 267 else: | 267 else: |
| 268 target = name | 268 target = name |
| 269 if name.endswith('/'): | 269 if name.endswith('/'): |
| 270 # Make directory | 270 # Make directory |
| 271 print "Creating dir %s" % target | 271 print("Creating dir %s" % target) |
| 272 try: | 272 try: |
| 273 os.makedirs(target) | 273 os.makedirs(target) |
| 274 except OSError: | 274 except OSError: |
| 275 pass | 275 pass |
| 276 else: | 276 else: |
| 277 # Extract file | 277 # Extract file |
| 278 print "Extracting %s" % name | 278 print("Extracting %s" % name) |
| 279 try: | 279 try: |
| 280 os.makedirs(os.path.dirname(target)) | 280 os.makedirs(os.path.dirname(target)) |
| 281 except OSError: | 281 except OSError: |
| 282 pass | 282 pass |
| 283 open(target, 'wb').write(z.read(name)) | 283 open(target, 'wb').write(z.read(name)) |
| 284 file_list.append(target) | 284 file_list.append(target) |
| 285 print "Removing %s" % filen | 285 print("Removing %s" % filen) |
| 286 os.remove(filen) | 286 os.remove(filen) |
| 287 return file_list | 287 return file_list |
| 288 | 288 |
| 289 | 289 |
| 290 def unpack_tar_archive(filen, wd=None): | 290 def unpack_tar_archive(filen, wd=None): |
| 303 file is deleted from the file system. | 303 file is deleted from the file system. |
| 304 | 304 |
| 305 """ | 305 """ |
| 306 file_list = [] | 306 file_list = [] |
| 307 if not tarfile.is_tarfile(filen): | 307 if not tarfile.is_tarfile(filen): |
| 308 print "%s: not TAR file" | 308 print("%s: not TAR file") |
| 309 return [filen] | 309 return [filen] |
| 310 t = tarfile.open(filen) | 310 t = tarfile.open(filen) |
| 311 for name in t.getnames(): | 311 for name in t.getnames(): |
| 312 # Check for unwanted files | 312 # Check for unwanted files |
| 313 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): | 313 if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): |
| 314 print "Ignoring %s" % name | 314 print("Ignoring %s" % name) |
| 315 continue | 315 continue |
| 316 # Extract file | 316 # Extract file |
| 317 print "Extracting %s" % name | 317 print("Extracting %s" % name) |
| 318 t.extract(name, wd) | 318 t.extract(name, wd) |
| 319 if wd: | 319 if wd: |
| 320 target = os.path.join(wd, name) | 320 target = os.path.join(wd, name) |
| 321 else: | 321 else: |
| 322 target = name | 322 target = name |
| 323 file_list.append(target) | 323 file_list.append(target) |
| 324 print "Removing %s" % filen | 324 print("Removing %s" % filen) |
| 325 os.remove(filen) | 325 os.remove(filen) |
| 326 return file_list | 326 return file_list |
| 327 | 327 |
| 328 | 328 |
| 329 def unpack_archive(filen, wd=None): | 329 def unpack_archive(filen, wd=None): |
| 337 'wd' specifies the working directory to extract | 337 'wd' specifies the working directory to extract |
| 338 the files to, otherwise they are extracted to the | 338 the files to, otherwise they are extracted to the |
| 339 current working directory. | 339 current working directory. |
| 340 | 340 |
| 341 """ | 341 """ |
| 342 print "Unpack %s" % filen | 342 print("Unpack %s" % filen) |
| 343 ext = os.path.splitext(filen)[1] | 343 ext = os.path.splitext(filen)[1] |
| 344 print "Extension: %s" % ext | 344 print("Extension: %s" % ext) |
| 345 if ext == ".zip": | 345 if ext == ".zip": |
| 346 return unpack_zip_archive(filen, wd=wd) | 346 return unpack_zip_archive(filen, wd=wd) |
| 347 elif ext == ".tgz": | 347 elif ext == ".tgz": |
| 348 return unpack_tar_archive(filen, wd=wd) | 348 return unpack_tar_archive(filen, wd=wd) |
| 349 else: | 349 else: |
| 380 """ | 380 """ |
| 381 ext = os.path.splitext(filen)[1] | 381 ext = os.path.splitext(filen)[1] |
| 382 try: | 382 try: |
| 383 return MOTHUR_FILE_TYPES[ext] | 383 return MOTHUR_FILE_TYPES[ext] |
| 384 except KeyError: | 384 except KeyError: |
| 385 print "WARNING: unknown file type for " + filen + ", skipping" | 385 print("WARNING: unknown file type for " + filen + ", skipping") |
| 386 return None | 386 return None |
| 387 | 387 |
| 388 | 388 |
| 389 def get_name(filen): | 389 def get_name(filen): |
| 390 """Generate a descriptive name based on the file name | 390 """Generate a descriptive name based on the file name |
| 413 datasets: a list of dataset names corresponding to keys in | 413 datasets: a list of dataset names corresponding to keys in |
| 414 the MOTHUR_REFERENCE_DATA dictionary | 414 the MOTHUR_REFERENCE_DATA dictionary |
| 415 """ | 415 """ |
| 416 # Make working dir | 416 # Make working dir |
| 417 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) | 417 wd = tempfile.mkdtemp(suffix=".mothur", dir=os.getcwd()) |
| 418 print "Working dir %s" % wd | 418 print("Working dir %s" % wd) |
| 419 # Iterate over all requested reference data URLs | 419 # Iterate over all requested reference data URLs |
| 420 for dataset in datasets: | 420 for dataset in datasets: |
| 421 print "Handling dataset '%s'" % dataset | 421 print("Handling dataset '%s'" % dataset) |
| 422 for name in MOTHUR_REFERENCE_DATA[dataset]: | 422 for name in MOTHUR_REFERENCE_DATA[dataset]: |
| 423 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): | 423 for f in fetch_files(MOTHUR_REFERENCE_DATA[dataset][name], wd=wd): |
| 424 type_ = identify_type(f) | 424 type_ = identify_type(f) |
| 425 entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) | 425 entry_name = "%s (%s)" % (os.path.splitext(os.path.basename(f))[0], name) |
| 426 print "%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f)) | 426 print("%s\t\'%s'\t.../%s" % (type_, entry_name, os.path.basename(f))) |
| 427 if type_ is not None: | 427 if type_ is not None: |
| 428 # Move to target dir | 428 # Move to target dir |
| 429 ref_data_file = os.path.basename(f) | 429 ref_data_file = os.path.basename(f) |
| 430 f1 = os.path.join(target_dir, ref_data_file) | 430 f1 = os.path.join(target_dir, ref_data_file) |
| 431 print "Moving %s to %s" % (f, f1) | 431 print("Moving %s to %s" % (f, f1)) |
| 432 os.rename(f, f1) | 432 os.rename(f, f1) |
| 433 # Add entry to data table | 433 # Add entry to data table |
| 434 table_name = "mothur_%s" % type_ | 434 table_name = "mothur_%s" % type_ |
| 435 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) | 435 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) |
| 436 # Remove working dir | 436 # Remove working dir |
| 437 print "Removing %s" % wd | 437 print("Removing %s" % wd) |
| 438 shutil.rmtree(wd) | 438 shutil.rmtree(wd) |
| 439 | 439 |
| 440 | 440 |
| 441 def files_from_filesystem_paths(paths): | 441 def files_from_filesystem_paths(paths): |
| 442 """Return list of file paths from arbitrary input paths | 442 """Return list of file paths from arbitrary input paths |
| 448 """ | 448 """ |
| 449 # Collect files to add | 449 # Collect files to add |
| 450 files = [] | 450 files = [] |
| 451 for path in paths: | 451 for path in paths: |
| 452 path = os.path.abspath(path) | 452 path = os.path.abspath(path) |
| 453 print "Examining '%s'..." % path | 453 print("Examining '%s'..." % path) |
| 454 if os.path.isfile(path): | 454 if os.path.isfile(path): |
| 455 # Store full path for file | 455 # Store full path for file |
| 456 files.append(path) | 456 files.append(path) |
| 457 elif os.path.isdir(path): | 457 elif os.path.isdir(path): |
| 458 # Descend into directory and collect the files | 458 # Descend into directory and collect the files |
| 459 for f in os.listdir(path): | 459 for f in os.listdir(path): |
| 460 files.extend(files_from_filesystem_paths((os.path.join(path, f), ))) | 460 files.extend(files_from_filesystem_paths((os.path.join(path, f), ))) |
| 461 else: | 461 else: |
| 462 print "Not a file or directory, ignored" | 462 print("Not a file or directory, ignored") |
| 463 return files | 463 return files |
| 464 | 464 |
| 465 | 465 |
| 466 def import_from_server(data_tables, target_dir, paths, description, link_to_data=False): | 466 def import_from_server(data_tables, target_dir, paths, description, link_to_data=False): |
| 467 """Import reference data from filesystem paths | 467 """Import reference data from filesystem paths |
| 487 files = files_from_filesystem_paths(paths) | 487 files = files_from_filesystem_paths(paths) |
| 488 # Handle each file individually | 488 # Handle each file individually |
| 489 for f in files: | 489 for f in files: |
| 490 type_ = identify_type(f) | 490 type_ = identify_type(f) |
| 491 if type_ is None: | 491 if type_ is None: |
| 492 print "%s: unrecognised type, skipped" % f | 492 print("%s: unrecognised type, skipped" % f) |
| 493 continue | 493 continue |
| 494 ref_data_file = os.path.basename(f) | 494 ref_data_file = os.path.basename(f) |
| 495 target_file = os.path.join(target_dir, ref_data_file) | 495 target_file = os.path.join(target_dir, ref_data_file) |
| 496 entry_name = "%s" % os.path.splitext(ref_data_file)[0] | 496 entry_name = "%s" % os.path.splitext(ref_data_file)[0] |
| 497 if description: | 497 if description: |
| 498 entry_name += " (%s)" % description | 498 entry_name += " (%s)" % description |
| 499 print "%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file) | 499 print("%s\t\'%s'\t.../%s" % (type_, entry_name, ref_data_file)) |
| 500 # Link to or copy the data | 500 # Link to or copy the data |
| 501 if link_to_data: | 501 if link_to_data: |
| 502 os.symlink(f, target_file) | 502 os.symlink(f, target_file) |
| 503 else: | 503 else: |
| 504 shutil.copyfile(f, target_file) | 504 shutil.copyfile(f, target_file) |
| 506 table_name = "mothur_%s" % type_ | 506 table_name = "mothur_%s" % type_ |
| 507 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) | 507 add_data_table_entry(data_tables, table_name, dict(name=entry_name, value=ref_data_file)) |
| 508 | 508 |
| 509 | 509 |
| 510 if __name__ == "__main__": | 510 if __name__ == "__main__": |
| 511 print "Starting..." | 511 print("Starting...") |
| 512 | 512 |
| 513 # Read command line | 513 # Read command line |
| 514 parser = optparse.OptionParser() | 514 parser = optparse.OptionParser() |
| 515 parser.add_option('--source', action='store', dest='data_source') | 515 parser.add_option('--source', action='store', dest='data_source') |
| 516 parser.add_option('--datasets', action='store', dest='datasets', default='') | 516 parser.add_option('--datasets', action='store', dest='datasets', default='') |
| 517 parser.add_option('--paths', action='store', dest='paths', default=[]) | 517 parser.add_option('--paths', action='store', dest='paths', default=[]) |
| 518 parser.add_option('--description', action='store', dest='description', default='') | 518 parser.add_option('--description', action='store', dest='description', default='') |
| 519 parser.add_option('--link', action='store_true', dest='link_to_data') | 519 parser.add_option('--link', action='store_true', dest='link_to_data') |
| 520 options, args = parser.parse_args() | 520 options, args = parser.parse_args() |
| 521 print "options: %s" % options | 521 print("options: %s" % options) |
| 522 print "args : %s" % args | 522 print("args : %s" % args) |
| 523 | 523 |
| 524 # Check for JSON file | 524 # Check for JSON file |
| 525 if len(args) != 1: | 525 if len(args) != 1: |
| 526 sys.stderr.write("Need to supply JSON file name") | 526 sys.stderr.write("Need to supply JSON file name") |
| 527 sys.exit(1) | 527 sys.exit(1) |
| 530 | 530 |
| 531 # Read the input JSON | 531 # Read the input JSON |
| 532 params, target_dir = read_input_json(jsonfile) | 532 params, target_dir = read_input_json(jsonfile) |
| 533 | 533 |
| 534 # Make the target directory | 534 # Make the target directory |
| 535 print "Making %s" % target_dir | 535 print("Making %s" % target_dir) |
| 536 os.mkdir(target_dir) | 536 os.mkdir(target_dir) |
| 537 | 537 |
| 538 # Set up data tables dictionary | 538 # Set up data tables dictionary |
| 539 data_tables = create_data_tables_dict() | 539 data_tables = create_data_tables_dict() |
| 540 add_data_table(data_tables, 'mothur_lookup') | 540 add_data_table(data_tables, 'mothur_lookup') |
| 552 # Get list of paths (need to remove any escapes for '\n' and '\r' | 552 # Get list of paths (need to remove any escapes for '\n' and '\r' |
| 553 # that might have been inserted by Galaxy) | 553 # that might have been inserted by Galaxy) |
| 554 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split() | 554 paths = options.paths.replace('__cn__', '\n').replace('__cr__', '\r').split() |
| 555 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) | 555 import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) |
| 556 # Write output JSON | 556 # Write output JSON |
| 557 print "Outputting JSON" | 557 print("Outputting JSON") |
| 558 print str(json.dumps(data_tables)) | 558 print(json.dumps(data_tables)) |
| 559 open(jsonfile, 'wb').write(json.dumps(data_tables)) | 559 open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) |
| 560 print "Done." | 560 print("Done.") |
