Mercurial > repos > shellac > guppy_basecaller
diff env/bin/dynamodb_dump @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 |
parents | 26e78fe6e8c4 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/bin/dynamodb_dump Thu May 14 14:56:58 2020 -0400 @@ -0,0 +1,76 @@ +#!/Users/pldms/Development/Projects/2020/david-matthews-galaxy/guppy_basecaller/env/bin/python3 + +import argparse +import errno +import os + +import boto +from boto.compat import json +from boto.compat import six + + +DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem. + +Each table is dumped into two files: + - {table_name}.metadata stores the table's name, schema and provisioned + throughput. + - {table_name}.data stores the table's actual contents. + +Both files are created in the current directory. To write them somewhere else, +use the --out-dir parameter (the target directory will be created if needed). +""" + + +def dump_table(table, out_dir): + metadata_file = os.path.join(out_dir, "%s.metadata" % table.name) + data_file = os.path.join(out_dir, "%s.data" % table.name) + + with open(metadata_file, "w") as metadata_fd: + json.dump( + { + "name": table.name, + "schema": table.schema.dict, + "read_units": table.read_units, + "write_units": table.write_units, + }, + metadata_fd + ) + + with open(data_file, "w") as data_fd: + for item in table.scan(): + # JSON can't serialize sets -- convert those to lists. + data = {} + for k, v in six.iteritems(item): + if isinstance(v, (set, frozenset)): + data[k] = list(v) + else: + data[k] = v + + data_fd.write(json.dumps(data)) + data_fd.write("\n") + + +def dynamodb_dump(tables, out_dir): + try: + os.makedirs(out_dir) + except OSError as e: + # We don't care if the dir already exists. + if e.errno != errno.EEXIST: + raise + + conn = boto.connect_dynamodb() + for t in tables: + dump_table(conn.get_table(t), out_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="dynamodb_dump", + description=DESCRIPTION + ) + parser.add_argument("--out-dir", default=".") + parser.add_argument("tables", metavar="TABLES", nargs="+") + + namespace = parser.parse_args() + + dynamodb_dump(namespace.tables, namespace.out_dir)