view env/bin/dynamodb_dump @ 4:79f47841a781 draft

"planemo upload commit 2a0fe2cc28b09e101d37293e53e82f61762262ec"
author shellac
date Thu, 14 May 2020 16:47:39 -0400
parents 26e78fe6e8c4
children
line wrap: on
line source

#!/Users/pldms/Development/Projects/2020/david-matthews-galaxy/guppy_basecaller/env/bin/python3

import argparse
import errno
import os

import boto
from boto.compat import json
from boto.compat import six


# Program description handed to argparse as the parser's `description`
# (see the `__main__` block at the bottom of this file). The layout of the
# list below is deliberate; the text itself is user-facing output.
DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem.

Each table is dumped into two files:
  - {table_name}.metadata stores the table's name, schema and provisioned
    throughput.
  - {table_name}.data stores the table's actual contents.

Both files are created in the current directory. To write them somewhere else,
use the --out-dir parameter (the target directory will be created if needed).
"""


def dump_table(table, out_dir):
    """Write one table to ``<name>.metadata`` and ``<name>.data`` in *out_dir*.

    The metadata file holds a single JSON object with the table's name,
    schema dict and read/write units. The data file holds one JSON object
    per line, one line for each item yielded by ``table.scan()``.

    :param table: object exposing ``name``, ``schema.dict``, ``read_units``,
        ``write_units`` and ``scan()`` (presumably a boto DynamoDB table,
        as returned by ``conn.get_table`` in ``dynamodb_dump``).
    :param out_dir: directory the two files are written into; it must
        already exist.
    """
    metadata_path = os.path.join(out_dir, "%s.metadata" % table.name)
    data_path = os.path.join(out_dir, "%s.data" % table.name)

    with open(metadata_path, "w") as meta_out:
        # Build the description only after the file is open, so the order of
        # side effects (create file, then read table attributes) is preserved.
        description = {
            "name": table.name,
            "schema": table.schema.dict,
            "read_units": table.read_units,
            "write_units": table.write_units,
        }
        json.dump(description, meta_out)

    with open(data_path, "w") as rows_out:
        for record in table.scan():
            # Sets are not JSON-serializable, so they are written as lists;
            # every other value is passed through untouched.
            row = {
                key: list(value) if isinstance(value, (set, frozenset)) else value
                for key, value in six.iteritems(record)
            }
            rows_out.write(json.dumps(row) + "\n")


def dynamodb_dump(tables, out_dir):
    """Dump every table named in *tables* into *out_dir*.

    *out_dir* is created (including missing parents) when it does not exist
    yet. A single connection obtained from ``boto.connect_dynamodb()`` is
    used to look up each table, which is then handed to ``dump_table``.

    :param tables: iterable of table names.
    :param out_dir: destination directory for the dump files.
    :raises OSError: if the directory cannot be created for any reason
        other than it already existing.
    """
    try:
        os.makedirs(out_dir)
    except OSError as err:
        # An already-existing target is fine; anything else is a real failure.
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise

    connection = boto.connect_dynamodb()
    for table_name in tables:
        table = connection.get_table(table_name)
        dump_table(table, out_dir)


if __name__ == "__main__":
    # Command-line entry point: parse the output directory and the table
    # names, then dump each requested table.
    parser = argparse.ArgumentParser(
        prog="dynamodb_dump",
        description=DESCRIPTION,
        # DESCRIPTION contains a hand-formatted bullet list. The default
        # formatter re-wraps the description into one paragraph, which
        # garbles that list in --help output; keep the text as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--out-dir",
        default=".",
        help="directory to write the dump files to "
             "(default: current directory; created if needed)",
    )
    parser.add_argument(
        "tables",
        metavar="TABLES",
        nargs="+",
        help="names of the DynamoDB tables to dump",
    )

    namespace = parser.parse_args()

    dynamodb_dump(namespace.tables, namespace.out_dir)