Mercurial > repos > shellac > sam_consensus_v3
annotate env/bin/dynamodb_dump @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac | 
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 1 #!/Users/cmdms/OneDrive-UOB/Development/Projects/2021/sam-consensus-v3/env/bin/python3 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 2 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 3 import argparse | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 4 import errno | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 5 import os | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 6 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 7 import boto | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 8 from boto.compat import json | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 9 from boto.compat import six | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 10 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 11 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 12 DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem. | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 13 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 14 Each table is dumped into two files: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 15 - {table_name}.metadata stores the table's name, schema and provisioned | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 16 throughput. | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 17 - {table_name}.data stores the table's actual contents. | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 18 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 19 Both files are created in the current directory. To write them somewhere else, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 20 use the --out-dir parameter (the target directory will be created if needed). | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 21 """ | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 22 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 23 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 24 def dump_table(table, out_dir): | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 25 metadata_file = os.path.join(out_dir, "%s.metadata" % table.name) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 26 data_file = os.path.join(out_dir, "%s.data" % table.name) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 27 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 28 with open(metadata_file, "w") as metadata_fd: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 29 json.dump( | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 30 { | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 31 "name": table.name, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 32 "schema": table.schema.dict, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 33 "read_units": table.read_units, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 34 "write_units": table.write_units, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 35 }, | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 36 metadata_fd | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 37 ) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 38 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 39 with open(data_file, "w") as data_fd: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 40 for item in table.scan(): | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 41 # JSON can't serialize sets -- convert those to lists. | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 42 data = {} | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 43 for k, v in six.iteritems(item): | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 44 if isinstance(v, (set, frozenset)): | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 45 data[k] = list(v) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 46 else: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 47 data[k] = v | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 48 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 49 data_fd.write(json.dumps(data)) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 50 data_fd.write("\n") | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 51 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 52 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 53 def dynamodb_dump(tables, out_dir): | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 54 try: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 55 os.makedirs(out_dir) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 56 except OSError as e: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 57 # We don't care if the dir already exists. | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 58 if e.errno != errno.EEXIST: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 59 raise | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 60 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 61 conn = boto.connect_dynamodb() | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 62 for t in tables: | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 63 dump_table(conn.get_table(t), out_dir) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 64 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 65 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 66 if __name__ == "__main__": | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 67 parser = argparse.ArgumentParser( | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 68 prog="dynamodb_dump", | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 69 description=DESCRIPTION | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 70 ) | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 71 parser.add_argument("--out-dir", default=".") | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 72 parser.add_argument("tables", metavar="TABLES", nargs="+") | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 73 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 74 namespace = parser.parse_args() | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 75 | 
| 
4f3585e2f14b
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
 shellac parents: diff
changeset | 76 dynamodb_dump(namespace.tables, namespace.out_dir) | 
