comparison env/bin/dynamodb_load @ 2:6af9afd405e9 draft

"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author shellac
date Thu, 14 May 2020 14:56:58 -0400
parents 26e78fe6e8c4
children
comparison
equal deleted inserted replaced
1:75ca89e9b81c 2:6af9afd405e9
1 #!/Users/pldms/Development/Projects/2020/david-matthews-galaxy/guppy_basecaller/env/bin/python3
2
3 import argparse
4 import os
5
6 import boto
7 from boto.compat import json
8 from boto.compat import six
9 from boto.dynamodb.schema import Schema
10
11
# Program description handed to argparse (description=DESCRIPTION) for the
# command-line help. The "{table_name}" markers are not substituted before
# the text is passed to argparse.
DESCRIPTION = """Load data into one or more DynamoDB tables.

For each table, data is read from two files:
- {table_name}.metadata for the table's name, schema and provisioned
throughput (only required if creating the table).
- {table_name}.data for the table's actual contents.

Both files are searched for in the current directory. To read them from
somewhere else, use the --in-dir parameter.

This program does not wipe the tables prior to loading data. However, any
items present in the data files will overwrite the table's contents.
"""
25
26
def _json_iterload(fd):
    """Lazily load newline-separated JSON objects from a file-like object.

    Lines are accumulated until the buffered text decodes as one complete
    JSON value, so a single object may span several lines. Buffers that
    contain only whitespace (for example blank lines between objects) are
    skipped.

    :param fd: An iterable of text lines, typically an open text file.
    :returns: A generator yielding each decoded JSON value in input order.
    :raises ValueError: If the input ends while the buffer still holds
        non-whitespace text that cannot be decoded as JSON.
    """
    # Use the builtin next() on an iterator: it works on Python 2 and 3,
    # whereas the Python 2-only ``fd.next()`` method does not exist on
    # Python 3 file objects.
    lines = iter(fd)
    buffer = ""
    eof = False
    while not eof:
        try:
            # Add a line to the buffer.
            buffer += next(lines)
        except StopIteration:
            # Don't let this propagate yet: the buffer may still hold the
            # last object of the input, which must be decoded below.
            eof = True
        try:
            # Try to decode a JSON object.
            json_object = json.loads(buffer.strip())
        except ValueError:
            if eof and buffer.strip():
                # No more lines to load and the buffer contains something
                # other than whitespace: the input is malformed.
                raise
            # We couldn't decode a complete JSON object: load more lines.
            continue

        # Success: clear the buffer (everything was decoded).
        buffer = ""
        yield json_object
54
55
def create_table(metadata_fd):
    """Create a table from a metadata file-like object.

    Placeholder: the body is empty, so ``metadata_fd`` is never read and
    the function always returns ``None``.
    """
    return None
58
59
def load_table(table, in_fd):
    """Store every JSON object read from ``in_fd`` as an item of ``table``.

    :param table: Object providing ``new_item(attrs=...)``, whose result
        is saved by calling ``put()`` on it.
    :param in_fd: File-like object of newline-separated JSON objects,
        decoded with ``_json_iterload``.
    """
    for record in _json_iterload(in_fd):
        # Convert lists back to sets; every other value is stored as-is.
        attrs = {
            key: set(value) if isinstance(value, list) else value
            for key, value in six.iteritems(record)
        }
        table.new_item(attrs=attrs).put()
71
72
def dynamodb_load(tables, in_dir, create_tables):
    """Load ``{name}.data`` from ``in_dir`` into each named table.

    :param tables: Iterable of table names.
    :param in_dir: Directory holding the ``{name}.metadata`` and
        ``{name}.data`` files.
    :param create_tables: When true, each table is first created from the
        ``schema``, ``read_units`` and ``write_units`` keys of its
        metadata file, then refreshed with ``wait_for_active=True``;
        otherwise the table is looked up with ``get_table``.

    NOTE(review): with ``create_tables`` set, creation is attempted for
    every table unconditionally; presumably this fails for a table that
    already exists -- confirm against the boto API.
    """
    connection = boto.connect_dynamodb()
    for table_name in tables:
        metadata_path = os.path.join(in_dir, "%s.metadata" % table_name)
        data_path = os.path.join(in_dir, "%s.data" % table_name)

        if not create_tables:
            table = connection.get_table(table_name)
        else:
            with open(metadata_path) as metadata_fd:
                table_spec = json.load(metadata_fd)
            table = connection.create_table(
                name=table_name,
                schema=Schema(table_spec["schema"]),
                read_units=table_spec["read_units"],
                write_units=table_spec["write_units"],
            )
            table.refresh(wait_for_active=True)

        with open(data_path) as data_fd:
            load_table(table, data_fd)
93
94
if __name__ == "__main__":
    # Command-line entry point: parse the options and table names, then
    # hand them to dynamodb_load().
    parser = argparse.ArgumentParser(prog="dynamodb_load", description=DESCRIPTION)
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help=(
            "Create the tables if they don't exist already (without this flag, "
            "attempts to load data into non-existing tables fail)."
        ),
    )
    parser.add_argument("--in-dir", default=".")
    parser.add_argument("tables", metavar="TABLES", nargs="+")

    args = parser.parse_args()
    dynamodb_load(args.tables, args.in_dir, args.create_tables)