Mercurial > repos > shellac > guppy_basecaller
annotate env/bin/dynamodb_dump @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
| author | shellac | 
|---|---|
| date | Thu, 14 May 2020 14:56:58 -0400 | 
| parents | 26e78fe6e8c4 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
1 #!/Users/pldms/Development/Projects/2020/david-matthews-galaxy/guppy_basecaller/env/bin/python3 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
2 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
3 import argparse | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
4 import errno | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
5 import os | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
6 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
7 import boto | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
8 from boto.compat import json | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
9 from boto.compat import six | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
10 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
11 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
# Help text for the command line interface; the __main__ block passes it to
# argparse.ArgumentParser as ``description=``.
# NOTE(review): any leading indentation inside this literal could not be
# recovered from the annotated listing -- confirm against the pristine file.
DESCRIPTION = """Dump the contents of one or more DynamoDB tables to the local filesystem.

Each table is dumped into two files:
- {table_name}.metadata stores the table's name, schema and provisioned
throughput.
- {table_name}.data stores the table's actual contents.

Both files are created in the current directory. To write them somewhere else,
use the --out-dir parameter (the target directory will be created if needed).
"""
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
22 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
23 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
def _jsonable(value):
    """Return ``value`` in a form the ``json`` module can serialize.

    Sets and frozensets are turned into lists because JSON has no set type.
    The list is sorted when the elements can be ordered, so that repeated
    dumps of the same data produce identical files; otherwise the set's own
    iteration order is used. Any other value is returned unchanged.
    """
    if isinstance(value, (set, frozenset)):
        try:
            return sorted(value)
        except TypeError:
            # Elements that cannot be compared with each other: fall back to
            # a plain conversion rather than failing the whole dump.
            return list(value)
    return value


def dump_table(table, out_dir):
    """Write one table's metadata and contents into ``out_dir``.

    Two files are produced, named after ``table.name``:

    - ``<name>.metadata``: a single JSON object holding the table's name,
      ``table.schema.dict``, ``table.read_units`` and ``table.write_units``.
    - ``<name>.data``: one JSON object per line, one line per item yielded
      by ``table.scan()``.

    :param table: object exposing ``name``, ``schema.dict``, ``read_units``,
        ``write_units`` and ``scan()``; each scanned item is expected to be
        a mapping of attribute name to value.
    :param out_dir: existing directory in which both files are created.
    """
    metadata_file = os.path.join(out_dir, "%s.metadata" % table.name)
    data_file = os.path.join(out_dir, "%s.data" % table.name)

    with open(metadata_file, "w") as metadata_fd:
        json.dump(
            {
                "name": table.name,
                "schema": table.schema.dict,
                "read_units": table.read_units,
                "write_units": table.write_units,
            },
            metadata_fd
        )

    with open(data_file, "w") as data_fd:
        for item in table.scan():
            # JSON can't serialize sets -- convert those to lists.
            data = {k: _jsonable(v) for k, v in item.items()}
            data_fd.write(json.dumps(data))
            data_fd.write("\n")
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
51 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
52 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
def dynamodb_dump(tables, out_dir):
    """Dump every table named in ``tables`` into the directory ``out_dir``.

    ``out_dir`` is created (including missing parents) if it does not exist.
    A DynamoDB connection is then opened with ``boto.connect_dynamodb()`` and
    each table is fetched with ``conn.get_table()`` and handed to
    ``dump_table``.

    :param tables: iterable of table names.
    :param out_dir: path of the directory that receives the dump files.
    :raises OSError: if ``out_dir`` cannot be created, or if the path already
        exists but is not a directory.
    """
    try:
        os.makedirs(out_dir)
    except OSError as e:
        # We don't care if the dir already exists. EEXIST is also reported
        # when something that is not a directory occupies the path; that case
        # must fail here instead of surfacing later as an obscure open()
        # error after the connection has already been made.
        if e.errno != errno.EEXIST or not os.path.isdir(out_dir):
            raise

    conn = boto.connect_dynamodb()
    for t in tables:
        dump_table(conn.get_table(t), out_dir)
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
64 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
65 | 
| 
 
26e78fe6e8c4
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
 
shellac 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # RawDescriptionHelpFormatter keeps DESCRIPTION's line breaks and bullet
    # list as written; argparse's default formatter would re-wrap the whole
    # text into a single paragraph in the --help output.
    parser = argparse.ArgumentParser(
        prog="dynamodb_dump",
        description=DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--out-dir",
        default=".",
        help="directory to write the dump files to (default: current directory)",
    )
    parser.add_argument(
        "tables",
        metavar="TABLES",
        nargs="+",
        help="names of the tables to dump",
    )

    namespace = parser.parse_args()

    dynamodb_dump(namespace.tables, namespace.out_dir)
