annotate data_manager_gene_annotation/data_manager/data_manager.py @ 7:89ba3a52e764 draft

Uploaded new data_manager.py
author scottx611x
date Fri, 17 Jun 2016 16:20:54 -0400
parents 810ef478fc98
children 57ce598b7737
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
1 import os
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
2 import sys
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
3 import uuid
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
4 import json
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
5 import argparse
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
6 import datetime
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
7 import requests
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
8 from requests.exceptions import ContentDecodingError
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
9
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
# Command-line interface: every option has a default, so the script can be
# run with no arguments at all.
parser = argparse.ArgumentParser(description='Create data manager json.')

# File name of the JSON document that main() writes.
parser.add_argument(
    '--out',
    action='store',
    dest='output',
    default="gene_annotation.json",
    help='JSON filename'
)

# Identifier for the data table entry; a random UUID unless one is supplied.
# The default is a uuid.UUID object (not a str), created once at start-up.
parser.add_argument(
    '--name',
    action='store',
    dest='name',
    default=uuid.uuid4(),
    help='Data table entry unique ID'
)

# Location the gene annotation file is downloaded from.
parser.add_argument(
    '--url',
    action='store',
    dest='url',
    default="http://www.scott-ouellette.com/gene_annotations/chr1-hg19_genes.gtf",
    help='Download URL'
)

# Parsed at module level; the entry point passes this namespace to main().
args = parser.parse_args()
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
30
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
31
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
def url_download(url, name, workdir):
    """Stream the file at *url* into *workdir* and return the local path.

    The file is written to ``<workdir>/download_<name>.dat`` in chunks so
    that large downloads are never held in memory all at once.

    Args:
        url: Address of the file to download.
        name: Identifier embedded in the downloaded file's name.
        workdir: Directory to write into; created if it does not exist.

    Returns:
        The path of the downloaded file. If the download fails (HTTP error
        status, content decoding error, or I/O error while writing), the
        error is reported on stderr, the partial file is removed, and the
        path is still returned -- it then refers to a file that no longer
        exists.

    Raises:
        Errors raised while establishing the connection (``requests.get``)
        or while opening the destination file are not handled here and
        propagate to the caller.
    """
    # Create path if it doesn't exist
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    response = requests.get(url=url, stream=True)

    # Create path that we will write the file to
    file_path = os.path.join(workdir, 'download_{}.dat'.format(name))

    block_size = 10 * 1024 * 1024  # 10MB chunked download
    failed = False
    try:
        # iter_content() yields bytes, so the file must be opened in binary
        # mode; text mode raises TypeError on Python 3 and may rewrite line
        # endings on Windows.
        with open(file_path, 'wb') as f:
            try:
                # Treat 4xx/5xx responses as failures instead of saving the
                # error page as data. requests' exceptions derive from
                # IOError, so the handler below covers them.
                response.raise_for_status()
                for buf in response.iter_content(block_size):
                    f.write(buf)
            except (ContentDecodingError, IOError) as e:
                sys.stderr.write("Error occured downloading reference file: %s"
                                 % e)
                failed = True
    finally:
        # A streamed response holds its connection until closed.
        response.close()

    if failed:
        # Remove the partial download only after the handle is closed.
        os.remove(file_path)

    return file_path
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
53
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
54
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
def main(args):
    """Download the gene annotation file and record it as data manager JSON.

    Args:
        args: Parsed command-line namespace providing ``url`` (download
            location), ``name`` (entry identifier) and ``output`` (JSON
            file name).

    The JSON document is written to ``<cwd>/gene_annotation/<args.output>``
    and holds a single ``gene_annotation`` data table entry with the current
    timestamp, the entry name and the path of the downloaded file.
    """
    target_dir = os.path.join(os.getcwd(), 'gene_annotation')

    # Fetch the annotation file first; url_download also creates target_dir
    # when it is missing, which the JSON write below relies on.
    downloaded_path = url_download(args.url, args.name, target_dir)

    # One row of the 'gene_annotation' data table.
    table_row = {
        'date': str(datetime.datetime.now()),
        'name': str(args.name),
        'path': downloaded_path
    }
    payload = {'data_tables': {'gene_annotation': table_row}}

    # Serialise the entry next to the downloaded file.
    json_path = os.path.join(target_dir, args.output)
    with open(json_path, "w+") as handle:
        handle.write(json.dumps(payload))
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
74
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
# Script entry point: run the download with the module-level parsed arguments.
if __name__ == "__main__":
    main(args)