annotate data_manager_gene_annotation/data_manager/data_manager.py @ 25:689075526eb3 draft

planemo upload
author scottx611x
date Thu, 23 Jun 2016 15:34:25 -0400
parents a70b4c3bdd8b
children 2f03bb5788e1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
1 import os
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
2 import sys
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
3 import uuid
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
4 import json
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
5 import argparse
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
6 import datetime
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
7 import requests
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
8 from requests.exceptions import ContentDecodingError
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
9
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
# Command-line interface of the data manager script.  Every option is a
# plain "store" option; none of them is marked as required.
parser = argparse.ArgumentParser(description='Create data manager json.')

# --out: file that receives the data manager JSON (stored as ``output``).
parser.add_argument(
    '--out', action='store', dest='output', help='JSON filename',
)

# --name: ID of the data table entry.  The default is a UUID generated once,
# when this statement runs.
parser.add_argument(
    '--name', action='store', dest='name', default=uuid.uuid4(),
    help='Data table entry unique ID',
)

# --url: location of the file to download.
parser.add_argument(
    '--url', action='store', dest='url', help='Download URL',
)
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
27
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
28 args = parser.parse_args()
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
29
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
30
19
7662f6a989c9 Uploaded
scottx611x
parents: 18
diff changeset
def url_download(url, name):
    """Stream the file at ``url`` into the current working directory.

    The local file is named after the last path segment of the final
    response URL.  When that segment is empty (for example the URL ends
    with a slash), ``name`` is used as the file name instead.

    :param url: address of the file to download.
    :param name: data table entry ID; only used as a fallback file name.
    :return: absolute path of the download target.  If the transfer fails
        with a ``ContentDecodingError`` or ``IOError`` the error is reported
        on stderr, the partial file is deleted and the path is still
        returned, so it may point to a file that does not exist.
    """
    response = requests.get(url=url, stream=True)

    # Generate file_name
    file_name = response.url.split("/")[-1] or str(name)

    block_size = 10 * 1024 * 1024  # 10MB chunked download
    download_failed = False
    try:
        # iter_content() yields bytes, so the target must be opened in
        # binary mode; text mode breaks on Python 3 and may alter the data.
        with open(file_name, 'wb') as f:
            try:
                # Good to note here that requests' iter_content() will
                # automatically handle decoding "gzip" and "deflate" formats
                for buf in response.iter_content(block_size):
                    f.write(buf)
            except (ContentDecodingError, IOError) as e:
                sys.stderr.write("Error occured downloading reference file: %s"
                                 % e)
                download_failed = True
    finally:
        # A streamed response keeps its connection until it is closed.
        response.close()

    if download_failed:
        # Delete the partial download only after the file handle is closed.
        os.remove(file_name)

    return os.path.join(os.getcwd(), file_name)
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
50
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
51
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
def main(args):
    """Download a gene annotation file and write the data manager JSON.

    :param args: parsed command-line namespace.  ``args.name`` supplies the
        ``dbkey`` of the entry, ``args.output`` is the path of the JSON file
        to write and ``args.url`` (optional) is the file to download.
    """
    # Previously the URL below was always used and --url was ignored; it is
    # now only the fallback when no URL is supplied.
    fallback_url = (
        "http://www.scott-ouellette.com/gene_annotations/chr1-hg19_genes.gtf")
    download_url = getattr(args, 'url', None) or fallback_url

    # Attempt to download gene annotation file from given url
    gene_annotation_file_path = url_download(download_url, args.name)

    # Cut the extension off the file name only: splitting the whole path on
    # "." would truncate it at the first dot of any parent directory.
    directory, file_name = os.path.split(gene_annotation_file_path)
    entry_name = os.path.join(directory, file_name.split(".")[0])

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        'data_tables': {
            'gene_annotation': {
                'value': str(datetime.datetime.now()),
                'dbkey': str(args.name),
                'name': entry_name,
            }
        }
    }

    with open(args.output, "w") as f:
        json.dump(data_manager_entry, f)
0
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
71
0442068f5c91 Uploaded
scottx611x
parents:
diff changeset
72 if __name__ == '__main__':
7
89ba3a52e764 Uploaded new data_manager.py
scottx611x
parents: 6
diff changeset
73 main(args)