Mercurial > repos > iuc > data_manager_plant_tribes_scaffolds_downloader
annotate data_manager/data_manager_plant_tribes_scaffolds_download.py @ 3:5833ef61c1f8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader commit 3b27dae566cb21e08f5915ae20e0727f7d694707
author | iuc |
---|---|
date | Fri, 17 Aug 2018 07:04:08 -0400 |
parents | 80b0bd65cbfb |
children | 93253aebaf2e |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 # | |
3 # Data manager for downloading Plant Tribes scaffolds data. | |
4 import argparse | |
5 import json | |
6 import os | |
7 import shutil | |
8 import sys | |
9 import tarfile | |
10 import urllib2 | |
11 import zipfile | |
12 | |
13 | |
# Galaxy tool data table(s) that receive scaffold entries when the caller
# does not supply its own table names.
DEFAULT_DATA_TABLE_NAMES = ["plant_tribes_scaffolds"]
15 | |
16 | |
def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append *data_table_entry* to the named table in *data_manager_dict*.

    The 'data_tables' mapping and the per-table entry list are created on
    first use.  Returns the (mutated) *data_manager_dict*.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
22 | |
23 | |
def make_directory(dir):
    """Create directory *dir* (including parents) if it does not exist.

    Uses EAFP instead of an exists-then-create check: the original had a
    TOCTOU race where a directory created by another process between the
    check and makedirs would raise.  Any OSError other than "already a
    directory" is re-raised.
    """
    try:
        os.makedirs(dir)
    except OSError:
        # Swallow only the already-exists case; propagate real failures
        # (permissions, bad path component, ...).
        if not os.path.isdir(dir):
            raise
27 | |
28 | |
def remove_directory(dir):
    """Recursively delete *dir*; a missing directory is a silent no-op."""
    if not os.path.exists(dir):
        return
    shutil.rmtree(dir)
32 | |
33 | |
def extract_archive(file_path, work_directory):
    """Extract the tar or zip archive at *file_path* into *work_directory*.

    A file that is neither a tarball nor a zip is silently ignored,
    matching the original best-effort behaviour.  The archive handle is
    now always closed (the original leaked it, holding the file open).
    """
    if tarfile.is_tarfile(file_path):
        fh = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        fh = zipfile.ZipFile(file_path, 'r')
    else:
        return
    try:
        fh.extractall(work_directory)
    finally:
        # Release the handle even if extraction fails part-way.
        fh.close()
42 | |
43 | |
def move_files(source_directory, target_directory):
    """Move every entry of *source_directory* into *target_directory*."""
    for entry in os.listdir(source_directory):
        entry_path = os.path.join(source_directory, entry)
        shutil.move(entry_path, target_directory)
48 | |
49 | |
def url_download(url, work_directory):
    """Download *url* into *work_directory* and return the local file path.

    Errors are reported on stderr rather than raised; the computed path is
    returned regardless, so on failure the caller may receive a path to a
    missing or partial file.
    """
    file_path = os.path.join(work_directory, os.path.basename(url))
    response = None
    out_fh = None
    try:
        response = urllib2.urlopen(urllib2.Request(url))
        out_fh = open(file_path, 'wb')
        # Stream the body in 1 KiB chunks.
        chunk = response.read(2 ** 10)
        while chunk:
            out_fh.write(chunk)
            chunk = response.read(2 ** 10)
    except Exception as e:
        # Equivalent to the original Py2 "print >>sys.stderr, str(e)".
        sys.stderr.write(str(e) + '\n')
    finally:
        if response:
            response.close()
        if out_fh:
            out_fh.close()
    return file_path
72 | |
73 | |
def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
    """Fetch the scaffolds archive and default configs into *target_directory*.

    Downloads and unpacks *web_url*, registers one entry per unpacked item
    in each table of *data_table_names*, then downloads *config_web_url*
    and moves its files under the scaffolds entry directory.  Returns the
    data-manager dictionary describing the new table entries.
    """
    data_manager_dict = {}
    # Download and unpack the scaffolds archive in a scratch directory.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
    make_directory(work_directory)
    file_path = url_download(web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the scaffolds data files into the defined output directory.
    move_files(work_directory, target_directory)
    remove_directory(work_directory)
    # Populate data_manager_dict with one entry per unpacked item.
    entry_name = None
    for item in os.listdir(target_directory):
        full_path = os.path.abspath(os.path.join(target_directory, item))
        entry_name = os.path.basename(item)
        # BUGFIX: the original created a single dict before the loop and
        # mutated/appended it every iteration, so all appended rows aliased
        # one object and ended up holding the last file's values.  A fresh
        # dict per item gives each row its own values.
        data_table_entry = {
            'value': entry_name,
            'name': entry_name,
            'path': full_path,
            'description': description,
        }
        for data_table_name in data_table_names:
            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
    # Download the default configuration files.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
    make_directory(work_directory)
    file_path = url_download(config_web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the default configuration files into the defined output directory.
    # NOTE(review): as in the original, this uses entry_name from the last
    # loop iteration — presumably the scaffolds archive unpacks to a single
    # top-level directory; confirm against the archive layout.
    source_configs_directory = os.path.join(work_directory, entry_name)
    target_configs_directory = os.path.join(target_directory, entry_name)
    move_files(source_configs_directory, target_configs_directory)
    remove_directory(work_directory)
    return data_manager_dict
109 | |
110 | |
# Command-line entry point: this module runs as a Galaxy data manager script.
parser = argparse.ArgumentParser()
parser.add_argument('--description', dest='description', default=None, help='Description')
parser.add_argument('--name', dest='name', help='Data table entry unique ID')
parser.add_argument('--out_file', dest='out_file', help='JSON output file')
parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')

args = parser.parse_args()

# Some magic happens with tools of type "manage_data" in that the output
# file contains some JSON data that allows us to define the target directory.
with open(args.out_file) as params_fh:  # original leaked this handle
    params = json.loads(params_fh.read())
target_directory = params['output_data'][0]['extra_files_path']
make_directory(target_directory)

description = '' if args.description is None else args.description.strip()

# Get the scaffolds data.
data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description)
# Write the JSON output dataset.  Text mode is safe: json.dumps emits no
# newlines here, so output bytes match the original 'wb' write.
with open(args.out_file, 'w') as fh:
    fh.write(json.dumps(data_manager_dict))