Mercurial > repos > iuc > data_manager_plant_tribes_scaffolds_downloader
comparison data_manager/data_manager_plant_tribes_scaffolds_download.py @ 1:80b0bd65cbfb draft
Uploaded
author | iuc |
---|---|
date | Wed, 29 Mar 2017 12:08:57 -0400 |
parents | 4c96b684f0fd |
children | 5833ef61c1f8 |
comparison
equal
deleted
inserted
replaced
0:4c96b684f0fd | 1:80b0bd65cbfb |
---|---|
29 def remove_directory(dir): | 29 def remove_directory(dir): |
30 if os.path.exists(dir): | 30 if os.path.exists(dir): |
31 shutil.rmtree(dir) | 31 shutil.rmtree(dir) |
32 | 32 |
33 | 33 |
34 def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES): | 34 def extract_archive(file_path, work_directory): |
35 work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds')) | 35 if tarfile.is_tarfile(file_path): |
36 make_directory(work_directory) | 36 fh = tarfile.open(file_path, 'r:*') |
| | 37 elif zipfile.is_zipfile(file_path): |
| | 38 fh = zipfile.ZipFile(file_path, 'r') |
| | 39 else: |
| | 40 return |
| | 41 fh.extractall(work_directory) |
| | 42 |
| | 43 |
| | 44 def move_files(source_directory, target_directory): |
| | 45 # Move the files into defined output directory. |
| | 46 for filename in os.listdir(source_directory): |
| | 47 shutil.move(os.path.join(source_directory, filename), target_directory) |
| | 48 |
| | 49 |
| | 50 def url_download(url, work_directory): |
37 file_path = os.path.join(work_directory, os.path.basename(url)) | 51 file_path = os.path.join(work_directory, os.path.basename(url)) |
38 src = None | 52 src = None |
39 dst = None | 53 dst = None |
40 try: | 54 try: |
41 req = urllib2.Request(url) | 55 req = urllib2.Request(url) |
52 finally: | 66 finally: |
53 if src: | 67 if src: |
54 src.close() | 68 src.close() |
55 if dst: | 69 if dst: |
56 dst.close() | 70 dst.close() |
57 if tarfile.is_tarfile(file_path): | 71 return file_path |
58 fh = tarfile.open(file_path, 'r:*') | 72 |
59 elif zipfile.is_zipfile(file_path): | 73 |
60 fh = zipfile.ZipFile(file_path, 'r') | 74 def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES): |
61 else: | 75 data_manager_dict = {} |
62 return | 76 data_table_entry = {} |
63 fh.extractall(work_directory) | 77 # Download the scaffolds data. |
| | 78 work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds')) |
| | 79 make_directory(work_directory) |
| | 80 file_path = url_download(web_url, work_directory) |
| | 81 extract_archive(file_path, work_directory) |
64 os.remove(file_path) | 82 os.remove(file_path) |
65 # Move the scaffolds data files into defined output directory. | 83 # Move the scaffolds data files into the defined output directory. |
66 for filename in os.listdir(work_directory): | 84 move_files(work_directory, target_directory) |
67 shutil.move(os.path.join(work_directory, filename), target_directory) | |
68 remove_directory(work_directory) | 85 remove_directory(work_directory) |
69 data_manager_dict = {} | 86 # Populate the data_manager_dict with the scaffolds data entry. |
70 # Populate the data table, there should be a single entry in target_directory. | |
71 for file_path in os.listdir(target_directory): | 87 for file_path in os.listdir(target_directory): |
72 full_path = os.path.abspath(os.path.join(target_directory, file_path)) | 88 full_path = os.path.abspath(os.path.join(target_directory, file_path)) |
73 entry_name = "%s" % os.path.basename(file_path) | 89 entry_name = "%s" % os.path.basename(file_path) |
74 data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description) | 90 data_table_entry['value'] = entry_name |
| | 91 data_table_entry['name'] = entry_name |
| | 92 data_table_entry['path'] = full_path |
| | 93 data_table_entry['description'] = description |
| | 94 # Populate the data_manager_dict. |
75 for data_table_name in data_table_names: | 95 for data_table_name in data_table_names: |
76 data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) | 96 data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) |
| | 97 # Download the default configuration files. |
| | 98 work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs')) |
| | 99 make_directory(work_directory) |
| | 100 file_path = url_download(config_web_url, work_directory) |
| | 101 extract_archive(file_path, work_directory) |
| | 102 os.remove(file_path) |
| | 103 # Move the default configuration files into the defined output directory. |
| | 104 source_configs_directory = os.path.join(work_directory, entry_name) |
| | 105 target_configs_directory = os.path.join(target_directory, entry_name) |
| | 106 move_files(source_configs_directory, target_configs_directory) |
| | 107 remove_directory(work_directory) |
77 return data_manager_dict | 108 return data_manager_dict |
78 | 109 |
79 | 110 |
80 parser = argparse.ArgumentParser() | 111 parser = argparse.ArgumentParser() |
81 parser.add_argument('--description', dest='description', default=None, help='Description') | 112 parser.add_argument('--description', dest='description', default=None, help='Description') |
82 parser.add_argument('--name', dest='name', help='Data table entry unique ID') | 113 parser.add_argument('--name', dest='name', help='Data table entry unique ID') |
83 parser.add_argument('--out_file', dest='out_file', help='JSON output file') | 114 parser.add_argument('--out_file', dest='out_file', help='JSON output file') |
84 parser.add_argument('--web_url', dest='web_url', help='Web URL') | 115 parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds') |
| | 116 parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs') |
85 | 117 |
86 args = parser.parse_args() | 118 args = parser.parse_args() |
87 | 119 |
88 # Some magic happens with tools of type "manage_data" in that the output | 120 # Some magic happens with tools of type "manage_data" in that the output |
89 # file contains some JSON data that allows us to define the target directory. | 121 # file contains some JSON data that allows us to define the target directory. |
95 description = '' | 127 description = '' |
96 else: | 128 else: |
97 description = args.description.strip() | 129 description = args.description.strip() |
98 | 130 |
99 # Get the scaffolds data. | 131 # Get the scaffolds data. |
100 data_manager_dict = url_download(target_directory, args.web_url, description) | 132 data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description) |
101 # Write the JSON output dataset. | 133 # Write the JSON output dataset. |
102 fh = open(args.out_file, 'wb') | 134 fh = open(args.out_file, 'wb') |
103 fh.write(json.dumps(data_manager_dict)) | 135 fh.write(json.dumps(data_manager_dict)) |
104 fh.close() | 136 fh.close() |