iuc / data_manager_plant_tribes_scaffolds_downloader
changeset 1:80b0bd65cbfb (draft)
description: Uploaded
author:      iuc
date:        Wed, 29 Mar 2017 12:08:57 -0400
parents:     4c96b684f0fd
children:    13c235ded82e
files:       data_manager/data_manager_plant_tribes_scaffolds_download.py
diffstat:    1 files changed, 50 insertions(+), 18 deletions(-)
```diff
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.py	Fri Jan 13 10:34:15 2017 -0500
+++ b/data_manager/data_manager_plant_tribes_scaffolds_download.py	Wed Mar 29 12:08:57 2017 -0400
@@ -31,9 +31,23 @@
     shutil.rmtree(dir)
 
 
-def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
-    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
-    make_directory(work_directory)
+def extract_archive(file_path, work_directory):
+    if tarfile.is_tarfile(file_path):
+        fh = tarfile.open(file_path, 'r:*')
+    elif zipfile.is_zipfile(file_path):
+        fh = zipfile.ZipFile(file_path, 'r')
+    else:
+        return
+    fh.extractall(work_directory)
+
+
+def move_files(source_directory, target_directory):
+    # Move the files into defined output directory.
+    for filename in os.listdir(source_directory):
+        shutil.move(os.path.join(source_directory, filename), target_directory)
+
+
+def url_download(url, work_directory):
     file_path = os.path.join(work_directory, os.path.basename(url))
     src = None
     dst = None
@@ -54,26 +68,43 @@
             src.close()
         if dst:
             dst.close()
-    if tarfile.is_tarfile(file_path):
-        fh = tarfile.open(file_path, 'r:*')
-    elif zipfile.is_zipfile(file_path):
-        fh = zipfile.ZipFile(file_path, 'r')
-    else:
-        return
-    fh.extractall(work_directory)
+    return file_path
+
+
+def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
+    data_manager_dict = {}
+    data_table_entry = {}
+    # Download the scaffolds data.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
+    make_directory(work_directory)
+    file_path = url_download(web_url, work_directory)
+    extract_archive(file_path, work_directory)
     os.remove(file_path)
-    # Move the scaffolds data files into defined output directory.
-    for filename in os.listdir(work_directory):
-        shutil.move(os.path.join(work_directory, filename), target_directory)
+    # Move the scaffolds data files into the defined output directory.
+    move_files(work_directory, target_directory)
     remove_directory(work_directory)
-    data_manager_dict = {}
-    # Populate the data table, there should be a single entry in target_directory.
+    # Populate the data_manager_dict with the scaffolds data entry.
     for file_path in os.listdir(target_directory):
         full_path = os.path.abspath(os.path.join(target_directory, file_path))
         entry_name = "%s" % os.path.basename(file_path)
-        data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description)
+        data_table_entry['value'] = entry_name
+        data_table_entry['name'] = entry_name
+        data_table_entry['path'] = full_path
+        data_table_entry['description'] = description
+    # Populate the data_manager_dict.
     for data_table_name in data_table_names:
         data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+    # Download the default configuration files.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
+    make_directory(work_directory)
+    file_path = url_download(config_web_url, work_directory)
+    extract_archive(file_path, work_directory)
+    os.remove(file_path)
+    # Move the default configuration files into the defined output directory.
+    source_configs_directory = os.path.join(work_directory, entry_name)
+    target_configs_directory = os.path.join(target_directory, entry_name)
+    move_files(source_configs_directory, target_configs_directory)
+    remove_directory(work_directory)
     return data_manager_dict
 
 
@@ -81,7 +112,8 @@
 parser.add_argument('--description', dest='description', default=None, help='Description')
 parser.add_argument('--name', dest='name', help='Data table entry unique ID')
 parser.add_argument('--out_file', dest='out_file', help='JSON output file')
-parser.add_argument('--web_url', dest='web_url', help='Web URL')
+parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
+parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')
 
 args = parser.parse_args()
 
@@ -97,7 +129,7 @@
     description = args.description.strip()
 
 # Get the scaffolds data.
-data_manager_dict = url_download(target_directory, args.web_url, description)
+data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description)
 # Write the JSON output dataset.
 fh = open(args.out_file, 'wb')
 fh.write(json.dumps(data_manager_dict))
```
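For readers following the refactoring, below is a minimal standalone sketch of the helper pattern this changeset introduces (`extract_archive` plus `move_files`); it is not the changeset's code verbatim. It operates on a hypothetical local archive path instead of a download, and it closes the archive handles with context managers, which the committed `extract_archive` does not do.

```python
import os
import shutil
import tarfile
import zipfile


def extract_archive(file_path, work_directory):
    # Detect the archive type and extract everything into work_directory.
    # The with-blocks close the handle, unlike the committed version.
    if tarfile.is_tarfile(file_path):
        with tarfile.open(file_path, 'r:*') as fh:
            fh.extractall(work_directory)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as fh:
            fh.extractall(work_directory)


def move_files(source_directory, target_directory):
    # Move every extracted file into the defined output directory.
    for filename in os.listdir(source_directory):
        shutil.move(os.path.join(source_directory, filename), target_directory)


# Hypothetical usage mirroring download(): extract, relocate, clean up.
work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
target_directory = os.path.abspath(os.path.join(os.getcwd(), 'output'))
os.makedirs(work_directory)
os.makedirs(target_directory)
extract_archive('scaffolds.tar.gz', work_directory)  # hypothetical archive path
move_files(work_directory, target_directory)
shutil.rmtree(work_directory)
```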