comparison data_manager/data_manager_plant_tribes_scaffolds_download.py @ 1:80b0bd65cbfb draft

Uploaded
author iuc
date Wed, 29 Mar 2017 12:08:57 -0400
parents 4c96b684f0fd
children 5833ef61c1f8
 def remove_directory(dir):
     if os.path.exists(dir):
         shutil.rmtree(dir)


-def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
-    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
-    make_directory(work_directory)
+def extract_archive(file_path, work_directory):
+    if tarfile.is_tarfile(file_path):
+        fh = tarfile.open(file_path, 'r:*')
+    elif zipfile.is_zipfile(file_path):
+        fh = zipfile.ZipFile(file_path, 'r')
+    else:
+        return
+    fh.extractall(work_directory)
+
+
+def move_files(source_directory, target_directory):
+    # Move the files into defined output directory.
+    for filename in os.listdir(source_directory):
+        shutil.move(os.path.join(source_directory, filename), target_directory)
+
+
+def url_download(url, work_directory):
     file_path = os.path.join(work_directory, os.path.basename(url))
     src = None
     dst = None
     try:
         req = urllib2.Request(url)
... (unchanged lines not shown)
     finally:
         if src:
             src.close()
         if dst:
             dst.close()
-    if tarfile.is_tarfile(file_path):
-        fh = tarfile.open(file_path, 'r:*')
-    elif zipfile.is_zipfile(file_path):
-        fh = zipfile.ZipFile(file_path, 'r')
-    else:
-        return
-    fh.extractall(work_directory)
+    return file_path
+
+
+def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
+    data_manager_dict = {}
+    data_table_entry = {}
+    # Download the scaffolds data.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
+    make_directory(work_directory)
+    file_path = url_download(web_url, work_directory)
+    extract_archive(file_path, work_directory)
     os.remove(file_path)
-    # Move the scaffolds data files into defined output directory.
-    for filename in os.listdir(work_directory):
-        shutil.move(os.path.join(work_directory, filename), target_directory)
+    # Move the scaffolds data files into the defined output directory.
+    move_files(work_directory, target_directory)
     remove_directory(work_directory)
-    data_manager_dict = {}
-    # Populate the data table, there should be a single entry in target_directory.
+    # Populate the data_manager_dict with the scaffolds data entry.
     for file_path in os.listdir(target_directory):
         full_path = os.path.abspath(os.path.join(target_directory, file_path))
         entry_name = "%s" % os.path.basename(file_path)
-        data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description)
+        data_table_entry['value'] = entry_name
+        data_table_entry['name'] = entry_name
+        data_table_entry['path'] = full_path
+        data_table_entry['description'] = description
+        # Populate the data_manager_dict.
         for data_table_name in data_table_names:
             data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+    # Download the default configuration files.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
+    make_directory(work_directory)
+    file_path = url_download(config_web_url, work_directory)
+    extract_archive(file_path, work_directory)
+    os.remove(file_path)
+    # Move the default configuration files into the defined output directory.
+    source_configs_directory = os.path.join(work_directory, entry_name)
+    target_configs_directory = os.path.join(target_directory, entry_name)
+    move_files(source_configs_directory, target_configs_directory)
+    remove_directory(work_directory)
     return data_manager_dict


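The refactoring above splits the old all-in-one url_download() into single-purpose helpers: url_download() now only fetches a URL and returns the local file path, extract_archive() unpacks tar or zip archives, move_files() relocates the extracted files, and the new download() runs that sequence twice, once for the scaffolds archive and once for the default configuration files. A minimal sketch of how the new helpers compose, assuming a placeholder archive URL and scratch/output directories (make_directory() and the module imports are defined earlier in the file, outside this comparison):

    # Sketch only: the URL and directories below are placeholders.
    work_dir = os.path.abspath(os.path.join(os.getcwd(), 'scratch'))
    make_directory(work_dir)
    archive = url_download('http://example.org/scaffolds.tar.bz2', work_dir)  # returns the downloaded file's path
    extract_archive(archive, work_dir)  # extracts tar/zip archives; returns silently for anything else
    os.remove(archive)                  # drop the archive, keep only the extracted files
    move_files(work_dir, '/path/to/target_directory')
    remove_directory(work_dir)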
 parser = argparse.ArgumentParser()
 parser.add_argument('--description', dest='description', default=None, help='Description')
 parser.add_argument('--name', dest='name', help='Data table entry unique ID')
 parser.add_argument('--out_file', dest='out_file', help='JSON output file')
-parser.add_argument('--web_url', dest='web_url', help='Web URL')
+parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
+parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')

 args = parser.parse_args()

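With the new --config_web_url option in place, an invocation of this data manager script looks roughly like the following; the URLs are placeholders, and --out_file is normally the JSON file Galaxy supplies to "manage_data" tools rather than something chosen by hand:

    python data_manager_plant_tribes_scaffolds_download.py \
        --web_url 'http://example.org/scaffolds.tar.bz2' \
        --config_web_url 'http://example.org/default_configs.tar.bz2' \
        --description 'PlantTribes scaffolds' \
        --out_file galaxy_provided_output.json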
 # Some magic happens with tools of type "manage_data" in that the output
 # file contains some JSON data that allows us to define the target directory.
... (unchanged lines not shown)
     description = ''
 else:
     description = args.description.strip()

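The "Some magic happens" comment above refers to the way Galaxy runs tools of type "manage_data": the file passed as --out_file already contains JSON describing the job, and data manager scripts conventionally read the output dataset's extra_files_path from it to use as the install target. A hedged sketch of that convention (the field names follow the usual Galaxy data manager pattern; they are not shown in this comparison):

    # Illustrative only: assumes the standard Galaxy 'manage_data' parameter file layout.
    import json

    with open('galaxy_provided_output.json') as fh:  # the file given via --out_file
        params = json.load(fh)
    # The first output's extra_files_path is conventionally used as target_directory.
    target_directory = params['output_data'][0]['extra_files_path']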
 # Get the scaffolds data.
-data_manager_dict = url_download(target_directory, args.web_url, description)
+data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description)
 # Write the JSON output dataset.
 fh = open(args.out_file, 'wb')
 fh.write(json.dumps(data_manager_dict))
 fh.close()
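For reference, add_data_table_entry() (defined earlier in the file, outside this comparison) conventionally accumulates entries under a "data_tables" key, so the dict serialized by json.dumps() above ends up shaped roughly like the sketch below; the table name and column values are illustrative assumptions, not taken from this diff:

    # Assumed shape of data_manager_dict; table name and values are placeholders.
    data_manager_dict = {
        'data_tables': {
            'plant_tribes_scaffolds': [
                {'value': '22Gv1.1',
                 'name': '22Gv1.1',
                 'path': '/path/to/tool-data/plant_tribes/scaffolds/22Gv1.1',
                 'description': 'PlantTribes scaffolds'},
            ],
        },
    }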