annotate data_manager/data_manager_plant_tribes_scaffolds_download.py @ 5:1550b1741780 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader commit 02d2967f77e3fa5a18aea63dc84aa9ab418dc165"
author iuc
date Sun, 22 Nov 2020 12:52:36 +0000
parents 93253aebaf2e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
1 #!/usr/bin/env python
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
2 #
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
3 # Data manager for downloading Plant Tribes scaffolds data.
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
4 import argparse
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
5 import json
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
6 import os
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
7 import shutil
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
8 import sys
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
9 import tarfile
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
10 import zipfile
4
93253aebaf2e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader commit 6085b7d47fcb47ad1587ac2542abdef782f61fe4"
iuc
parents: 3
diff changeset
11 from urllib.request import Request, urlopen
0
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
12
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
13 DEFAULT_DATA_TABLE_NAMES = ["plant_tribes_scaffolds"]
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
14
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
15
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append ``data_table_entry`` to the named table of ``data_manager_dict``.

    The ``'data_tables'`` mapping and the list for ``data_table_name`` are
    created on demand.  ``data_manager_dict`` is modified in place and the
    same object is returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
21
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
22
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
def make_directory(dir):
    """Create directory ``dir``, including missing parents, if it is absent.

    Calling this on a directory that already exists is a no-op.

    Raises:
        FileExistsError: if ``dir`` exists but is not a directory.
    """
    # exist_ok=True removes the window between "check" and "create" in which
    # another process could create the directory and make makedirs() fail.
    os.makedirs(dir, exist_ok=True)
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
26
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
27
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
def remove_directory(dir):
    """Recursively delete ``dir``; do nothing when the path does not exist."""
    if not os.path.exists(dir):
        return
    shutil.rmtree(dir)
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
31
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
32
1
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
def extract_archive(file_path, work_directory):
    """Extract the tar or zip archive at ``file_path`` into ``work_directory``.

    Tar archives (any compression ``tarfile`` can detect) are tried first,
    then zip archives.  A file that is neither is left untouched and the
    function returns without doing anything.  Always returns ``None``.
    """
    if tarfile.is_tarfile(file_path):
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(file_path, 'r:*') as archive:
            if hasattr(tarfile, 'data_filter'):
                # The archive comes from a download, so use the 'data'
                # extraction filter where this Python provides it: it
                # rejects members that would land outside work_directory.
                archive.extractall(work_directory, filter='data')
            else:
                archive.extractall(work_directory)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as archive:
            archive.extractall(work_directory)
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
41
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
42
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
def move_files(source_directory, target_directory):
    """Move every entry of ``source_directory`` to ``target_directory``."""
    for entry in os.listdir(source_directory):
        entry_path = os.path.join(source_directory, entry)
        shutil.move(entry_path, target_directory)
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
47
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
48
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
def url_download(url, work_directory):
    """Download ``url`` into ``work_directory`` and return the local file path.

    The local file is named after the last path component of ``url``
    (``os.path.basename(url)``).  Any failure while opening the URL or
    writing the file terminates the program via ``sys.exit`` with the
    error text as the exit message.
    """
    file_path = os.path.join(work_directory, os.path.basename(url))
    try:
        # Both handles are closed on every exit path by the with-statement.
        with urlopen(Request(url)) as src, open(file_path, 'wb') as dst:
            # Stream in chunks so large archives are never held in memory.
            shutil.copyfileobj(src, dst)
    except Exception as e:
        sys.exit(str(e))
    return file_path
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
69
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
70
80b0bd65cbfb Uploaded
iuc
parents: 0
diff changeset
def download(target_directory, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
    """Fetch scaffolds data and default configs and build the data manager dict.

    Steps:
      1. Download ``web_url`` into ``./scaffolds``, extract it, and move the
         extracted content into ``target_directory``.
      2. Add one data table entry (``value``, ``name``, ``path``,
         ``description``) per item listed in ``target_directory`` to every
         table named in ``data_table_names``.
      3. Download ``config_web_url`` into ``./configs``, extract it, and move
         the content of its ``<entry_name>`` sub-directory into
         ``target_directory/<entry_name>``, where ``entry_name`` is the last
         item listed in ``target_directory``.

    Returns the populated data manager dictionary.  Exits via ``sys.exit``
    when ``target_directory`` is empty after step 1.
    """
    data_manager_dict = {}
    # Download the scaffolds data.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
    make_directory(work_directory)
    file_path = url_download(web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the scaffolds data files into the defined output directory.
    move_files(work_directory, target_directory)
    remove_directory(work_directory)
    # Populate the data_manager_dict with the scaffolds data entries.
    entry_name = None
    for entry_name in os.listdir(target_directory):
        full_path = os.path.abspath(os.path.join(target_directory, entry_name))
        for data_table_name in data_table_names:
            # Build a fresh dict for every appended row; reusing one dict
            # would make all rows alias the values of the last item.
            data_table_entry = {
                'value': entry_name,
                'name': entry_name,
                'path': full_path,
                'description': description,
            }
            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
    if entry_name is None:
        # extract_archive() silently skips files that are not archives, so
        # an empty directory here means nothing usable was downloaded.
        sys.exit('No scaffolds data found in %s after downloading %s' % (target_directory, web_url))
    # Download the default configuration files.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
    make_directory(work_directory)
    file_path = url_download(config_web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the default configuration files into the defined output directory.
    source_configs_directory = os.path.join(work_directory, entry_name)
    target_configs_directory = os.path.join(target_directory, entry_name)
    move_files(source_configs_directory, target_configs_directory)
    remove_directory(work_directory)
    return data_manager_dict
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
106
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
107
4c96b684f0fd Uploaded
iuc
parents:
diff changeset
# Command line interface.  argparse derives each destination attribute from
# the long option name and defaults every option to None.
parser = argparse.ArgumentParser()
parser.add_argument('--description', help='Description')
parser.add_argument('--name', help='Data table entry unique ID')
parser.add_argument('--out_file', help='JSON output file')
parser.add_argument('--web_url', help='URL for downloading scaffolds')
parser.add_argument('--config_web_url', help='URL for downloading default configs')

args = parser.parse_args()

# The file given by --out_file is first read as JSON to find the directory
# that receives the downloaded data; it is overwritten with the result below.
with open(args.out_file) as fh:
    params = json.load(fh)
target_directory = params['output_data'][0]['extra_files_path']
make_directory(target_directory)

description = '' if args.description is None else args.description.strip()

# Get the scaffolds data.
data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description)
# Write the JSON output dataset.
with open(args.out_file, 'w') as fh:
    json.dump(data_manager_dict, fh, sort_keys=True)