Mercurial > repos > iuc > data_manager_cat
annotate data_manager/data_manager_cat.py @ 2:472dabcb03bf draft default tip
planemo upload commit 09b56ef3e09ad6c5923c88616fea5cbd77d87616
author | iuc |
---|---|
date | Mon, 18 Dec 2023 09:36:10 +0000 |
parents | 74af283d8ebd |
children |
rev | line source |
---|---|
0
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
2 from __future__ import print_function |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
3 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
4 import argparse |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
5 import json |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
6 import os.path |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
7 import subprocess |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
8 import sys |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
9 import tarfile |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
10 import tempfile |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
11 import zipfile |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
12 try: |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
13 # For Python 3.0 and later |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
14 from urllib.request import urlopen |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
15 except ImportError: |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
16 # Fall back to Python 2 imports |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
17 from urllib2 import urlopen |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
18 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
19 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
def url_download(url, workdir):
    """Download *url* into *workdir* and unpack it when it is an archive.

    The payload is streamed to ``<workdir>/download.dat``.  If that file is
    a tar archive (any compression ``tarfile`` recognises) or a zip archive,
    its contents are extracted into *workdir* and the downloaded file is
    removed.  Any other payload is left in place as ``download.dat``.

    :param url: location handed to ``urlopen``.
    :param workdir: existing directory receiving the download/extraction.
    :raises: whatever ``urlopen``, the file system or the archive modules
        raise; nothing is swallowed here.
    """
    file_path = os.path.join(workdir, 'download.dat')
    chunk_size = 64 * 2**10  # 64 KiB per read keeps the loop overhead low

    src = urlopen(url)
    try:
        with open(file_path, 'wb') as dst:
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                dst.write(chunk)
    finally:
        # Release the connection even when writing to disk fails.
        src.close()

    if tarfile.is_tarfile(file_path):
        archive = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        archive = zipfile.ZipFile(file_path, 'r')
    else:
        # Not an archive: keep the raw download where it is.
        return

    try:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only point this at sources you trust, or add member
        # validation / a tarfile extraction filter.
        archive.extractall(workdir)
    finally:
        # Close the archive handle before deleting the file underneath it.
        archive.close()
    os.remove(file_path)
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
44 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
45 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
def cat_prepare(install_dir, db_dir=None, tax_dir=None):
    """Run ``CAT prepare`` with *install_dir* as working directory.

    When both *db_dir* and *tax_dir* are given the command is invoked with
    ``--existing -d <db_dir> -t <tax_dir>``; otherwise it is invoked with
    ``--fresh -q``.

    :param install_dir: working directory for the ``CAT`` process.
    :param db_dir: database folder passed via ``-d`` (optional).
    :param tax_dir: taxonomy folder passed via ``-t`` (optional).
    :raises Exception: if ``CAT`` exits with a non-zero status; the message
        carries the command line, the return code and the captured
        stdout/stderr.
    """
    if db_dir and tax_dir:
        cmd = ['CAT', 'prepare', '--existing', '-d', db_dir, '-t', tax_dir]
    else:
        cmd = ['CAT', 'prepare', '--fresh', '-q']

    # The context managers close (and delete) both capture files on every
    # path, including success and unexpected errors.
    with tempfile.NamedTemporaryFile() as cmd_stdout, \
            tempfile.NamedTemporaryFile() as cmd_stderr:
        return_code = subprocess.call(cmd, shell=False, cwd=install_dir,
                                      stdout=cmd_stdout, stderr=cmd_stderr)
        if return_code:
            # The child wrote through the shared file offset, so rewind
            # before reading or the captured output comes back empty.
            cmd_stdout.seek(0)
            cmd_stderr.seek(0)
            msg = "stdout:\n%s\nstderr:\n%s" % (
                cmd_stdout.read().decode('utf-8', 'replace'),
                cmd_stderr.read().decode('utf-8', 'replace'))
            raise Exception('Error: (%s), returncode=%s %s'
                            % (' '.join(cmd), return_code, msg))
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
62 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
63 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
def _strip_trailing_sep(path):
    """Return *path* without trailing separators (a bare root is kept)."""
    return path.rstrip(os.sep) or path


def _find_cat_layout(install_path):
    """Find the directory that directly holds both CAT folders.

    Walks *install_path* top-down and returns ``(parent, db_name, tax_name)``
    for the first directory containing a sub-directory whose name ends in
    ``CAT_database`` and one whose name ends in ``taxonomy``.  Returns
    ``None`` when no single directory contains both.
    """
    for root, dirs, _ in os.walk(install_path):
        # Reset per directory so the two folders are guaranteed siblings.
        db_name = None
        tax_name = None
        for dname in dirs:
            if dname.endswith('CAT_database'):
                db_name = dname
            elif dname.endswith('taxonomy'):
                tax_name = dname
        if db_name and tax_name:
            return root, db_name, tax_name
    return None


def main():
    """Prepare a CAT database and write its ``cat_database`` table entry.

    Command-line modes:

    * ``--database_folder`` and ``--taxonomy_folder`` given: run
      ``cat_prepare`` on the existing folders from the current directory.
    * otherwise ``--install_path`` is required: the directory is created if
      missing, then filled either by ``url_download`` (when ``--db_url`` is
      given) or by ``cat_prepare``, and searched for the database and
      taxonomy sub-directories.

    The resulting entry is written as JSON to ``--config_file``.  The
    process exits with status 1 (message on stderr) when the arguments are
    insufficient or no database/taxonomy pair can be located.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', required=True)
    parser.add_argument('--install_path', default=None)
    parser.add_argument('--db_url', default=None)
    parser.add_argument('--database_folder', default=None)
    parser.add_argument('--taxonomy_folder', default=None)
    args = parser.parse_args()

    if args.database_folder and args.taxonomy_folder:
        # A trailing slash would otherwise make basename() return ''.
        db_folder = _strip_trailing_sep(args.database_folder)
        tax_folder = _strip_trailing_sep(args.taxonomy_folder)
        cat_path = os.path.dirname(db_folder)
        cat_db = os.path.basename(db_folder)
        tax_db = os.path.basename(tax_folder)
        cat_prepare(os.getcwd(),
                    db_dir=args.database_folder,
                    tax_dir=args.taxonomy_folder)
    elif not args.install_path:
        # sys.exit(<str>) prints the text to stderr and exits with status 1.
        sys.exit('Error: provide --install_path, or both '
                 '--database_folder and --taxonomy_folder')
    else:
        if not os.path.exists(args.install_path):
            os.makedirs(args.install_path)
        if args.db_url:
            url_download(args.db_url, args.install_path)
        else:
            cat_prepare(args.install_path)
        layout = _find_cat_layout(args.install_path)
        if layout is None:
            sys.exit('Error: no CAT_database and taxonomy directories '
                     'found under %s' % args.install_path)
        cat_path, cat_db, tax_db = layout

    cat_dir = os.path.basename(_strip_trailing_sep(cat_path))
    data_table_entry = dict(value=cat_dir, name=cat_dir,
                            database_folder=os.path.join(cat_dir, cat_db),
                            taxonomy_folder=os.path.join(cat_dir, tax_db))
    dm_dict = {'data_tables': {'cat_database': [data_table_entry]}}
    # save info to json file
    with open(args.config_file, 'w') as fh:
        json.dump(dm_dict, fh, sort_keys=True)
0
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
114 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
115 |
cffd8e2382cf
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff
changeset
|
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()