annotate data_manager/data_manager_cat.py @ 0:cffd8e2382cf draft

"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
author iuc
date Mon, 09 Dec 2019 10:28:15 -0500
parents
children 74af283d8ebd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
2 from __future__ import print_function
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
3
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
4 import argparse
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
5 import json
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
6 import os.path
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
7 import subprocess
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
8 import sys
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
9 import tarfile
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
10 import tempfile
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
11 import zipfile
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
12 try:
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
13 # For Python 3.0 and later
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
14 from urllib.request import urlopen
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
15 except ImportError:
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
16 # Fall back to Python 2 imports
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
17 from urllib2 import urlopen
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
18
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
19
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def url_download(url, workdir):
    """Download ``url`` into ``workdir`` and unpack it if it is an archive.

    The payload is first written to ``<workdir>/download.dat``.  If that
    file is a tar archive (any compression ``tarfile`` can open with
    ``'r:*'``) or a zip archive, its members are extracted into ``workdir``
    and ``download.dat`` is removed.  Any other payload is left in place as
    ``download.dat``.

    :param url: location handed unchanged to ``urlopen``
    :param workdir: existing directory that receives the download
    :returns: None
    """
    file_path = os.path.join(workdir, 'download.dat')
    src = None
    try:
        src = urlopen(url)
        with open(file_path, 'wb') as dst:
            while True:
                chunk = src.read(2**10)
                if not chunk:
                    break
                dst.write(chunk)
    finally:
        if src:
            src.close()

    if tarfile.is_tarfile(file_path):
        archive = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        archive = zipfile.ZipFile(file_path, 'r')
    else:
        # Not an archive: keep the raw download where it is.
        return

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive, so only point this at sources you trust.
    try:
        archive.extractall(workdir)
    finally:
        # Close before deleting: an open handle leaks a descriptor and blocks
        # the removal below on platforms that lock open files.
        archive.close()
    os.remove(file_path)
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
44
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
45
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def cat_prepare(install_dir, db_dir=None, tax_dir=None):
    """Run ``CAT prepare`` with ``install_dir`` as the working directory.

    When both ``db_dir`` and ``tax_dir`` are given the command is
    ``CAT prepare --existing -d <db_dir> -t <tax_dir>``; otherwise it is
    ``CAT prepare --fresh -q``.

    :param install_dir: directory used as ``cwd`` for the command
    :param db_dir: database folder passed with ``-d`` (optional)
    :param tax_dir: taxonomy folder passed with ``-t`` (optional)
    :raises Exception: if the command exits with a non-zero status; the
        message carries the command line, the return code and the captured
        stdout/stderr
    """
    if db_dir and tax_dir:
        cmd = ['CAT', 'prepare', '--existing', '-d', db_dir, '-t', tax_dir]
    else:
        cmd = ['CAT', 'prepare', '--fresh', '-q']
    # The context managers close (and thereby delete) both capture files on
    # the success path as well as on failure.
    with tempfile.NamedTemporaryFile() as cmd_stdout, \
            tempfile.NamedTemporaryFile() as cmd_stderr:
        return_code = subprocess.call(cmd, shell=False, cwd=install_dir,
                                      stdout=cmd_stdout, stderr=cmd_stderr)
        if return_code:
            # The child wrote through the same descriptors, so the file
            # offset sits at the end of the data; rewind or read() returns
            # nothing and the error message loses the captured output.
            cmd_stdout.seek(0)
            cmd_stderr.seek(0)
            msg = "stdout:\n%s\nstderr:\n%s" % (
                cmd_stdout.read().decode('utf-8', 'replace'),
                cmd_stderr.read().decode('utf-8', 'replace'))
            raise Exception('Error: (%s), returncode=%s %s'
                            % (' '.join(cmd), return_code, msg))
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
62
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
63
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
def main():
    """Prepare or locate a CAT database and record it in a JSON config file.

    Command-line options:

    * ``--config_file`` (required): path the JSON result is written to.
    * ``--database_folder`` and ``--taxonomy_folder``: when both are given,
      ``cat_prepare`` is run on these existing folders from the current
      working directory.
    * ``--install_path``: otherwise required; created if missing.  With
      ``--db_url`` the URL is downloaded there via ``url_download``,
      without it ``cat_prepare`` builds a fresh database there.  The
      directory tree is then searched for a directory holding both a
      ``*CAT_database`` and a ``*taxonomy`` sub-directory.

    The JSON written to ``--config_file`` holds one ``cat_database`` entry
    under ``data_tables`` with ``value``, ``name``, ``database_folder`` and
    ``taxonomy_folder`` keys.

    Exits with status 1 (and a message on stderr) when neither the folder
    pair nor ``--install_path`` is supplied, or when no database/taxonomy
    pair can be found under ``--install_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', required=True)
    parser.add_argument('--install_path', default=None)
    parser.add_argument('--db_url', default=None)
    parser.add_argument('--database_folder', default=None)
    parser.add_argument('--taxonomy_folder', default=None)
    args = parser.parse_args()

    cat_path = None
    cat_db = None
    tax_db = None
    if args.database_folder and args.taxonomy_folder:
        # Normalise first so a trailing separator does not make basename()
        # return an empty string.
        database_folder = os.path.normpath(args.database_folder)
        taxonomy_folder = os.path.normpath(args.taxonomy_folder)
        cat_path = os.path.dirname(database_folder)
        cat_db = os.path.basename(database_folder)
        tax_db = os.path.basename(taxonomy_folder)
        cat_prepare(os.getcwd(),
                    db_dir=args.database_folder,
                    tax_dir=args.taxonomy_folder)
    elif not args.install_path:
        # A string argument is printed to stderr and yields exit status 1.
        sys.exit('Error: either --database_folder and --taxonomy_folder, '
                 'or --install_path, must be provided')
    else:
        if not os.path.exists(args.install_path):
            os.makedirs(args.install_path)
        if args.db_url:
            url_download(args.db_url, args.install_path)
        else:
            cat_prepare(args.install_path)
        for root, dirs, _files in os.walk(args.install_path):
            # Track matches per directory so a database found in one
            # directory is never paired with a taxonomy found in another.
            found_db = None
            found_tax = None
            for dname in dirs:
                if dname.endswith('CAT_database'):
                    found_db = dname
                elif dname.endswith('taxonomy'):
                    found_tax = dname
            if found_db and found_tax:
                cat_path = os.path.normpath(root)
                cat_db = found_db
                tax_db = found_tax
                break
        if cat_path is None:
            sys.exit('Error: no directory containing both a *CAT_database '
                     'and a *taxonomy folder found under %s'
                     % args.install_path)

    cat_dir = os.path.basename(cat_path)
    data_table_entry = dict(value=cat_dir, name=cat_dir,
                            database_folder=os.path.join(cat_dir, cat_db),
                            taxonomy_folder=os.path.join(cat_dir, tax_db))
    dm_dict = {'data_tables': {'cat_database': [data_table_entry]}}
    # save info to json file
    with open(args.config_file, 'w') as config_fh:
        config_fh.write(json.dumps(dm_dict))
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
113
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
114
cffd8e2382cf "planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
iuc
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()