annotate data_manager/data_manager_eggnog.py @ 4:fcb8bdd124f4 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit e45c15081260025e470d23975ef5a734d3f8fc66"
author galaxyp
date Tue, 25 Jan 2022 13:51:24 +0000
parents b711f5b6bd44
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
2
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
3 from __future__ import print_function
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
4
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
5 import argparse
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
6 import json
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
7 import os.path
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
8 import sqlite3
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
9 import sys
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
10 from sqlite3 import OperationalError
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
11
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
12
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
def _get_db_version(sqlitedb_path):
    """Return the version string stored in an eggnog sqlite database.

    The version is the first column of the first row returned by
    ``select version from version``.

    When the query fails with ``sqlite3.OperationalError`` (for example the
    ``version`` table does not exist), or when the table holds no usable
    row, the default ``'5.0'`` is returned and the reason is printed on
    stderr.

    :param sqlitedb_path: path of the sqlite database file to inspect
    :return: the stored version, or ``'5.0'`` as a fallback
    """
    version = '5.0'
    query = 'select version from version'
    try:
        conn = sqlite3.connect(sqlitedb_path)
        try:
            cur = conn.cursor()
            cur.execute(query)
            row = cur.fetchone()
        finally:
            # Using the connection as a context manager would only manage
            # the transaction, so close it explicitly.
            conn.close()
        if row is None or row[0] is None:
            # The table exists but has no version: keep the default
            # instead of failing on ``None[0]``.
            print('Assuming eggnog version %s because %s %s' %
                  (version, sqlitedb_path, 'has no version entry'),
                  file=sys.stderr)
        else:
            version = row[0]
    except OperationalError as e:
        print('Assuming eggnog version %s because %s %s' %
              (version, sqlitedb_path, e), file=sys.stderr)
    return version
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
25
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
26
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
def main():
    """Register an installed eggnog database in a data manager JSON file.

    Command line options:

    * ``--install_path``: directory expected to contain ``eggnog.db``
    * ``--config_file``: file that receives the JSON description

    The script exits with status 1 when ``eggnog.db`` is not found under
    the install path. Otherwise it writes a single entry for the
    ``eggnog_mapper_db_versioned`` data table, holding the database version
    (as ``value`` and ``name``), the install path and the matching
    eggnog-mapper ``version``.
    """
    parser = argparse.ArgumentParser()
    # Both options are used unconditionally below, so require them up front
    # rather than failing later with a TypeError on None.
    parser.add_argument('--config_file', required=True)
    parser.add_argument('--install_path', required=True)
    args = parser.parse_args()

    eggnog_db_path = os.path.join(args.install_path, 'eggnog.db')
    if not os.path.exists(eggnog_db_path):
        print('Can not open: %s' % eggnog_db_path, file=sys.stderr)
        sys.exit(1)
    db_version = _get_db_version(eggnog_db_path)

    # DB versioning was super confusing for eggnog-mapper 2.0.x:
    # eggnog-mapper 1.* needed a db v4.5 (based on eggnog v4.5)
    # eggnog-mapper 2.x needed a db v2.0 (based on eggnog v5.0)
    # (db v4.5 are not compatible with eggnog-mapper 2.0)
    # Starting with eggnog-mapper 2.1.* db versioning looks better:
    # 2.1.0 requires db v5.0.2
    if "4.5" in db_version:
        # special case: eggnog-mapper 1.x
        version = "1.0"
    elif db_version.startswith('2.'):
        # special case: eggnog-mapper 2.0.x
        version = "2.0"
    else:
        # normal case for eggnog-mapper >= 2.1
        version = db_version

    data_table = 'eggnog_mapper_db_versioned'
    data_table_entry = dict(value=db_version, name=db_version,
                            path=args.install_path, version=version)
    dm_dict = {'data_tables': {data_table: [data_table_entry]}}

    # save info to json file; the context manager guarantees the handle is
    # flushed and closed
    with open(args.config_file, 'w') as config_handle:
        config_handle.write(json.dumps(dm_dict))
0
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
66
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
67
9d5f039c637f "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/data_manager_eggnog_mapper_abspath commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
galaxyp
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()