comparison data_manager/bakta_build_database.py @ 4:d74850cf4e42 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 404dc5eae30884c5814e463921eb3678bbd6878e
author iuc
date Fri, 25 Aug 2023 23:34:04 +0000
parents 3e73c97f025d
children baceff842902
comparison
equal deleted inserted replaced
3:3e73c97f025d 4:d74850cf4e42
1 import argparse 1 import argparse
2 import hashlib 2 import hashlib
3 import json 3 import json
4 import os 4 import os
5 import re 5 import re
6 import shutil
6 import sys 7 import sys
7 import tarfile 8 import tarfile
8 from datetime import datetime 9 from datetime import datetime
9 from pathlib import Path 10 from pathlib import Path
10 11
18 """ 19 """
19 20
20 def __init__( 21 def __init__(
21 self, 22 self,
22 data_table_name="bakta_database", 23 data_table_name="bakta_database",
23 db_name=Path.cwd().joinpath("db"),
24 db_version="latest", 24 db_version="latest",
25 tarball_name="db.tar.gz", 25 tarball_name="db.tar.gz",
26 test_mode=False, 26 test_mode=False,
27 ): 27 ):
28 self.bakta_table_list = None 28 self.bakta_table_list = None
29 self.db_url = None 29 self.db_url = None
30 self.db_name = "bakta-db"
30 self.db_type = "" 31 self.db_type = ""
31 self.data_table_entry = None 32 self.data_table_entry = None
32 self.data_table_name = data_table_name 33 self.data_table_name = data_table_name
33 self.db_name = db_name
34 self.tar_name = tarball_name 34 self.tar_name = tarball_name
35 self.db_version = db_version 35 self.db_version = db_version
36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" 36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"
37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" 37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json"
38 self.test_mode = test_mode 38 self.test_mode = test_mode
101 ) 101 )
102 data_info = dict( 102 data_info = dict(
103 value=bakta_name, 103 value=bakta_name,
104 dbkey=bakta_database_info["record"], 104 dbkey=bakta_database_info["record"],
105 bakta_version=tool_version, 105 bakta_version=tool_version,
106 path="db", 106 path=self.db_name,
107 ) 107 )
108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]
109 return self.bakta_table_list 109 return self.bakta_table_list
110 110
111 111
115 check md5 sum, 115 check md5 sum,
116 untar the download db and update for the amrfinderplus database 116 untar the download db and update for the amrfinderplus database
117 """ 117 """
118 118
119 def __init__( 119 def __init__(
120 self, db_dir=Path.cwd(), db_name="bakta", db_version="latest", test_mode=False 120 self,
121 db_dir=Path.cwd(),
122 db_name="bakta-db",
123 db_version="latest",
124 test_mode=False
121 ): 125 ):
122 super().__init__() 126 super().__init__()
123 self.md5 = None 127 self.md5 = None
124 self.db_version = db_version 128 self.db_version = db_version
125 self.db_dir = db_dir 129 self.db_dir = db_dir
127 self.tarball_path = "" 131 self.tarball_path = ""
128 self.test_mode = test_mode 132 self.test_mode = test_mode
129 self.get_database_type() 133 self.get_database_type()
130 134
131 def download(self): 135 def download(self):
132 self.db_name = f"{self.db_name}_{self.db_version}{self.db_type}"
133 bakta_path = Path(self.db_dir).joinpath(self.tar_name) 136 bakta_path = Path(self.db_dir).joinpath(self.tar_name)
134 try: 137 try:
135 with bakta_path.open("wb") as fh_out, requests.get( 138 with bakta_path.open("wb") as fh_out, requests.get(
136 self.db_url, stream=True) as resp: 139 self.db_url, stream=True) as resp:
137 total_length = resp.headers.get("content-length") 140 total_length = resp.headers.get("content-length")
148 f"ERROR: Could not download file from Zenodo!" 151 f"ERROR: Could not download file from Zenodo!"
149 f" url={self.db_url}, to={self.tarball_path}" 152 f" url={self.db_url}, to={self.tarball_path}"
150 ) 153 )
151 154
152 def untar(self): 155 def untar(self):
153 db_path = Path(self.db_dir).as_posix() 156 db_path = Path(self.db_dir).joinpath(self.db_name)
154 try: 157 try:
155 with self.tarball_path.open("rb") as fh_in, tarfile.open( 158 with self.tarball_path.open("rb") as fh_in, tarfile.open(
156 fileobj=fh_in, mode="r:gz" 159 fileobj=fh_in, mode="r:gz"
157 ) as tar_file: 160 ) as tar_file:
158 tar_file.extractall(path=db_path) 161 tar_file.extractall(path=db_path)
159 print(f"Untar the database in {db_path}") 162 print(f"Untar the database in {db_path}")
160 return db_path 163
164 if not self.test_mode:
165 self.moove_files(db_path=db_path)
166
161 except OSError: 167 except OSError:
162 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {self.db_name}") 168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}")
169
170 def moove_files(self, db_path):
171 if os.path.isdir(db_path.joinpath("db-light")):
172 input_dir = db_path.joinpath("db-light")
173 elif os.path.isdir(db_path.joinpath("db")):
174 input_dir = db_path.joinpath("db")
175 file_list = os.listdir(input_dir)
176 output_dir = db_path
177 for file in file_list:
178 input = input_dir.joinpath(file)
179 output = output_dir.joinpath(file)
180 shutil.move(input, output)
163 181
164 def calc_md5_sum(self, buffer_size=1048576): 182 def calc_md5_sum(self, buffer_size=1048576):
165 tarball_path = Path(self.db_dir).joinpath(self.tar_name) 183 tarball_path = Path(self.db_dir).joinpath(self.tar_name)
166 md5 = hashlib.md5() 184 md5 = hashlib.md5()
167 with tarball_path.open("rb") as fh: 185 with tarball_path.open("rb") as fh: