Mercurial > repos > iuc > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 4:d74850cf4e42 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 404dc5eae30884c5814e463921eb3678bbd6878e
| author | iuc |
|---|---|
| date | Fri, 25 Aug 2023 23:34:04 +0000 |
| parents | 3e73c97f025d |
| children | baceff842902 |
comparison
equal
deleted
inserted
replaced
| 3:3e73c97f025d | 4:d74850cf4e42 |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import hashlib | 2 import hashlib |
| 3 import json | 3 import json |
| 4 import os | 4 import os |
| 5 import re | 5 import re |
| | 6 import shutil |
| 6 import sys | 7 import sys |
| 7 import tarfile | 8 import tarfile |
| 8 from datetime import datetime | 9 from datetime import datetime |
| 9 from pathlib import Path | 10 from pathlib import Path |
| 10 | 11 |
| 18 """ | 19 """ |
| 19 | 20 |
| 20 def __init__( | 21 def __init__( |
| 21 self, | 22 self, |
| 22 data_table_name="bakta_database", | 23 data_table_name="bakta_database", |
| 23 db_name=Path.cwd().joinpath("db"), | |
| 24 db_version="latest", | 24 db_version="latest", |
| 25 tarball_name="db.tar.gz", | 25 tarball_name="db.tar.gz", |
| 26 test_mode=False, | 26 test_mode=False, |
| 27 ): | 27 ): |
| 28 self.bakta_table_list = None | 28 self.bakta_table_list = None |
| 29 self.db_url = None | 29 self.db_url = None |
| | 30 self.db_name = "bakta-db" |
| 30 self.db_type = "" | 31 self.db_type = "" |
| 31 self.data_table_entry = None | 32 self.data_table_entry = None |
| 32 self.data_table_name = data_table_name | 33 self.data_table_name = data_table_name |
| 33 self.db_name = db_name | |
| 34 self.tar_name = tarball_name | 34 self.tar_name = tarball_name |
| 35 self.db_version = db_version | 35 self.db_version = db_version |
| 36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" | 36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" |
| 37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" | 37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" |
| 38 self.test_mode = test_mode | 38 self.test_mode = test_mode |
| 101 ) | 101 ) |
| 102 data_info = dict( | 102 data_info = dict( |
| 103 value=bakta_name, | 103 value=bakta_name, |
| 104 dbkey=bakta_database_info["record"], | 104 dbkey=bakta_database_info["record"], |
| 105 bakta_version=tool_version, | 105 bakta_version=tool_version, |
| 106 path="db", | 106 path=self.db_name, |
| 107 ) | 107 ) |
| 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] | 108 self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] |
| 109 return self.bakta_table_list | 109 return self.bakta_table_list |
| 110 | 110 |
| 111 | 111 |
| 115 check md5 sum, | 115 check md5 sum, |
| 116 untar the download db and update for the amrfinderplus database | 116 untar the download db and update for the amrfinderplus database |
| 117 """ | 117 """ |
| 118 | 118 |
| 119 def __init__( | 119 def __init__( |
| 120 self, db_dir=Path.cwd(), db_name="bakta", db_version="latest", test_mode=False | 120 self, |
| | 121 db_dir=Path.cwd(), |
| | 122 db_name="bakta-db", |
| | 123 db_version="latest", |
| | 124 test_mode=False |
| 121 ): | 125 ): |
| 122 super().__init__() | 126 super().__init__() |
| 123 self.md5 = None | 127 self.md5 = None |
| 124 self.db_version = db_version | 128 self.db_version = db_version |
| 125 self.db_dir = db_dir | 129 self.db_dir = db_dir |
| 127 self.tarball_path = "" | 131 self.tarball_path = "" |
| 128 self.test_mode = test_mode | 132 self.test_mode = test_mode |
| 129 self.get_database_type() | 133 self.get_database_type() |
| 130 | 134 |
| 131 def download(self): | 135 def download(self): |
| 132 self.db_name = f"{self.db_name}_{self.db_version}{self.db_type}" | |
| 133 bakta_path = Path(self.db_dir).joinpath(self.tar_name) | 136 bakta_path = Path(self.db_dir).joinpath(self.tar_name) |
| 134 try: | 137 try: |
| 135 with bakta_path.open("wb") as fh_out, requests.get( | 138 with bakta_path.open("wb") as fh_out, requests.get( |
| 136 self.db_url, stream=True) as resp: | 139 self.db_url, stream=True) as resp: |
| 137 total_length = resp.headers.get("content-length") | 140 total_length = resp.headers.get("content-length") |
| 148 f"ERROR: Could not download file from Zenodo!" | 151 f"ERROR: Could not download file from Zenodo!" |
| 149 f" url={self.db_url}, to={self.tarball_path}" | 152 f" url={self.db_url}, to={self.tarball_path}" |
| 150 ) | 153 ) |
| 151 | 154 |
| 152 def untar(self): | 155 def untar(self): |
| 153 db_path = Path(self.db_dir).as_posix() | 156 db_path = Path(self.db_dir).joinpath(self.db_name) |
| 154 try: | 157 try: |
| 155 with self.tarball_path.open("rb") as fh_in, tarfile.open( | 158 with self.tarball_path.open("rb") as fh_in, tarfile.open( |
| 156 fileobj=fh_in, mode="r:gz" | 159 fileobj=fh_in, mode="r:gz" |
| 157 ) as tar_file: | 160 ) as tar_file: |
| 158 tar_file.extractall(path=db_path) | 161 tar_file.extractall(path=db_path) |
| 159 print(f"Untar the database in {db_path}") | 162 print(f"Untar the database in {db_path}") |
| 160 return db_path | 163 |
| | 164 if not self.test_mode: |
| | 165 self.moove_files(db_path=db_path) |
| | 166 |
| 161 except OSError: | 167 except OSError: |
| 162 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {self.db_name}") | 168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") |
| | 169 |
| | 170 def moove_files(self, db_path): |
| | 171 if os.path.isdir(db_path.joinpath("db-light")): |
| | 172 input_dir = db_path.joinpath("db-light") |
| | 173 elif os.path.isdir(db_path.joinpath("db")): |
| | 174 input_dir = db_path.joinpath("db") |
| | 175 file_list = os.listdir(input_dir) |
| | 176 output_dir = db_path |
| | 177 for file in file_list: |
| | 178 input = input_dir.joinpath(file) |
| | 179 output = output_dir.joinpath(file) |
| | 180 shutil.move(input, output) |
| 163 | 181 |
| 164 def calc_md5_sum(self, buffer_size=1048576): | 182 def calc_md5_sum(self, buffer_size=1048576): |
| 165 tarball_path = Path(self.db_dir).joinpath(self.tar_name) | 183 tarball_path = Path(self.db_dir).joinpath(self.tar_name) |
| 166 md5 = hashlib.md5() | 184 md5 = hashlib.md5() |
| 167 with tarball_path.open("rb") as fh: | 185 with tarball_path.open("rb") as fh: |
