Mercurial > repos > iuc > data_manager_bakta
comparison data_manager/bakta_build_database.py @ 5:baceff842902 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit e277883fca66013904bae930f04e7f3be5fcb1a2
| author | iuc |
|---|---|
| date | Wed, 05 Jun 2024 14:20:59 +0000 |
| parents | d74850cf4e42 |
| children | 97b1b5ad1cda |
Comparison legend (row status): equal, deleted, inserted, replaced
| 4:d74850cf4e42 | 5:baceff842902 |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import hashlib | 2 import hashlib |
| 3 import json | 3 import json |
| 4 import os | |
| 5 import re | 4 import re |
| 6 import shutil | |
| 7 import sys | 5 import sys |
| 8 import tarfile | 6 import tarfile |
| 9 from datetime import datetime | 7 from datetime import datetime |
| 10 from pathlib import Path | 8 from pathlib import Path |
| 11 | 9 |
| 32 self.data_table_entry = None | 30 self.data_table_entry = None |
| 33 self.data_table_name = data_table_name | 31 self.data_table_name = data_table_name |
| 34 self.tar_name = tarball_name | 32 self.tar_name = tarball_name |
| 35 self.db_version = db_version | 33 self.db_version = db_version |
| 36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" | 34 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" |
| 37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" | 35 self.DB_TEST_URL = "https://zenodo.org/record/11381156/files/db-versions.json" |
| 38 self.test_mode = test_mode | 36 self.test_mode = test_mode |
| 39 | 37 |
| 40 def get_database_type(self): | 38 def get_database_type(self): |
| 41 self.light_db = bool(re.search(pattern="light", string=self.db_version)) | 39 self.light_db = bool(re.search(pattern="light", string=self.db_version)) |
| 42 self.db_version = self.db_version.split(sep="_")[0] | 40 self.db_version = self.db_version.split(sep="_")[0] |
| 160 ) as tar_file: | 158 ) as tar_file: |
| 161 tar_file.extractall(path=db_path) | 159 tar_file.extractall(path=db_path) |
| 162 print(f"Untar the database in {db_path}") | 160 print(f"Untar the database in {db_path}") |
| 163 | 161 |
| 164 if not self.test_mode: | 162 if not self.test_mode: |
| 165 self.moove_files(db_path=db_path) | 163 self.move_files(db_path=db_path) |
| 166 | 164 |
| 167 except OSError: | 165 except OSError: |
| 168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") | 166 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") |
| 169 | 167 |
| 170 def moove_files(self, db_path): | 168 def move_files(self, db_path): |
| 171 if os.path.isdir(db_path.joinpath("db-light")): | 169 if db_path.joinpath("db-light").is_dir(): |
| 172 input_dir = db_path.joinpath("db-light") | 170 input_dir = db_path.joinpath("db-light") |
| 173 elif os.path.isdir(db_path.joinpath("db")): | 171 elif db_path.joinpath("db").is_dir(): |
| 174 input_dir = db_path.joinpath("db") | 172 input_dir = db_path.joinpath("db") |
| 175 file_list = os.listdir(input_dir) | |
| 176 output_dir = db_path | 173 output_dir = db_path |
| 177 for file in file_list: | 174 for file in input_dir.iterdir(): |
| 178 input = input_dir.joinpath(file) | 175 if file.is_file(): # to avoid moving amrfinder-plus folder |
| 179 output = output_dir.joinpath(file) | 176 input = input_dir.joinpath(file) |
| 180 shutil.move(input, output) | 177 output = output_dir.joinpath(file) |
| 178 input.rename(output) | |
| 181 | 179 |
| 182 def calc_md5_sum(self, buffer_size=1048576): | 180 def calc_md5_sum(self, buffer_size=1048576): |
| 183 tarball_path = Path(self.db_dir).joinpath(self.tar_name) | 181 tarball_path = Path(self.db_dir).joinpath(self.tar_name) |
| 184 md5 = hashlib.md5() | 182 md5 = hashlib.md5() |
| 185 with tarball_path.open("rb") as fh: | 183 with tarball_path.open("rb") as fh: |
| 221 | 219 |
| 222 def main(): | 220 def main(): |
| 223 all_args = parse_arguments() | 221 all_args = parse_arguments() |
| 224 with open(all_args.data_manager_json) as fh: | 222 with open(all_args.data_manager_json) as fh: |
| 225 params = json.load(fh) | 223 params = json.load(fh) |
| 226 target_dir = params["output_data"][0]["extra_files_path"] | 224 target_dir = Path(params["output_data"][0]["extra_files_path"]) |
| 227 os.makedirs(target_dir) | 225 target_dir.mkdir(parents=True, exist_ok=True) |
| 228 # init the class to download bakta db | 226 # init the class to download bakta db |
| 229 bakta_upload = InstallBaktaDatabase( | 227 bakta_upload = InstallBaktaDatabase( |
| 230 test_mode=all_args.test, db_version=all_args.database_version | 228 test_mode=all_args.test, db_version=all_args.database_version |
| 231 ) | 229 ) |
| 232 bakta_db = bakta_upload.fetch_db_versions() | 230 bakta_db = bakta_upload.fetch_db_versions() |
| 233 # update the path for galaxy | 231 # update the path for galaxy |
| 234 bakta_upload.db_dir = target_dir | 232 bakta_upload.db_dir = target_dir.absolute() |
| 235 # download the database | 233 # download the database |
| 236 bakta_upload.download() | 234 bakta_upload.download() |
| 237 # check md5 sum | 235 # check md5 sum |
| 238 bakta_upload.calc_md5_sum() | 236 bakta_upload.calc_md5_sum() |
| 239 # untar db | 237 # untar db |
