comparison data_manager/bakta_build_database.py @ 5:baceff842902 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit e277883fca66013904bae930f04e7f3be5fcb1a2
author iuc
date Wed, 05 Jun 2024 14:20:59 +0000
parents d74850cf4e42
children 97b1b5ad1cda
comparison
legend: equal | deleted | inserted | replaced
4:d74850cf4e42 5:baceff842902
1 import argparse 1 import argparse
2 import hashlib 2 import hashlib
3 import json 3 import json
4 import os
5 import re 4 import re
6 import shutil
7 import sys 5 import sys
8 import tarfile 6 import tarfile
9 from datetime import datetime 7 from datetime import datetime
10 from pathlib import Path 8 from pathlib import Path
11 9
32 self.data_table_entry = None 30 self.data_table_entry = None
33 self.data_table_name = data_table_name 31 self.data_table_name = data_table_name
34 self.tar_name = tarball_name 32 self.tar_name = tarball_name
35 self.db_version = db_version 33 self.db_version = db_version
36 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" 34 self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"
37 self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" 35 self.DB_TEST_URL = "https://zenodo.org/record/11381156/files/db-versions.json"
38 self.test_mode = test_mode 36 self.test_mode = test_mode
39 37
40 def get_database_type(self): 38 def get_database_type(self):
41 self.light_db = bool(re.search(pattern="light", string=self.db_version)) 39 self.light_db = bool(re.search(pattern="light", string=self.db_version))
42 self.db_version = self.db_version.split(sep="_")[0] 40 self.db_version = self.db_version.split(sep="_")[0]
160 ) as tar_file: 158 ) as tar_file:
161 tar_file.extractall(path=db_path) 159 tar_file.extractall(path=db_path)
162 print(f"Untar the database in {db_path}") 160 print(f"Untar the database in {db_path}")
163 161
164 if not self.test_mode: 162 if not self.test_mode:
165 self.moove_files(db_path=db_path) 163 self.move_files(db_path=db_path)
166 164
167 except OSError: 165 except OSError:
168 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}") 166 sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}")
169 167
170 def moove_files(self, db_path): 168 def move_files(self, db_path):
171 if os.path.isdir(db_path.joinpath("db-light")): 169 if db_path.joinpath("db-light").is_dir():
172 input_dir = db_path.joinpath("db-light") 170 input_dir = db_path.joinpath("db-light")
173 elif os.path.isdir(db_path.joinpath("db")): 171 elif db_path.joinpath("db").is_dir():
174 input_dir = db_path.joinpath("db") 172 input_dir = db_path.joinpath("db")
175 file_list = os.listdir(input_dir)
176 output_dir = db_path 173 output_dir = db_path
177 for file in file_list: 174 for file in input_dir.iterdir():
178 input = input_dir.joinpath(file) 175 if file.is_file(): # to avoid moving amrfinder-plus folder
179 output = output_dir.joinpath(file) 176 input = input_dir.joinpath(file)
180 shutil.move(input, output) 177 output = output_dir.joinpath(file)
178 input.rename(output)
181 179
182 def calc_md5_sum(self, buffer_size=1048576): 180 def calc_md5_sum(self, buffer_size=1048576):
183 tarball_path = Path(self.db_dir).joinpath(self.tar_name) 181 tarball_path = Path(self.db_dir).joinpath(self.tar_name)
184 md5 = hashlib.md5() 182 md5 = hashlib.md5()
185 with tarball_path.open("rb") as fh: 183 with tarball_path.open("rb") as fh:
221 219
222 def main(): 220 def main():
223 all_args = parse_arguments() 221 all_args = parse_arguments()
224 with open(all_args.data_manager_json) as fh: 222 with open(all_args.data_manager_json) as fh:
225 params = json.load(fh) 223 params = json.load(fh)
226 target_dir = params["output_data"][0]["extra_files_path"] 224 target_dir = Path(params["output_data"][0]["extra_files_path"])
227 os.makedirs(target_dir) 225 target_dir.mkdir(parents=True, exist_ok=True)
228 # init the class to download bakta db 226 # init the class to download bakta db
229 bakta_upload = InstallBaktaDatabase( 227 bakta_upload = InstallBaktaDatabase(
230 test_mode=all_args.test, db_version=all_args.database_version 228 test_mode=all_args.test, db_version=all_args.database_version
231 ) 229 )
232 bakta_db = bakta_upload.fetch_db_versions() 230 bakta_db = bakta_upload.fetch_db_versions()
233 # update the path for galaxy 231 # update the path for galaxy
234 bakta_upload.db_dir = target_dir 232 bakta_upload.db_dir = target_dir.absolute()
235 # download the database 233 # download the database
236 bakta_upload.download() 234 bakta_upload.download()
237 # check md5 sum 235 # check md5 sum
238 bakta_upload.calc_md5_sum() 236 bakta_upload.calc_md5_sum()
239 # untar db 237 # untar db