diff data_manager/bakta_build_database.py @ 5:baceff842902 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit e277883fca66013904bae930f04e7f3be5fcb1a2
author iuc
date Wed, 05 Jun 2024 14:20:59 +0000
parents d74850cf4e42
children 97b1b5ad1cda
line wrap: on
line diff
--- a/data_manager/bakta_build_database.py	Fri Aug 25 23:34:04 2023 +0000
+++ b/data_manager/bakta_build_database.py	Wed Jun 05 14:20:59 2024 +0000
@@ -1,9 +1,7 @@
 import argparse
 import hashlib
 import json
-import os
 import re
-import shutil
 import sys
 import tarfile
 from datetime import datetime
@@ -34,7 +32,7 @@
         self.tar_name = tarball_name
         self.db_version = db_version
         self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"
-        self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json"
+        self.DB_TEST_URL = "https://zenodo.org/record/11381156/files/db-versions.json"
         self.test_mode = test_mode
 
     def get_database_type(self):
@@ -162,22 +160,22 @@
                 print(f"Untar the database in {db_path}")
 
             if not self.test_mode:
-                self.moove_files(db_path=db_path)
+                self.move_files(db_path=db_path)
 
         except OSError:
             sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {db_path}")
 
-    def moove_files(self, db_path):
-        if os.path.isdir(db_path.joinpath("db-light")):
+    def move_files(self, db_path):
+        if db_path.joinpath("db-light").is_dir():
             input_dir = db_path.joinpath("db-light")
-        elif os.path.isdir(db_path.joinpath("db")):
+        elif db_path.joinpath("db").is_dir():
             input_dir = db_path.joinpath("db")
-        file_list = os.listdir(input_dir)
         output_dir = db_path
-        for file in file_list:
-            input = input_dir.joinpath(file)
-            output = output_dir.joinpath(file)
-            shutil.move(input, output)
+        for file in input_dir.iterdir():
+            if file.is_file():  # to avoid moving amrfinder-plus folder
+                input = input_dir.joinpath(file)
+                output = output_dir.joinpath(file)
+                input.rename(output)
 
     def calc_md5_sum(self, buffer_size=1048576):
         tarball_path = Path(self.db_dir).joinpath(self.tar_name)
@@ -223,15 +221,15 @@
     all_args = parse_arguments()
     with open(all_args.data_manager_json) as fh:
         params = json.load(fh)
-    target_dir = params["output_data"][0]["extra_files_path"]
-    os.makedirs(target_dir)
+    target_dir = Path(params["output_data"][0]["extra_files_path"])
+    target_dir.mkdir(parents=True, exist_ok=True)
     # init the class to download bakta db
     bakta_upload = InstallBaktaDatabase(
         test_mode=all_args.test, db_version=all_args.database_version
     )
     bakta_db = bakta_upload.fetch_db_versions()
     # update the path for galaxy
-    bakta_upload.db_dir = target_dir
+    bakta_upload.db_dir = target_dir.absolute()
     # download the database
     bakta_upload.download()
     # check md5 sum