# HG changeset patch
# User iuc
# Date 1763900494 0
# Node ID dfa1eb2941b0e30384e096f93ea982955ac114eb
# Parent a5921c09b7b7e44d8d9d8ab4c44c5d44641a0f93
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_amrfinderplus commit 45dbbf06a59df43da2c321c272de11cc41e17d43
diff -r a5921c09b7b7 -r dfa1eb2941b0 data_manager/data_manager_build_amrfinderplus.py
--- a/data_manager/data_manager_build_amrfinderplus.py Thu May 30 13:06:37 2024 +0000
+++ b/data_manager/data_manager_build_amrfinderplus.py Sun Nov 23 12:21:34 2025 +0000
@@ -14,11 +14,13 @@
Create the json file with database information for galaxy data manager
"""
- def __init__(self,
- amrfinderplus_database="amrfinderplus_versioned_database",
- db_name="amrfinderplus-db",
- amrfinderplus_version="latest",
- date_version=None):
+ def __init__(
+ self,
+ amrfinderplus_database="amrfinderplus_versioned_database",
+ db_name="amrfinderplus-db",
+ amrfinderplus_version="latest",
+ date_version=None,
+ ):
self.data_table_name = amrfinderplus_database
self._db_name = db_name
self._amrfinderplus_version = amrfinderplus_version
@@ -31,11 +33,7 @@
Skeleton of a data_table format
return: a data table formatted for json output
"""
- self.data_table_entry = {
- "data_tables": {
- self.data_table_name: {}
- }
- }
+ self.data_table_entry = {"data_tables": {self.data_table_name: {}}}
return self.data_table_entry
def get_data_manager(self):
@@ -44,14 +42,19 @@
return: The data table with database information
"""
self.amrfinderplus_table_list = self.get_data_table_format()
- amrfinderplus_value = f"amrfinderplus_V{self._amrfinderplus_version}" \
- f"_{self._amrfinderplus_date_version}"
- amrfinderplus_name = f"V{self._amrfinderplus_version}" \
- f"-{self._amrfinderplus_date_version}"
- data_info = dict(value=amrfinderplus_value,
- name=amrfinderplus_name,
- db_version=self._amrfinderplus_version,
- path=self._db_name)
+ amrfinderplus_value = (
+ f"amrfinderplus_V{self._amrfinderplus_version}"
+ f"_{self._amrfinderplus_date_version}"
+ )
+ amrfinderplus_name = (
+ f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}"
+ )
+ data_info = dict(
+ value=amrfinderplus_value,
+ name=amrfinderplus_name,
+ db_version=self._amrfinderplus_version,
+ path=self._db_name,
+ )
self.amrfinderplus_table_list["data_tables"][self.data_table_name] = [data_info]
return self.amrfinderplus_table_list
@@ -63,24 +66,28 @@
Build the data manager infos for galaxy
"""
- def __init__(self,
- output_dir=Path.cwd(),
- ncbi_url="ftp.ncbi.nlm.nih.gov",
- ftp_login="anonymous",
- ftp_password="anonymous",
- amrfinderplus_database="amrfinderplus_database",
- db_name="amrfinderplus-db",
- amrfinderplus_version="latest",
- json_file_path=None,
- date_version=None,
- amrfinderplus_db_path=None,
- test_mode=False):
+ def __init__(
+ self,
+ output_dir=Path.cwd(),
+ ncbi_url="ftp.ncbi.nlm.nih.gov",
+ ftp_login="anonymous",
+ ftp_password="anonymous",
+ amrfinderplus_database="amrfinderplus_database",
+ db_name="amrfinderplus-db",
+ amrfinderplus_version="latest",
+ json_file_path=None,
+ date_version=None,
+ amrfinderplus_db_path=None,
+ test_mode=False,
+ ):
super().__init__()
self.json_file_path = json_file_path
self._output_dir = output_dir
self._ncbi_ftp_url = ncbi_url
- self._ncbi_database_path = "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
+ self._ncbi_database_path = (
+ "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
+ )
self._login = ftp_login
self._password = ftp_password
self._amrfinderplus_database = amrfinderplus_database
@@ -103,40 +110,61 @@
[cmd.append(i) for i in args]
proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
if proc.returncode != 0:
- print(f'Error type {proc.returncode} with : \n {proc}')
+ print(f"Error type {proc.returncode} with : \n {proc}")
def download_amrfinderplus_db(self):
"""
Download the amrfinderplus database from the ncbi ftp server
"""
- self.amrfinderplus_db_path = f'{self._output_dir}/{self._db_name}'
+ self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
os.makedirs(self.amrfinderplus_db_path)
- amrfinderplus_ftp_path = f"ftp://{self._login}:" \
- f"{self._password}@{self._ncbi_ftp_url}/" \
- f"{self._ncbi_database_path}/" \
- f"{self._amrfinderplus_version}/" \
- f"{self._amrfinderplus_date_version}"
+ if self._amrfinderplus_version == "latest":
+ self.get_amrfinderplus_version()
+
+ amrfinderplus_ftp_path = (
+ f"ftp://{self._login}:"
+ f"{self._password}@{self._ncbi_ftp_url}/"
+ f"{self._ncbi_database_path}/"
+ f"{self._amrfinderplus_version}/"
+ f"{self._amrfinderplus_date_version}"
+ )
+
+ if self._amrfinderplus_version == "3.12":
+ taxa_group_file = "taxgroup.tab"
+ test_dna_fasta = "AMR_DNA-Escherichia"
+ else:
+ taxa_group_file = "taxgroup.tsv"
+ test_dna_fasta = "AMR_DNA-Escherichia.fa"
if self.test_mode is True:
- file_list = ["AMR_DNA-Escherichia", "version.txt", "taxgroup.tab", "database_format_version.txt"]
+ file_list = [
+ test_dna_fasta,
+ "version.txt",
+ taxa_group_file,
+ "database_format_version.txt",
+ ]
output_option = "-O"
for file in file_list:
- self.subprocess_cmd("wget",
- "-nd",
- "-np",
- "-r",
- f"{amrfinderplus_ftp_path}/{file}",
- output_option,
- f"{self.amrfinderplus_db_path}/{file}")
+ self.subprocess_cmd(
+ "wget",
+ "-nd",
+ "-np",
+ "-r",
+ f"{amrfinderplus_ftp_path}/{file}",
+ output_option,
+ f"{self.amrfinderplus_db_path}/{file}",
+ )
else:
output_option = "-P"
- self.subprocess_cmd("wget",
- "-nd",
- "-np",
- "-r",
- amrfinderplus_ftp_path,
- output_option,
- self.amrfinderplus_db_path)
+ self.subprocess_cmd(
+ "wget",
+ "-nd",
+ "-np",
+ "-r",
+ amrfinderplus_ftp_path,
+ output_option,
+ self.amrfinderplus_db_path,
+ )
def make_hmm_profile(self):
"""
@@ -153,27 +181,48 @@
Extract le list of species which have file in the database
return: a filtered species list of available species in the database
"""
- taxa_group_path = Path(f"{self.amrfinderplus_db_path}/taxgroup.tab")
+ if self._amrfinderplus_version == "3.12":
+ taxa_group_file = "taxgroup.tab"
+ else:
+ taxa_group_file = "taxgroup.tsv"
+ taxa_group_path = Path(f"{self.amrfinderplus_db_path}/{taxa_group_file}")
if Path.exists(taxa_group_path):
taxa_table = pd.read_table(taxa_group_path)
- taxa_table.columns = ["taxgroup", "gpipe_taxgroup", "number_of_nucl_ref_genes"]
- taxa_df = taxa_table[taxa_table.number_of_nucl_ref_genes > 0].filter(items=["taxgroup"], axis=1)
+ taxa_table.columns = [
+ "taxgroup",
+ "gpipe_taxgroup",
+ "number_of_nucl_ref_genes",
+ ]
+ taxa_df = taxa_table[taxa_table.number_of_nucl_ref_genes > 0].filter(
+ items=["taxgroup"], axis=1
+ )
if self.test_mode is True:
taxa_df = taxa_df[taxa_df.taxgroup == "Escherichia"].taxgroup
else:
taxa_df = taxa_df.taxgroup
self.species_list = list(taxa_df)
else:
- print("taxgroup.tab file is missing to list available species")
+ print(f"{taxa_group_file} file is missing to list available species")
def make_blastdb(self):
"""
Index fasta file for blast
"""
self.extract_filelist_makeblast()
- nucl_file_db_list = [f'{self.amrfinderplus_db_path}/AMR_DNA-{specie}' for specie in self.species_list]
- amr_dna = f'{self.amrfinderplus_db_path}/AMR_CDS'
- amr_prot = f'{self.amrfinderplus_db_path}/AMRProt'
+ if self._amrfinderplus_version == "3.12":
+ nucl_file_db_list = [
+ f"{self.amrfinderplus_db_path}/AMR_DNA-{specie}"
+ for specie in self.species_list
+ ]
+ amr_dna = f"{self.amrfinderplus_db_path}/AMR_CDS"
+ amr_prot = f"{self.amrfinderplus_db_path}/AMRProt"
+ else:
+ nucl_file_db_list = [
+ f"{self.amrfinderplus_db_path}/AMR_DNA-{specie}.fa"
+ for specie in self.species_list
+ ]
+ amr_dna = f"{self.amrfinderplus_db_path}/AMR_CDS.fa"
+ amr_prot = f"{self.amrfinderplus_db_path}/AMRProt.fa"
os.chdir(self.amrfinderplus_db_path)
if Path(amr_dna).exists():
nucl_file_db_list.append(amr_dna)
@@ -183,10 +232,16 @@
self.subprocess_cmd("makeblastdb", "-in", amr_prot, "-dbtype", "prot")
else:
print("No file AMRProt detected for indexing")
- [self.subprocess_cmd("makeblastdb", "-in", file, "-dbtype", "nucl") for file in nucl_file_db_list]
+ [
+ self.subprocess_cmd("makeblastdb", "-in", file, "-dbtype", "nucl")
+ for file in nucl_file_db_list
+ ]
- def get_amrfinderplus_version(self, version_file="version.txt",
- database_version_file="database_format_version.txt"):
+ def get_amrfinderplus_version(
+ self,
+ version_file="version.txt",
+ database_version_file="database_format_version.txt",
+ ):
"""
Check the version when latest if provided and update the number
param version_file: name of the file containing version information
@@ -197,11 +252,14 @@
ftp.cwd(f"{self._ncbi_database_path}/{self._amrfinderplus_version}")
db_version = BytesIO()
db_date_version = BytesIO()
- ftp.retrbinary(f'RETR {version_file}', db_version.write)
- ftp.retrbinary(f'RETR {database_version_file}', db_date_version.write)
- self._amrfinderplus_date_version = db_version.getvalue().decode("utf-8").splitlines()[0]
- self._amrfinderplus_version = '.'.join(
- db_date_version.getvalue().decode("utf-8").splitlines()[0].split(".")[:2])
+ ftp.retrbinary(f"RETR {version_file}", db_version.write)
+ ftp.retrbinary(f"RETR {database_version_file}", db_date_version.write)
+ self._amrfinderplus_date_version = (
+ db_version.getvalue().decode("utf-8").splitlines()[0]
+ )
+ self._amrfinderplus_version = ".".join(
+ db_date_version.getvalue().decode("utf-8").splitlines()[0].split(".")[:2]
+ )
def read_json_input_file(self):
"""
@@ -209,7 +267,7 @@
"""
with open(self.json_file_path) as fh:
params = json.load(fh)
- target_dir = params['output_data'][0]['extra_files_path']
+ target_dir = params["output_data"][0]["extra_files_path"]
os.makedirs(target_dir)
self._output_dir = target_dir
@@ -217,7 +275,7 @@
"""
Write in the imported json file
"""
- with open(self.json_file_path, 'w') as fh:
+ with open(self.json_file_path, "w") as fh:
json.dump(self.get_data_manager(), fh, sort_keys=True)
@@ -228,23 +286,32 @@
"""
# parse options and arguments
arg_parser = argparse.ArgumentParser()
- arg_parser.add_argument("data_manager_json",
- help="json file from galaxy")
- arg_parser.add_argument("--db_version", default="latest",
- help="select the major version of the database (e.g. 3.10, 3.8), default is latest")
- arg_parser.add_argument("--db_date",
- help="select the date into the database version (e.g. 2022-10-11.2)")
- arg_parser.add_argument("--test", action='store_true',
- help="option to test the script with an lighted database")
+ arg_parser.add_argument("data_manager_json", help="json file from galaxy")
+ arg_parser.add_argument(
+ "--db_version",
+ default="latest",
+ help="select the major version of the database (e.g. 3.10, 3.8), default is latest",
+ )
+ arg_parser.add_argument(
+ "--db_date",
+ help="select the date into the database version (e.g. 2022-10-11.2)",
+ )
+ arg_parser.add_argument(
+ "--test",
+ action="store_true",
+ help="option to test the script with an lighted database",
+ )
return arg_parser.parse_args()
def main():
all_args = parse_arguments()
- amrfinderplus_download = DownloadAmrFinderPlusDatabase(amrfinderplus_version=all_args.db_version,
- date_version=all_args.db_date,
- json_file_path=all_args.data_manager_json,
- test_mode=all_args.test)
+ amrfinderplus_download = DownloadAmrFinderPlusDatabase(
+ amrfinderplus_version=all_args.db_version,
+ date_version=all_args.db_date,
+ json_file_path=all_args.data_manager_json,
+ test_mode=all_args.test,
+ )
amrfinderplus_download.read_json_input_file()
amrfinderplus_download.download_amrfinderplus_db()
amrfinderplus_download.make_hmm_profile()
@@ -252,5 +319,5 @@
amrfinderplus_download.write_json_infos()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff -r a5921c09b7b7 -r dfa1eb2941b0 data_manager/data_manager_build_amrfinderplus.xml
--- a/data_manager/data_manager_build_amrfinderplus.xml Thu May 30 13:06:37 2024 +0000
+++ b/data_manager/data_manager_build_amrfinderplus.xml Sun Nov 23 12:21:34 2025 +0000
@@ -17,8 +17,14 @@
+
+
+
+
+
+
@@ -26,7 +32,7 @@
-
+
@@ -40,7 +46,6 @@
-
@@ -61,6 +66,23 @@
+
+
+
+
+
+
+
+
+
- 3.12.8
- 3.10.6
- 1.5.1
+ 4.0.23
+ 3.11.14
+ 2.3.3
0
21.05