changeset 3:dfa1eb2941b0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_amrfinderplus commit 45dbbf06a59df43da2c321c272de11cc41e17d43
author iuc
date Sun, 23 Nov 2025 12:21:34 +0000
parents a5921c09b7b7
children
files data_manager/data_manager_build_amrfinderplus.py data_manager/data_manager_build_amrfinderplus.xml data_manager/macro.xml
diffstat 3 files changed, 177 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_build_amrfinderplus.py	Thu May 30 13:06:37 2024 +0000
+++ b/data_manager/data_manager_build_amrfinderplus.py	Sun Nov 23 12:21:34 2025 +0000
@@ -14,11 +14,13 @@
     Create the json file with database information for galaxy data manager
     """
 
-    def __init__(self,
-                 amrfinderplus_database="amrfinderplus_versioned_database",
-                 db_name="amrfinderplus-db",
-                 amrfinderplus_version="latest",
-                 date_version=None):
+    def __init__(
+        self,
+        amrfinderplus_database="amrfinderplus_versioned_database",
+        db_name="amrfinderplus-db",
+        amrfinderplus_version="latest",
+        date_version=None,
+    ):
         self.data_table_name = amrfinderplus_database
         self._db_name = db_name
         self._amrfinderplus_version = amrfinderplus_version
@@ -31,11 +33,7 @@
         Skeleton of a data_table format
         return: a data table formatted for json output
         """
-        self.data_table_entry = {
-            "data_tables": {
-                self.data_table_name: {}
-            }
-        }
+        self.data_table_entry = {"data_tables": {self.data_table_name: {}}}
         return self.data_table_entry
 
     def get_data_manager(self):
@@ -44,14 +42,19 @@
         return: The data table with database information
         """
         self.amrfinderplus_table_list = self.get_data_table_format()
-        amrfinderplus_value = f"amrfinderplus_V{self._amrfinderplus_version}" \
-                              f"_{self._amrfinderplus_date_version}"
-        amrfinderplus_name = f"V{self._amrfinderplus_version}" \
-                             f"-{self._amrfinderplus_date_version}"
-        data_info = dict(value=amrfinderplus_value,
-                         name=amrfinderplus_name,
-                         db_version=self._amrfinderplus_version,
-                         path=self._db_name)
+        amrfinderplus_value = (
+            f"amrfinderplus_V{self._amrfinderplus_version}"
+            f"_{self._amrfinderplus_date_version}"
+        )
+        amrfinderplus_name = (
+            f"V{self._amrfinderplus_version}" f"-{self._amrfinderplus_date_version}"
+        )
+        data_info = dict(
+            value=amrfinderplus_value,
+            name=amrfinderplus_name,
+            db_version=self._amrfinderplus_version,
+            path=self._db_name,
+        )
         self.amrfinderplus_table_list["data_tables"][self.data_table_name] = [data_info]
         return self.amrfinderplus_table_list
 
@@ -63,24 +66,28 @@
     Build the data manager infos for galaxy
     """
 
-    def __init__(self,
-                 output_dir=Path.cwd(),
-                 ncbi_url="ftp.ncbi.nlm.nih.gov",
-                 ftp_login="anonymous",
-                 ftp_password="anonymous",
-                 amrfinderplus_database="amrfinderplus_database",
-                 db_name="amrfinderplus-db",
-                 amrfinderplus_version="latest",
-                 json_file_path=None,
-                 date_version=None,
-                 amrfinderplus_db_path=None,
-                 test_mode=False):
+    def __init__(
+        self,
+        output_dir=Path.cwd(),
+        ncbi_url="ftp.ncbi.nlm.nih.gov",
+        ftp_login="anonymous",
+        ftp_password="anonymous",
+        amrfinderplus_database="amrfinderplus_database",
+        db_name="amrfinderplus-db",
+        amrfinderplus_version="latest",
+        json_file_path=None,
+        date_version=None,
+        amrfinderplus_db_path=None,
+        test_mode=False,
+    ):
 
         super().__init__()
         self.json_file_path = json_file_path
         self._output_dir = output_dir
         self._ncbi_ftp_url = ncbi_url
-        self._ncbi_database_path = "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
+        self._ncbi_database_path = (
+            "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
+        )
         self._login = ftp_login
         self._password = ftp_password
         self._amrfinderplus_database = amrfinderplus_database
@@ -103,40 +110,61 @@
         [cmd.append(i) for i in args]
         proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
         if proc.returncode != 0:
-            print(f'Error type {proc.returncode} with : \n {proc}')
+            print(f"Error type {proc.returncode} with : \n {proc}")
 
     def download_amrfinderplus_db(self):
         """
         Download the amrfinderplus database from the ncbi ftp server
         """
-        self.amrfinderplus_db_path = f'{self._output_dir}/{self._db_name}'
+        self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
         os.makedirs(self.amrfinderplus_db_path)
 
-        amrfinderplus_ftp_path = f"ftp://{self._login}:" \
-                                 f"{self._password}@{self._ncbi_ftp_url}/" \
-                                 f"{self._ncbi_database_path}/" \
-                                 f"{self._amrfinderplus_version}/" \
-                                 f"{self._amrfinderplus_date_version}"
+        if self._amrfinderplus_version == "latest":
+            self.get_amrfinderplus_version()
+
+        amrfinderplus_ftp_path = (
+            f"ftp://{self._login}:"
+            f"{self._password}@{self._ncbi_ftp_url}/"
+            f"{self._ncbi_database_path}/"
+            f"{self._amrfinderplus_version}/"
+            f"{self._amrfinderplus_date_version}"
+        )
+
+        if self._amrfinderplus_version == "3.12":
+            taxa_group_file = "taxgroup.tab"
+            test_dna_fasta = "AMR_DNA-Escherichia"
+        else:
+            taxa_group_file = "taxgroup.tsv"
+            test_dna_fasta = "AMR_DNA-Escherichia.fa"
         if self.test_mode is True:
-            file_list = ["AMR_DNA-Escherichia", "version.txt", "taxgroup.tab", "database_format_version.txt"]
+            file_list = [
+                test_dna_fasta,
+                "version.txt",
+                taxa_group_file,
+                "database_format_version.txt",
+            ]
             output_option = "-O"
             for file in file_list:
-                self.subprocess_cmd("wget",
-                                    "-nd",
-                                    "-np",
-                                    "-r",
-                                    f"{amrfinderplus_ftp_path}/{file}",
-                                    output_option,
-                                    f"{self.amrfinderplus_db_path}/{file}")
+                self.subprocess_cmd(
+                    "wget",
+                    "-nd",
+                    "-np",
+                    "-r",
+                    f"{amrfinderplus_ftp_path}/{file}",
+                    output_option,
+                    f"{self.amrfinderplus_db_path}/{file}",
+                )
         else:
             output_option = "-P"
-            self.subprocess_cmd("wget",
-                                "-nd",
-                                "-np",
-                                "-r",
-                                amrfinderplus_ftp_path,
-                                output_option,
-                                self.amrfinderplus_db_path)
+            self.subprocess_cmd(
+                "wget",
+                "-nd",
+                "-np",
+                "-r",
+                amrfinderplus_ftp_path,
+                output_option,
+                self.amrfinderplus_db_path,
+            )
 
     def make_hmm_profile(self):
         """
@@ -153,27 +181,48 @@
         Extract le list of species which have file in the database
         return: a filtered species list of available species in the database
         """
-        taxa_group_path = Path(f"{self.amrfinderplus_db_path}/taxgroup.tab")
+        if self._amrfinderplus_version == "3.12":
+            taxa_group_file = "taxgroup.tab"
+        else:
+            taxa_group_file = "taxgroup.tsv"
+        taxa_group_path = Path(f"{self.amrfinderplus_db_path}/{taxa_group_file}")
         if Path.exists(taxa_group_path):
             taxa_table = pd.read_table(taxa_group_path)
-            taxa_table.columns = ["taxgroup", "gpipe_taxgroup", "number_of_nucl_ref_genes"]
-            taxa_df = taxa_table[taxa_table.number_of_nucl_ref_genes > 0].filter(items=["taxgroup"], axis=1)
+            taxa_table.columns = [
+                "taxgroup",
+                "gpipe_taxgroup",
+                "number_of_nucl_ref_genes",
+            ]
+            taxa_df = taxa_table[taxa_table.number_of_nucl_ref_genes > 0].filter(
+                items=["taxgroup"], axis=1
+            )
             if self.test_mode is True:
                 taxa_df = taxa_df[taxa_df.taxgroup == "Escherichia"].taxgroup
             else:
                 taxa_df = taxa_df.taxgroup
             self.species_list = list(taxa_df)
         else:
-            print("taxgroup.tab file is missing to list available species")
+            print(f"{taxa_group_file} file is missing to list available species")
 
     def make_blastdb(self):
         """
         Index fasta file for blast
         """
         self.extract_filelist_makeblast()
-        nucl_file_db_list = [f'{self.amrfinderplus_db_path}/AMR_DNA-{specie}' for specie in self.species_list]
-        amr_dna = f'{self.amrfinderplus_db_path}/AMR_CDS'
-        amr_prot = f'{self.amrfinderplus_db_path}/AMRProt'
+        if self._amrfinderplus_version == "3.12":
+            nucl_file_db_list = [
+                f"{self.amrfinderplus_db_path}/AMR_DNA-{specie}"
+                for specie in self.species_list
+            ]
+            amr_dna = f"{self.amrfinderplus_db_path}/AMR_CDS"
+            amr_prot = f"{self.amrfinderplus_db_path}/AMRProt"
+        else:
+            nucl_file_db_list = [
+                f"{self.amrfinderplus_db_path}/AMR_DNA-{specie}.fa"
+                for specie in self.species_list
+            ]
+            amr_dna = f"{self.amrfinderplus_db_path}/AMR_CDS.fa"
+            amr_prot = f"{self.amrfinderplus_db_path}/AMRProt.fa"
         os.chdir(self.amrfinderplus_db_path)
         if Path(amr_dna).exists():
             nucl_file_db_list.append(amr_dna)
@@ -183,10 +232,16 @@
             self.subprocess_cmd("makeblastdb", "-in", amr_prot, "-dbtype", "prot")
         else:
             print("No file AMRProt detected for indexing")
-        [self.subprocess_cmd("makeblastdb", "-in", file, "-dbtype", "nucl") for file in nucl_file_db_list]
+        [
+            self.subprocess_cmd("makeblastdb", "-in", file, "-dbtype", "nucl")
+            for file in nucl_file_db_list
+        ]
 
-    def get_amrfinderplus_version(self, version_file="version.txt",
-                                  database_version_file="database_format_version.txt"):
+    def get_amrfinderplus_version(
+        self,
+        version_file="version.txt",
+        database_version_file="database_format_version.txt",
+    ):
         """
         Check the version when latest if provided and update the number
         param version_file: name of the file containing version information
@@ -197,11 +252,14 @@
         ftp.cwd(f"{self._ncbi_database_path}/{self._amrfinderplus_version}")
         db_version = BytesIO()
         db_date_version = BytesIO()
-        ftp.retrbinary(f'RETR {version_file}', db_version.write)
-        ftp.retrbinary(f'RETR {database_version_file}', db_date_version.write)
-        self._amrfinderplus_date_version = db_version.getvalue().decode("utf-8").splitlines()[0]
-        self._amrfinderplus_version = '.'.join(
-            db_date_version.getvalue().decode("utf-8").splitlines()[0].split(".")[:2])
+        ftp.retrbinary(f"RETR {version_file}", db_version.write)
+        ftp.retrbinary(f"RETR {database_version_file}", db_date_version.write)
+        self._amrfinderplus_date_version = (
+            db_version.getvalue().decode("utf-8").splitlines()[0]
+        )
+        self._amrfinderplus_version = ".".join(
+            db_date_version.getvalue().decode("utf-8").splitlines()[0].split(".")[:2]
+        )
 
     def read_json_input_file(self):
         """
@@ -209,7 +267,7 @@
         """
         with open(self.json_file_path) as fh:
             params = json.load(fh)
-        target_dir = params['output_data'][0]['extra_files_path']
+        target_dir = params["output_data"][0]["extra_files_path"]
         os.makedirs(target_dir)
         self._output_dir = target_dir
 
@@ -217,7 +275,7 @@
         """
         Write in the imported json file
         """
-        with open(self.json_file_path, 'w') as fh:
+        with open(self.json_file_path, "w") as fh:
             json.dump(self.get_data_manager(), fh, sort_keys=True)
 
 
@@ -228,23 +286,32 @@
     """
     # parse options and arguments
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument("data_manager_json",
-                            help="json file from galaxy")
-    arg_parser.add_argument("--db_version", default="latest",
-                            help="select the major version of the database (e.g. 3.10, 3.8), default is latest")
-    arg_parser.add_argument("--db_date",
-                            help="select the date into the database version (e.g. 2022-10-11.2)")
-    arg_parser.add_argument("--test", action='store_true',
-                            help="option to test the script with an lighted database")
+    arg_parser.add_argument("data_manager_json", help="json file from galaxy")
+    arg_parser.add_argument(
+        "--db_version",
+        default="latest",
+        help="select the major version of the database (e.g. 3.10, 3.8), default is latest",
+    )
+    arg_parser.add_argument(
+        "--db_date",
+        help="select the date into the database version (e.g. 2022-10-11.2)",
+    )
+    arg_parser.add_argument(
+        "--test",
+        action="store_true",
+        help="option to test the script with an lighted database",
+    )
     return arg_parser.parse_args()
 
 
 def main():
     all_args = parse_arguments()
-    amrfinderplus_download = DownloadAmrFinderPlusDatabase(amrfinderplus_version=all_args.db_version,
-                                                           date_version=all_args.db_date,
-                                                           json_file_path=all_args.data_manager_json,
-                                                           test_mode=all_args.test)
+    amrfinderplus_download = DownloadAmrFinderPlusDatabase(
+        amrfinderplus_version=all_args.db_version,
+        date_version=all_args.db_date,
+        json_file_path=all_args.data_manager_json,
+        test_mode=all_args.test,
+    )
     amrfinderplus_download.read_json_input_file()
     amrfinderplus_download.download_amrfinderplus_db()
     amrfinderplus_download.make_hmm_profile()
@@ -252,5 +319,5 @@
     amrfinderplus_download.write_json_infos()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
--- a/data_manager/data_manager_build_amrfinderplus.xml	Thu May 30 13:06:37 2024 +0000
+++ b/data_manager/data_manager_build_amrfinderplus.xml	Sun Nov 23 12:21:34 2025 +0000
@@ -17,8 +17,14 @@
     <inputs>
         <conditional name="database_list">
             <param name="database_version_select" type="select" label="Database version">
+                <option value="4.0">V4.0</option>
                 <option value="3.12">V3.12</option>
             </param>
+            <when value="4.0">
+                <param name="database_date_select" type="select" label="Date version">
+                    <option value="2025-07-16.1">2025-07-16.1</option>
+                </param>
+            </when>
             <when value="3.12">
                 <param name="database_date_select" type="select" label="Date version">
                     <option value="2024-05-02.2">2024-05-02.2</option>
@@ -26,7 +32,7 @@
                 </param>
             </when>
         </conditional>
-         <param name="test_data_manager" type="hidden" value=""/>
+        <param name="test_data_manager" type="hidden" value=""/>
     </inputs>
     <outputs>
         <data name="output_file" format="data_manager_json"/>
@@ -40,7 +46,6 @@
                     <has_n_lines n="1"/>
                     <has_text text="{&quot;data_tables&quot;"/>
                     <has_text text="amrfinderplus_versioned_database"/>
-                    <has_text text='"db_version": "3.12"'/>
                 </assert_contents>
             </output>
         </test>
@@ -61,6 +66,23 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test_3 DB 4.0 2025-07-16.1 -->
+        <test expect_num_outputs="1">
+            <param name="test_data_manager" value="--test"/>
+            <conditional name="database_list">
+                <param name="database_version_select" value="4.0"/>
+                <param name="database_date_select" value="2025-07-16.1"/>
+            </conditional>
+            <output name="output_file">
+                <assert_contents>
+                    <has_n_lines n="1"/>
+                    <has_text text="{&quot;data_tables&quot;"/>
+                    <has_text text="amrfinderplus_versioned_database"/>
+                    <has_text text='"name": "V4.0-2025-07-16.1"'/>
+                    <has_text text='"db_version": "4.0"'/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help><![CDATA[
 Download AMRFinderPlus database from the NCBI server
--- a/data_manager/macro.xml	Thu May 30 13:06:37 2024 +0000
+++ b/data_manager/macro.xml	Sun Nov 23 12:21:34 2025 +0000
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">3.12.8</token>
-    <token name="@PYTHON_VERSION@">3.10.6</token>
-    <token name="@PANDAS@">1.5.1</token>
+    <token name="@TOOL_VERSION@">4.0.23</token>
+    <token name="@PYTHON_VERSION@">3.11.14</token>
+    <token name="@PANDAS@">2.3.3</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <token name="@PROFILE@">21.05</token>
     <xml name="requirements">