changeset 0:5ba68abd41f6 draft

Uploaded
author estrain
date Tue, 24 May 2022 11:46:19 +0000
parents
children ba50f77b5db9
files data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml data_manager_amrfinderplus/data_manager_conf.xml data_manager_amrfinderplus/test-data/amrfinder_databases.loc data_manager_amrfinderplus/tool-data/amrfinder_databases.loc data_manager_amrfinderplus/tool_data_table_conf.xml.sample data_manager_amrfinderplus/tool_data_table_conf.xml.test
diffstat 7 files changed, 175 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for NCBI AMRFinderPlus
+
+import sys
+import os
+import tempfile
+import shutil
+import json
+import re
+import argparse
+from ftplib import FTP
+
+
+def download_from_ncbi(output_directory):
+    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
+    FILENAME = 'version.txt' 
+    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
+    
+    email = 'anonymous@example.com'
+
+    cwd = os.getcwd() 
+    os.chdir(output_directory)
+
+    ftp = FTP( NCBI_FTP_SERVER )
+    ftp.login( 'anonymous', email)
+    ftp.cwd(NCBI_DOWNLOAD_PATH)
+    
+    #exclude the allele counts folder
+    files = ftp.nlst()
+    files = filter(lambda x: re.search(r'^((?!allele|(?:invokername=allele)).)*$', x), files)
+
+    for f in files:
+      ftp.retrbinary("RETR " + f, open(f, 'wb').write)
+
+    files = ftp.nlst()
+    files = filter(lambda x: re.search(r'^((?!allele|(?:invokername=allele)).)*$', x), files)
+    pointmuts = filter(lambda x: re.search(r'^((?!tab|(?:invokername=tab)).)*$', x), files)
+    pointmuts = filter(lambda x: re.search(r'AMR_DNA-', x), pointmuts)
+
+
+    # Make blast databases
+    blastcmd = "makeblastdb -in AMRProt -dbtype prot -logfile /dev/null"
+    os.system(blastcmd)
+    blastcmd = "makeblastdb -in AMR_CDS -dbtype nucl -logfile /dev/null"
+    os.system(blastcmd)
+
+    for f in pointmuts:
+      blastcmd = "makeblastdb -in " + f +" -dbtype nucl -logfile /dev/null"
+      os.system(blastcmd)
+
+    # Make HMM indexes
+    hmmcmd="hmmpress -f AMR.LIB > /dev/null 2> /dev/null"
+    os.system(hmmcmd)
+ 
+    # Read in version
+    with open("version.txt") as f:
+      version = f.readline().rstrip()
+
+    ftp.quit()
+    
+    os.chdir(cwd)    
+    
+    return version 
+
+def print_json (version,argspath,argsname,argsout):
+
+    data_table_entry = {
+      'data_tables' : {
+        'amrfinderplus': [
+          {
+            "value":version,
+            "name":argsname,
+            "path":argspath,
+          }
+        ]
+      }
+    }
+
+    with open(argsout, 'w') as fh:
+      json.dump(data_table_entry, fh, indent=2, sort_keys=True)
+        
+def main():
+   
+    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
+    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+
+    output_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(output_directory)
+    data_manager_dict = {}
+
+    #Fetch the files and build blast databases
+    version=download_from_ncbi(output_directory)    
+
+    tablename = "AMRFinderPlus Database " + version
+
+    #shutil.copytree("amrdb",args.path[0])
+    print_json(version,output_directory,tablename,args.out[0])
+
+if __name__ == "__main__": main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,24 @@
+<tool id="data_manager_amrfinderplus" name="AMRFinderPlus Data Manager" tool_type="manage_data" version="0.0.3" profile="20.01">
+    <requirements>
+        <requirement type="package">blast</requirement>
+        <requirement type="package">hmmer</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python $__tool_directory__/data_manager_amrfinderplus.py --name $amrdbname --out ${output_file};
+    ]]></command>
+    <inputs>
+      <param name="amrdbname" type="text" value="latest" label="Release Version - Note: only latest release is supported"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+     Refer to https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/ for latest database version number.
+    </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/data_manager_conf.xml	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_amrfinderplus.xml" id="data_manager_amrfinderplus">
+      <data_table name="amrfinderplus">
+        <output>
+          <column name="value" />
+          <column name="name" />
+          <column name="path" output_ref="output_file" >
+            <move type="directory" relativize_symlinks="True">
+              <src>${path}</src>
+              <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">amrfinderplus/${value}</target>
+            </move>
+            <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/amrfinderplus/${value}</value_translation>
+            <value_translation type="function">abspath</value_translation>
+          </column>
+        </output>
+      </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/test-data/amrfinder_databases.loc	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of amrfinder databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool-data/amrfinder_databases.loc	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of amrfinder databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.sample	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/amrfinder_databases.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.test	Tue May 24 11:46:19 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False">
+        <columns>value,name, path</columns>
+        <file path="${__HERE__}/test-data/amrfinder_databases.loc" />
+    </table>
+</tables>