Mercurial > repos > estrain > amrfinderplus_db
changeset 0:5ba68abd41f6 draft
Uploaded
author | estrain |
---|---|
date | Tue, 24 May 2022 11:46:19 +0000 |
parents | |
children | ba50f77b5db9 |
files | data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml data_manager_amrfinderplus/data_manager_conf.xml data_manager_amrfinderplus/test-data/amrfinder_databases.loc data_manager_amrfinderplus/tool-data/amrfinder_databases.loc data_manager_amrfinderplus/tool_data_table_conf.xml.sample data_manager_amrfinderplus/tool_data_table_conf.xml.test |
diffstat | 7 files changed, 175 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Changeset diff header (file added in this changeset):
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,106 @@
#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus
"""Download the latest NCBI AMRFinderPlus database and index it.

The script fetches the database files over anonymous FTP, builds BLAST
databases and HMM indexes next to them, and writes a JSON document that
describes one ``amrfinderplus`` data table entry.
"""

import sys
import os
import tempfile
import shutil
import json
import re
import argparse
import subprocess
from ftplib import FTP


def _run_tool(command, quiet=False):
    """Run an external indexing command without going through a shell.

    ``command`` is an argument list, so file names taken from the FTP
    listing are never interpreted by a shell.  When ``quiet`` is true the
    tool's stdout and stderr are discarded.  A missing executable or a
    non-zero exit status is reported on stderr instead of being raised, so
    indexing stays best-effort as in the original ``os.system`` calls, but
    no longer fails silently.
    """
    sink = subprocess.DEVNULL if quiet else None
    try:
        status = subprocess.call(command, stdout=sink, stderr=sink)
    except OSError as err:
        sys.stderr.write(
            "WARNING: could not run %s: %s\n" % (command[0], err))
        return
    if status != 0:
        sys.stderr.write(
            "WARNING: '%s' exited with status %d\n"
            % (" ".join(command), status))


def _make_blast_db(fasta, dbtype):
    """Build a BLAST database of type ``dbtype`` from ``fasta``."""
    _run_tool(["makeblastdb", "-in", fasta, "-dbtype", dbtype,
               "-logfile", "/dev/null"])


def download_from_ncbi(output_directory):
    """Fetch the latest AMRFinderPlus database and index it in place.

    Every entry of the ``latest`` FTP directory whose name does not contain
    ``allele`` is downloaded into ``output_directory``.  BLAST databases are
    then built for ``AMRProt``, ``AMR_CDS`` and each downloaded
    ``AMR_DNA-*`` file whose name does not contain ``tab``, and ``AMR.LIB``
    is pressed with ``hmmpress``.

    The working directory is changed to ``output_directory`` for the
    duration of the call and restored afterwards, also on error.

    Returns the first line of the downloaded ``version.txt`` with trailing
    whitespace removed.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    FILENAME = 'version.txt'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'

    email = 'anonymous@example.com'

    cwd = os.getcwd()
    os.chdir(output_directory)
    try:
        # The context manager closes the session even if a transfer fails,
        # and the connection is not kept idle while the indexes are built.
        with FTP(NCBI_FTP_SERVER) as ftp:
            ftp.login('anonymous', email)
            ftp.cwd(NCBI_DOWNLOAD_PATH)

            # exclude the allele counts folder
            files = [name for name in ftp.nlst() if 'allele' not in name]

            for name in files:
                # Close each file explicitly so its content is fully on
                # disk before makeblastdb / hmmpress read it.
                with open(name, 'wb') as fh:
                    ftp.retrbinary("RETR " + name, fh.write)

        # Point-mutation nucleotide files: AMR_DNA-* without the tab files.
        pointmuts = [name for name in files
                     if 'AMR_DNA-' in name and 'tab' not in name]

        # Make blast databases
        _make_blast_db("AMRProt", "prot")
        _make_blast_db("AMR_CDS", "nucl")
        for name in pointmuts:
            _make_blast_db(name, "nucl")

        # Make HMM indexes
        _run_tool(["hmmpress", "-f", "AMR.LIB"], quiet=True)

        # Read in version
        with open(FILENAME) as fh:
            version = fh.readline().rstrip()
    finally:
        os.chdir(cwd)

    return version


def print_json(version, argspath, argsname, argsout):
    """Write a one-entry ``amrfinderplus`` data table description as JSON.

    ``version`` becomes the entry's ``value``, ``argsname`` its ``name`` and
    ``argspath`` its ``path``.  The document is written to the file
    ``argsout``, replacing any previous content.
    """
    data_table_entry = {
        'data_tables': {
            'amrfinderplus': [
                {
                    "value": version,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, indent=2, sort_keys=True)


def main():
    """Command-line entry point.

    The file given with ``--out`` is first read as JSON to find the target
    directory (``output_data[0].extra_files_path``) and is then overwritten
    with the resulting data table entry.  ``--name`` is required and parsed
    but not otherwise used: only the latest release is downloaded.
    """
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()

    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Create missing parents too and tolerate an already existing directory.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the files and build blast databases
    version = download_from_ncbi(output_directory)

    tablename = "AMRFinderPlus Database " + version

    print_json(version, output_directory, tablename, args.out[0])


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,24 @@ +<tool id="data_manager_amrfinderplus" name="AMRFinderPlus Data Manager" tool_type="manage_data" version="0.0.3" profile="20.01"> + <requirements> + <requirement type="package">blast</requirement> + <requirement type="package">hmmer</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python $__tool_directory__/data_manager_amrfinderplus.py --name $amrdbname --out ${output_file}; + ]]></command> + <inputs> + <param name="amrdbname" type="text" value="latest" label="Release Version - Note: only latest release is supported"/> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json"/> + </outputs> + <tests> + <test> + </test> + </tests> + <help> + </help> + <citations> + Refer to https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/ for latest database version number. + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/data_manager_conf.xml Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/data_manager_amrfinderplus.xml" id="data_manager_amrfinderplus"> + <data_table name="amrfinderplus"> + <output> + <column name="value" /> + <column name="name" /> + <column name="path" output_ref="output_file" > + <move type="directory" relativize_symlinks="True"> + <src>${path}</src> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">amrfinderplus/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/amrfinderplus/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/test-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of amrfinder databases +# +# the columns are: +# value name path +# +# for example +# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/tool-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of amrfinder databases +# +# the columns are: +# value name path +# +# for example +# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.sample Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/amrfinder_databases.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_amrfinderplus/tool_data_table_conf.xml.test Tue May 24 11:46:19 2022 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/amrfinder_databases.loc" /> + </table> +</tables>