Mercurial > repos > estrain > amrfinderplus_db
view data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py @ 0:5ba68abd41f6 draft
Uploaded
author | estrain |
---|---|
date | Tue, 24 May 2022 11:46:19 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus
"""Data manager script that downloads and indexes the NCBI AMRFinderPlus database.

The script fetches the files of the latest AMRFinderPlus database release
from the NCBI FTP server, builds the BLAST and HMMER indexes next to them,
and writes a JSON description of the resulting ``amrfinderplus`` data table
entry.
"""

import sys
import os
import tempfile
import shutil
import json
import re
import argparse
import subprocess
from ftplib import FTP

NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
ANONYMOUS_EMAIL = 'anonymous@example.com'
VERSION_FILENAME = 'version.txt'


def _fetch_file(ftp, remote_name, local_path):
    """Download one remote file over *ftp* and close the local file when done."""
    with open(local_path, 'wb') as handle:
        ftp.retrbinary("RETR " + remote_name, handle.write)


def _run_tool(argv, workdir, quiet=False):
    """Run an external indexing tool inside *workdir* without aborting on failure.

    The command is given as an argument list, so file names are never
    interpreted by a shell. A failing or missing tool is reported on stderr
    and otherwise ignored, so one failed index does not stop the remaining
    steps.

    Returns the tool's exit status, or ``None`` if it could not be started.
    """
    sink = subprocess.DEVNULL if quiet else None
    try:
        status = subprocess.run(argv, cwd=workdir, stdout=sink, stderr=sink).returncode
    except OSError as err:
        sys.stderr.write("WARNING: could not run %s: %s\n" % (argv[0], err))
        return None
    if status != 0:
        sys.stderr.write("WARNING: '%s' exited with status %d\n" % (" ".join(argv), status))
    return status


def _make_blast_db(fasta_name, dbtype, workdir):
    """Build a BLAST database of type *dbtype* from *fasta_name* in *workdir*."""
    return _run_tool(
        ["makeblastdb", "-in", fasta_name, "-dbtype", dbtype, "-logfile", os.devnull],
        workdir,
    )


def download_from_ncbi(output_directory):
    """Download the latest AMRFinderPlus database and build its indexes.

    Every entry of the remote ``latest`` directory whose name does not
    contain ``allele`` is downloaded into *output_directory*. BLAST databases
    are then built for ``AMRProt``, ``AMR_CDS`` and each ``AMR_DNA-*`` file
    whose name does not contain ``tab``, and ``AMR.LIB`` is pressed with
    ``hmmpress``.

    Args:
        output_directory: Existing directory that receives the downloaded
            files and the generated indexes.

    Returns:
        The first line of the downloaded ``version.txt``, stripped of
        trailing whitespace.
    """
    # The FTP session is limited to the transfer phase so the control
    # connection is not left idle while the indexes are built, and it is
    # closed even when a transfer fails.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login('anonymous', ANONYMOUS_EMAIL)
        ftp.cwd(NCBI_DOWNLOAD_PATH)
        # Exclude the allele counts folder.
        remote_names = [name for name in ftp.nlst() if 'allele' not in name]
        for name in remote_names:
            _fetch_file(ftp, name, os.path.join(output_directory, name))

    # Point-mutation nucleotide files: AMR_DNA-* without the .tab tables.
    point_mutation_files = [
        name for name in remote_names
        if 'AMR_DNA-' in name and 'tab' not in name
    ]

    # Make blast databases
    _make_blast_db("AMRProt", "prot", output_directory)
    _make_blast_db("AMR_CDS", "nucl", output_directory)
    for name in point_mutation_files:
        _make_blast_db(name, "nucl", output_directory)

    # Make HMM indexes (hmmpress output is discarded)
    _run_tool(["hmmpress", "-f", "AMR.LIB"], output_directory, quiet=True)

    # Read in version
    with open(os.path.join(output_directory, VERSION_FILENAME)) as handle:
        return handle.readline().rstrip()


def print_json(version, argspath, argsname, argsout):
    """Write the ``amrfinderplus`` data table entry as JSON to *argsout*.

    Args:
        version: Database version, stored as the entry's ``value``.
        argspath: Directory holding the database, stored as ``path``.
        argsname: Display name of the entry, stored as ``name``.
        argsout: Path of the JSON file to write (overwritten if present).
    """
    data_table_entry = {
        'data_tables': {
            'amrfinderplus': [
                {
                    "value": version,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }
    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, indent=2, sort_keys=True)


def main():
    """Parse the command line, download the database and write the result JSON.

    The file given with ``--out`` is first read as JSON to find the target
    directory (``output_data[0].extra_files_path``) and is then overwritten
    with the data table entry. ``--name`` is required for command-line
    compatibility but is not used: the entry name is derived from the
    downloaded database version.
    """
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    parser.add_argument('--name', type=str, required=True, nargs=1,
                        help='Database name')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')
    args = parser.parse_args()
    out_path = args.out[0]

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Create missing parents and tolerate a directory that already exists.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the files and build blast databases
    version = download_from_ncbi(output_directory)
    tablename = "AMRFinderPlus Database " + version

    print_json(version, output_directory, tablename, out_path)


if __name__ == "__main__":
    main()