0
|
1 #!/usr/bin/env python
|
|
2 # Errol Strain, estrain@gmail.com
|
|
3 # Database downloads for NCBI AMRFinderPlus
|
|
4
|
|
5 import sys
|
|
6 import os
|
|
7 import tempfile
|
|
8 import shutil
|
|
9 import json
|
|
10 import re
|
|
11 import argparse
|
|
12 from ftplib import FTP
|
|
13
|
|
14
|
|
def download_from_ncbi(output_directory):
    """Download the latest AMRFinderPlus database and build its indexes.

    Every entry of the NCBI ``latest`` database directory whose name does not
    contain ``allele`` is fetched into *output_directory*. Afterwards
    ``makeblastdb`` is run on ``AMRProt`` (protein), on ``AMR_CDS`` and on
    each point-mutation file (nucleotide), and ``hmmpress`` is run on
    ``AMR.LIB``.

    The process working directory is never changed; files are addressed
    relative to *output_directory* and the external tools are started with
    that directory as their working directory.

    Args:
        output_directory: Existing directory that receives the downloaded
            files and the generated index files.

    Returns:
        The first line of the downloaded ``version.txt`` with trailing
        whitespace removed.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    FILENAME = 'version.txt'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'

    email = 'anonymous@example.com'

    ftp = FTP(NCBI_FTP_SERVER)
    try:
        ftp.login('anonymous', email)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # Exclude the allele counts folder (any name containing "allele").
        files = [name for name in ftp.nlst() if 'allele' not in name]

        for name in files:
            # Close each file before the indexing tools read it.
            with open(os.path.join(output_directory, name), 'wb') as handle:
                ftp.retrbinary("RETR " + name, handle.write)
    except Exception:
        # Drop the connection without the QUIT handshake; the session may
        # already be unusable.
        ftp.close()
        raise
    else:
        # Say goodbye now rather than after the (slow) indexing below, so an
        # idle timeout on the control connection cannot fail a finished run.
        ftp.quit()

    # Point-mutation nucleotide databases: "AMR_DNA-*" entries, excluding the
    # accompanying "*.tab" tables.
    pointmuts = [
        name for name in files
        if 'tab' not in name and 'AMR_DNA-' in name
    ]

    # Make blast databases
    _run_tool(
        ["makeblastdb", "-in", "AMRProt", "-dbtype", "prot",
         "-logfile", "/dev/null"],
        output_directory,
    )
    _run_tool(
        ["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl",
         "-logfile", "/dev/null"],
        output_directory,
    )
    for name in pointmuts:
        _run_tool(
            ["makeblastdb", "-in", name, "-dbtype", "nucl",
             "-logfile", "/dev/null"],
            output_directory,
        )

    # Make HMM indexes
    _run_tool(["hmmpress", "-f", "AMR.LIB"], output_directory, quiet=True)

    # Read in version
    with open(os.path.join(output_directory, FILENAME)) as fh:
        version = fh.readline().rstrip()

    return version


def _run_tool(argv, workdir, quiet=False):
    """Run an external tool in *workdir* without a shell; return its exit code.

    Passing an argument list means file names received from the server are
    never interpreted by a shell. With ``quiet=True`` the tool's stdout and
    stderr are discarded. A non-zero exit status is returned, not raised.
    """
    import subprocess  # local import: not part of the module's import header

    try:
        if quiet:
            with open(os.devnull, 'wb') as sink:
                return subprocess.call(argv, cwd=workdir, stdout=sink, stderr=sink)
        return subprocess.call(argv, cwd=workdir)
    except OSError as err:
        # Same outcome as the former shell invocation: a tool that cannot be
        # started is reported (unless silenced) but does not abort the run.
        if not quiet:
            sys.stderr.write("%s: %s\n" % (argv[0], err))
        return 127
|
|
65
|
|
def print_json(version, argspath, argsname, argsout):
    """Write a one-row ``amrfinderplus`` data-table description as JSON.

    Args:
        version: Stored under the row's ``value`` key.
        argspath: Stored under the row's ``path`` key.
        argsname: Stored under the row's ``name`` key.
        argsout: Path of the file to (over)write with the JSON document.
    """
    row = {"name": argsname, "path": argspath, "value": version}
    payload = {"data_tables": {"amrfinderplus": [row]}}

    # Keys are sorted and the document is indented by two spaces.
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    with open(argsout, 'w') as out:
        out.write(serialized)
|
|
82
|
|
def main():
    """Command-line entry point: download the database and describe it.

    Reads the JSON parameter file given by ``--out``, creates the directory
    named by its ``output_data[0].extra_files_path`` entry, downloads and
    indexes the database there, and then overwrites the ``--out`` file with
    the resulting data-table JSON. ``--name`` is required on the command
    line but its value is not used here; the table name is derived from the
    downloaded database version.
    """
    cli = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    cli.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    cli.add_argument('--out', type=str, required=True, nargs=1, help='output file')
    options = cli.parse_args()

    # nargs=1 makes argparse store a one-element list.
    out_path = options.out[0]

    with open(out_path) as handle:
        params = json.load(handle)

    target_dir = params['output_data'][0]['extra_files_path']
    os.mkdir(target_dir)

    # Fetch the files and build blast databases
    version = download_from_ncbi(target_dir)

    print_json(version, target_dir, "AMRFinderPlus Database " + version, out_path)
|
|
105
|
|
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
|