comparison data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py @ 0:5ba68abd41f6 draft

Uploaded
author estrain
date Tue, 24 May 2022 11:46:19 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5ba68abd41f6
1 #!/usr/bin/env python
2 # Errol Strain, estrain@gmail.com
3 # Database downloads for NCBI AMRFinderPlus
4
5 import sys
6 import os
7 import tempfile
8 import shutil
9 import json
10 import re
11 import argparse
12 from ftplib import FTP
13
14
def _is_download_candidate(name):
    """Return True for remote entries to fetch (anything not naming 'allele')."""
    return 'allele' not in name


def _is_point_mutation_fasta(name):
    """Return True for AMR_DNA-* sequence files, excluding their .tab companions."""
    return 'AMR_DNA-' in name and 'tab' not in name and 'allele' not in name


def _run_tool(argv, workdir, silence_output=False):
    """Run an external indexing tool inside *workdir* without using a shell.

    Args:
        argv: Command and arguments as a list (no shell interpretation, so
            file names coming from the server cannot inject commands).
        workdir: Directory the tool is executed in.
        silence_output: When True, discard the tool's stdout and stderr.

    Returns:
        The tool's exit status, or 127 if the executable could not be started.
        Failures are reported through the return value rather than raised, so
        indexing stays best-effort.
    """
    import subprocess

    sink = subprocess.DEVNULL if silence_output else None
    try:
        return subprocess.call(argv, cwd=workdir, stdout=sink, stderr=sink)
    except OSError as err:
        sys.stderr.write("Could not run %s: %s\n" % (argv[0], err))
        return 127


def download_from_ncbi(output_directory):
    """Download the latest AMRFinderPlus database files and index them.

    Every entry of the NCBI "latest" database directory whose name does not
    contain 'allele' is downloaded into *output_directory*. BLAST databases
    are then built for AMRProt, AMR_CDS and each AMR_DNA-* sequence file, and
    the AMR.LIB HMM library is pressed.

    Args:
        output_directory: Existing directory that receives the downloaded
            files and the generated indexes.

    Returns:
        The first line of the downloaded version.txt, with trailing
        whitespace removed.

    Raises:
        ftplib.all_errors members on FTP failures, and OSError if a local
        file cannot be written or version.txt cannot be read.
    """
    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
    VERSION_FILENAME = 'version.txt'

    email = 'anonymous@example.com'

    # The FTP session is scoped to the download only: it is closed even when a
    # transfer fails, and it is not left idle while the indexers run.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login('anonymous', email)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # Exclude the allele counts folder.
        remote_names = [name for name in ftp.nlst() if _is_download_candidate(name)]

        for name in remote_names:
            # Explicit paths instead of os.chdir(): the process working
            # directory is never changed, so nothing needs restoring on error.
            local_path = os.path.join(output_directory, name)
            with open(local_path, 'wb') as local_file:
                ftp.retrbinary("RETR " + name, local_file.write)

    # Make blast databases
    _run_tool(
        ["makeblastdb", "-in", "AMRProt", "-dbtype", "prot", "-logfile", "/dev/null"],
        output_directory,
    )
    _run_tool(
        ["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl", "-logfile", "/dev/null"],
        output_directory,
    )
    for name in remote_names:
        if _is_point_mutation_fasta(name):
            _run_tool(
                ["makeblastdb", "-in", name, "-dbtype", "nucl", "-logfile", "/dev/null"],
                output_directory,
            )

    # Make HMM indexes
    _run_tool(["hmmpress", "-f", "AMR.LIB"], output_directory, silence_output=True)

    # Read in version
    with open(os.path.join(output_directory, VERSION_FILENAME)) as version_file:
        version = version_file.readline().rstrip()

    return version
65
def print_json(version, argspath, argsname, argsout):
    """Write a single-row 'amrfinderplus' data-table entry to a JSON file.

    Args:
        version: Stored under the row's "value" key.
        argspath: Stored under the row's "path" key.
        argsname: Stored under the row's "name" key.
        argsout: Path of the JSON file to (over)write.
    """
    row = {
        "path": argspath,
        "name": argsname,
        "value": version,
    }
    payload = {'data_tables': {'amrfinderplus': [row]}}

    # Keys are sorted on output, so the order used to build the dicts above
    # does not affect the file contents.
    with open(argsout, 'w') as out_handle:
        json.dump(payload, out_handle, sort_keys=True, indent=2)
82
def main():
    """Command-line entry point: download the database and record it as JSON.

    The file given by --out is first read as JSON to find the target
    directory (params['output_data'][0]['extra_files_path']). After the
    download it is overwritten with the resulting data-table entry.
    """
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    # NOTE: --name is required on the command line but its value is not used;
    # the table entry name is derived from the downloaded database version.
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()
    out_path = args.out[0]

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # makedirs + exist_ok: tolerate a missing parent directory or a target
    # directory that already exists instead of aborting before the download.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the files and build blast databases
    version = download_from_ncbi(output_directory)

    tablename = "AMRFinderPlus Database " + version

    print_json(version, output_directory, tablename, out_path)


if __name__ == "__main__":
    main()