Mercurial > repos > estrain > amrfinderplus_db
comparison data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py @ 0:5ba68abd41f6 draft
Uploaded
author | estrain |
---|---|
date | Tue, 24 May 2022 11:46:19 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5ba68abd41f6 |
---|---|
1 #!/usr/bin/env python | |
2 # Errol Strain, estrain@gmail.com | |
3 # Database downloads for NCBI AMRFinderPlus | |
4 | |
5 import sys | |
6 import os | |
7 import tempfile | |
8 import shutil | |
9 import json | |
10 import re | |
11 import argparse | |
12 from ftplib import FTP | |
13 | |
14 | |
def download_from_ncbi(output_directory):
    """Download the latest AMRFinderPlus database from NCBI and index it.

    Every entry listed in the ``latest`` database directory on the NCBI FTP
    server whose name does not contain ``allele`` is saved into
    *output_directory*. BLAST databases are then built for ``AMRProt``,
    ``AMR_CDS`` and each downloaded file whose name contains ``AMR_DNA-``
    but not ``tab``; finally ``hmmpress`` is run on ``AMR.LIB``.

    The process working directory is never changed: files are written via
    explicit paths and the indexing tools are started with
    ``cwd=output_directory``.

    Args:
        output_directory: Existing directory that receives the downloaded
            files and the generated index files.

    Returns:
        The first line of the downloaded ``version.txt`` with trailing
        whitespace removed.

    Raises:
        Errors from the FTP session and from writing or reading the local
        files propagate to the caller. Failures of the indexing tools do
        not raise (see ``run_indexer`` below).
    """
    # Imported here so the function is self-contained; the module's import
    # block does not provide subprocess.
    import subprocess

    NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
    VERSION_FILENAME = 'version.txt'
    NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'

    email = 'anonymous@example.com'

    def run_indexer(argv, **popen_kwargs):
        """Run an indexing tool inside output_directory without a shell.

        Returns the tool's exit status. A tool that cannot be started is
        reported as status 127 rather than raising, so indexing stays
        best-effort exactly as it was when the commands went through
        os.system and their status was ignored.
        """
        try:
            return subprocess.call(argv, cwd=output_directory, **popen_kwargs)
        except OSError:
            return 127

    # The context manager closes the session even if a transfer fails, and
    # releases it before the (potentially slow) indexing step starts.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login('anonymous', email)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # Exclude the allele counts folder.
        remote_names = [name for name in ftp.nlst() if 'allele' not in name]

        for name in remote_names:
            # Close (and therefore flush) each file before the indexers
            # read it.
            with open(os.path.join(output_directory, name), 'wb') as local_file:
                ftp.retrbinary("RETR " + name, local_file.write)

    # Point-mutation nucleotide files: AMR_DNA-* entries, skipping any name
    # that contains "tab".
    point_mutation_files = [
        name for name in remote_names
        if 'tab' not in name and 'AMR_DNA-' in name
    ]

    # Make blast databases. Arguments are passed as a list so file names
    # reported by the remote server are never interpreted by a shell.
    run_indexer(["makeblastdb", "-in", "AMRProt", "-dbtype", "prot",
                 "-logfile", "/dev/null"])
    run_indexer(["makeblastdb", "-in", "AMR_CDS", "-dbtype", "nucl",
                 "-logfile", "/dev/null"])
    for name in point_mutation_files:
        run_indexer(["makeblastdb", "-in", name, "-dbtype", "nucl",
                     "-logfile", "/dev/null"])

    # Make HMM indexes; hmmpress output is discarded.
    run_indexer(["hmmpress", "-f", "AMR.LIB"],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    # Read in version.
    with open(os.path.join(output_directory, VERSION_FILENAME)) as version_file:
        return version_file.readline().rstrip()
65 | |
def print_json(version, argspath, argsname, argsout):
    """Write a one-row ``amrfinderplus`` data table description as JSON.

    Args:
        version: Stored under the row's ``value`` key.
        argspath: Stored under the row's ``path`` key.
        argsname: Stored under the row's ``name`` key.
        argsout: Path of the file to write; it is opened in ``'w'`` mode,
            so existing content is replaced.
    """
    row = {"value": version, "name": argsname, "path": argspath}
    document = {'data_tables': {'amrfinderplus': [row]}}

    # Serialize first, then write in one call; keys are sorted and the
    # output is indented by two spaces.
    serialized = json.dumps(document, indent=2, sort_keys=True)
    with open(argsout, 'w') as out_file:
        out_file.write(serialized)
82 | |
def main():
    """Command-line entry point: download, index and register the database.

    The file given by ``--out`` is first read as JSON to find
    ``output_data[0].extra_files_path``. The database is downloaded and
    indexed into that directory, and the same ``--out`` file is then
    overwritten with the resulting data table entry.
    """
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()
    out_path = args.out[0]

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Create missing parent directories and tolerate a directory that is
    # already present; a plain mkdir fails in both situations.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the files and build blast databases.
    version = download_from_ncbi(output_directory)

    # NOTE(review): --name is required but its value is never read; the
    # display name is always derived from the downloaded version. Confirm
    # whether that is intended.
    tablename = "AMRFinderPlus Database " + version

    print_json(version, output_directory, tablename, out_path)


if __name__ == "__main__":
    main()