annotate data_managers/data_manager_rgi_build_db/data_manager/import_data.py @ 0:715bc9aeef69 draft

planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
author card
date Wed, 27 Feb 2019 09:08:21 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
1 import argparse
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
2 import datetime
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
3 import json
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
4 import os
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
5 import shutil
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
6 import sys
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
7 import tarfile
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
8 import urllib.request, urllib.error, urllib.parse
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
9 import zipfile
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
10 import logging
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
11
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
# Working directory for the database build: <current working dir>/rgi-database.
# The functions in this module read and write proteindb.fsa and protein.db here.
path = os.path.join(os.getcwd(), 'rgi-database')
# card.json is read from the same directory.
data_path = path

# Module logger. Its level is WARNING, so logger.info() messages are not
# emitted unless the level is changed.
level = logging.WARNING
logger = logging.getLogger(__name__)
logger.setLevel(level)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
18
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def url_download(url, workdir):
    '''Download an archive from ``url`` and unpack its card.json into ``workdir``.

    The download is stored temporarily as ``<workdir>/download.dat``. If it is
    a tar (optionally compressed) or zip archive, every regular member whose
    base name is ``card.json`` is written to ``<workdir>/card.json`` (directory
    components inside the archive are dropped) and the temporary download is
    removed afterwards.

    url     -- location of the archive; anything urllib.request can open.
    workdir -- target directory; created if it does not exist.

    Returns None. If the download fails, the error is printed to stderr and
    nothing is unpacked. If the download is neither a tar nor a zip archive,
    it is left in place as ``download.dat`` and nothing is unpacked.
    '''
    file_path = os.path.join(workdir, 'download.dat')
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    try:
        req = urllib.request.Request(url)
        # Context managers close both ends even if the transfer is interrupted.
        with urllib.request.urlopen(req) as src, open(file_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
    except Exception as e:
        print(str(e), file=sys.stderr)
        # Nothing usable was downloaded, so there is nothing to unpack.
        return

    # extract only one file : card.json
    if tarfile.is_tarfile(file_path):
        with tarfile.open(file_path, 'r:*') as archive:
            for member in archive.getmembers():
                # skip anything that is not a regular file
                if member.isreg() and os.path.basename(member.name) == 'card.json':
                    with archive.extractfile(member) as payload:
                        _save_card_json(payload, workdir)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as archive:
            for info in archive.infolist():
                # Directory entries end with '/', so their base name is ''
                # and they can never match.
                if os.path.basename(info.filename) == 'card.json':
                    with archive.open(info) as payload:
                        _save_card_json(payload, workdir)
    else:
        return
    os.remove(file_path)


def _save_card_json(payload, workdir):
    '''Copy the open archive member ``payload`` to ``<workdir>/card.json``.'''
    print('[import_data] extracting file: {}'.format('card.json'))
    with open(os.path.join(workdir, 'card.json'), 'wb') as target:
        shutil.copyfileobj(payload, target)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
56
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def checkKeyExisted(key, my_dict):
    '''Return True when ``my_dict[key]`` exists and is not None, else False.

    A missing key (KeyError) is reported as False; any other error raised by
    the lookup propagates to the caller.
    '''
    try:
        value = my_dict[key]
    except KeyError:
        return False
    return value is not None
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
63
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def data_version():
    '''Return the ``_version`` entry of card.json, or '' when it has none.

    card.json is read from the module-level ``data_path`` directory. Errors
    from opening or parsing the file propagate to the caller.
    '''
    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(data_path, 'card.json')) as json_file:
        json_data = json.load(json_file)
    return json_data.get('_version', '')
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
73
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def makeBlastDB():
    '''Build a protein BLAST database from proteindb.fsa, if that file exists.

    Runs ``makeblastdb -in <path>/proteindb.fsa -dbtype prot -out
    <path>/protein.db`` with its output discarded, where ``path`` is the
    module-level working directory. Does nothing when proteindb.fsa is
    missing. A failure to start the tool or a non-zero exit status is logged
    but not raised.
    '''
    # Local import so this function does not depend on the module's import block.
    import subprocess

    fasta = os.path.join(path, 'proteindb.fsa')
    if not os.path.isfile(fasta):
        return

    print('[import_data] create blast DB.')
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command = [
        'makeblastdb',
        '-in', fasta,
        '-dbtype', 'prot',
        '-out', os.path.join(path, 'protein.db'),
    ]
    try:
        status = subprocess.call(
            command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError as e:
        # e.g. makeblastdb is not installed / not on PATH
        logger.error('Could not run makeblastdb: %s', e)
        return
    if status != 0:
        logger.warning('makeblastdb exited with status %s', status)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
78
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def makeDiamondDB():
    '''Build a DIAMOND database from proteindb.fsa, if that file exists.

    Runs ``diamond makedb --quiet --in <path>/proteindb.fsa --db
    <path>/protein.db``, where ``path`` is the module-level working directory.
    Does nothing when proteindb.fsa is missing. A failure to start the tool or
    a non-zero exit status is logged but not raised.
    '''
    # Local import so this function does not depend on the module's import block.
    import subprocess

    fasta = os.path.join(path, 'proteindb.fsa')
    if not os.path.isfile(fasta):
        return

    print('[import_data] create diamond DB.')
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    command = [
        'diamond', 'makedb', '--quiet',
        '--in', fasta,
        '--db', os.path.join(path, 'protein.db'),
    ]
    try:
        status = subprocess.call(command)
    except OSError as e:
        # e.g. diamond is not installed / not on PATH
        logger.error('Could not run diamond: %s', e)
        return
    if status != 0:
        logger.warning('diamond makedb exited with status %s', status)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
83
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def write_fasta_from_json():
    '''Creates a fasta file from card.json file.

    Reads ``<data_path>/card.json`` and writes ``<path>/proteindb.fsa``. Does
    nothing if proteindb.fsa already exists. Only entries whose key is all
    digits and whose ``model_type_id`` is one of the following are exported:

        40292  protein homolog model
        40293  protein variant model
        41091  protein overexpression model

    Each sequence of an exported model becomes one FASTA record; the variant
    and overexpression headers additionally carry the model's SNP list.

    A model that lacks a bit score, lacks SNPs (variant/overexpression only)
    or has incomplete sequence data is skipped as a whole with a warning, so
    the output never contains a header without its sequence or SNPs that
    belong to a different model.

    If card.json cannot be read or parsed, the error is logged and the process
    exits with status 1.
    '''
    fasta_path = os.path.join(path, 'proteindb.fsa')
    if os.path.isfile(fasta_path):
        return

    try:
        with open(os.path.join(data_path, 'card.json'), 'r') as jfile:
            j = json.load(jfile)
    except Exception as e:
        logger.error(e)
        # Non-zero status so the caller can tell the build failed.
        sys.exit(1)

    contact = 'Please let the CARD Admins know! Email: card@mcmaster.ca'

    with open(fasta_path, 'w') as fout:
        for i in j:
            # Model entries are keyed by numeric ids; other keys are metadata.
            if not i.isdigit():
                continue
            model = j[i]
            model_type_id = model['model_type_id']
            if model_type_id not in ('40292', '40293', '41091'):
                continue

            try:
                pass_bit_score = model['model_param']['blastp_bit_score']['param_value']
            except KeyError:
                logger.warning('No bitscore for model (%s, %s). RGI will omit this model and keep running.',
                               model['model_id'], model['model_name'])
                logger.info(contact)
                continue

            if model_type_id == '40292':
                # model_type: protein homolog model
                description = 'model_type_id: 40292 | pass_bitscore: %s' % (pass_bit_score,)
            else:
                # model_type: protein variant / protein overexpression model
                try:
                    snp_values = model['model_param']['snp']['param_value']
                    snps = ','.join([snp_values[k] for k in snp_values])
                except Exception:
                    logger.warning('No snp for model (%s, %s). RGI will omit this model and keep running.',
                                   model['model_id'], model['model_name'])
                    logger.info(contact)
                    # Skip the model instead of reusing SNPs left over from a
                    # previously processed model.
                    continue
                description = 'model_type_id: %s | pass_bit_score: %s | SNP: %s' \
                    % (model_type_id, pass_bit_score, snps)

            # Resolve every record before writing anything, so a malformed
            # entry cannot leave a header without a sequence in the file.
            try:
                aro_name = model['ARO_name']
                sequences = model['model_sequences']['sequence']
                records = [
                    ('>%s_%s | %s | %s\n' % (i, seq, description, aro_name),
                     '%s\n' % (sequences[seq]['protein_sequence']['sequence'],))
                    for seq in sequences
                ]
            except Exception:
                logger.warning('No model sequences for model (%s, %s). RGI will omit this model and keep running.',
                               model['model_id'], model['model_name'])
                logger.info(contact)
                continue

            for header, protein in records:
                fout.write(header)
                fout.write(protein)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
169
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def _main(args):
    """Download the CARD data and build the RGI databases from it.

    The steps run in a fixed order: make sure the data directory exists,
    download the archive, then call the module's ``write_fasta_from_json``,
    ``makeBlastDB`` and ``makeDiamondDB`` helpers before reading the data
    version.

    NOTE(review): ``path`` and the helper functions called below are not
    defined in this function; they are expected to exist at module level.

    Args:
        args: Parsed command-line namespace. Only ``args.url`` is read; when
            it is ``None`` the default CARD download URL is used.

    Returns:
        Whatever ``data_version()`` returns for the freshly imported data.
    """
    default_url = 'https://card.mcmaster.ca/latest/data'

    if not os.path.exists(path):
        print('[import_data] mkdir: {}'.format(path))
        os.makedirs(path)
    print('[import_data] path: {}'.format(path))
    print(args)

    # Identity check: ``None`` is a singleton, so ``is`` is the correct test
    # (``== None`` can be fooled by objects overriding ``__eq__``).
    url = default_url if args.url is None else args.url
    print('[import_data] url: {}'.format(url))

    workdir = os.path.join(os.getcwd(), 'rgi-database')
    print('[import_data] working directory: {}'.format(workdir))

    # Order matters: the download must finish before the FASTA export and
    # the database builds that follow it.
    url_download(url, workdir)
    write_fasta_from_json()
    makeBlastDB()
    makeDiamondDB()

    version = data_version()
    print('[import_data] data version: {}'.format(version))
    return version
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
191
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
def run():
    """Parse the command line and pass the resulting options to ``_main``.

    The only recognised option is ``--url``, stored as ``args.url``; it is
    ``None`` when the option is not given.
    """
    cli = argparse.ArgumentParser(description='Create data manager json.')
    cli.add_argument(
        '--url',
        dest='url',
        action='store',
        help='Url for CARD data',
    )
    options = cli.parse_args()
    _main(options)
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
197
715bc9aeef69 planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
card
parents:
diff changeset
# Script entry point: run the import only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()