annotate resfinder/cge/standardize_results.py @ 0:a16d245332d6 draft default tip

Uploaded
author dcouvin
date Wed, 08 Dec 2021 01:46:07 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
1 #!/usr/bin/env python3
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
2 import random
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
3 import string
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
4
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
5 from .phenotype2genotype.feature import ResGene, ResMutation
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
6 from .phenotype2genotype.res_profile import PhenoDB
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
7 from .out.util.generator import Generator
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
8
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
9 import json
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
10
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
11
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class SeqVariationResult(dict):
    """Dict describing a single sequence variation found in a region.

    The entry is built from one ``mismatch`` record (an indexable sequence)
    and the gene results of the region(s) the variation was found in.
    """

    # Highest index read from a mismatch record that carries amino acid
    # information (index 7 is the reference aa, index 8 the variant aa).
    _VAR_AA_INDEX = 8

    def __init__(self, res_collection, mismatch, region_results, db_name):
        """Populate the entry from a mismatch record.

        Args:
            res_collection: Result collection; ``res_collection
                ["seq_variations"]`` is consulted to keep keys unique and
                ``res_collection["databases"]`` to resolve the database key.
            mismatch: Indexable record. Index 0 is the variation type
                ("sub", "ins" or "del"), 1 and 2 the reference start/end
                positions, 4 the string stored as "seq_var", 5 and 6 the
                reference/variant codon. Indexes 7 and 8, when both exist,
                are the reference/variant amino acid.
            region_results: Non-empty list of gene result dicts; each must
                have a "key" entry and the first one a "ref_id" entry.
            db_name: Name of the reference database to link to.
        """
        self.res_collection = res_collection
        self.load_var_type(mismatch[0])
        self["ref_start_pos"] = mismatch[1]
        self["ref_end_pos"] = mismatch[2]
        mut_string = mismatch[4]
        self["ref_codon"] = mismatch[5].lower()
        self["var_codon"] = mismatch[6].lower()

        has_aa = SeqVariationResult._has_aa_change(mismatch)
        if has_aa:
            self["ref_aa"] = mismatch[7].lower()
            self["var_aa"] = mismatch[8].lower()

        region_name = region_results[0]["ref_id"]
        region_name = PhenoDB.if_promoter_rename(region_name)

        self["type"] = "seq_variation"
        # The id ends with the variant amino acid when one is available and
        # with the variant codon otherwise.
        variant = self["var_aa"] if has_aa else self["var_codon"]
        self["ref_id"] = "{id}{deli}{pos}{deli}{var}".format(
            id=region_name, pos=self["ref_start_pos"], var=variant, deli="_")
        self["key"] = self._get_unique_key()
        self["seq_var"] = mut_string

        # Only report a codon change when the reference is a full codon.
        if len(self["ref_codon"]) == 3:
            self["codon_change"] = "{}>{}".format(self["ref_codon"],
                                                  self["var_codon"])

        self["ref_database"] = DatabaseHandler.get_key(res_collection,
                                                       db_name)
        self["genes"] = [result["key"] for result in region_results]

    @staticmethod
    def _has_aa_change(mismatch):
        """Return True if the record holds both amino acid fields (7 and 8).

        The previous check (``len(mismatch) > 7``) accepted records with
        exactly eight fields and then failed reading index 8.
        """
        return len(mismatch) > SeqVariationResult._VAR_AA_INDEX

    def load_var_type(self, type):
        """Set the "substitution", "deletion" and "insertion" flags.

        Exactly one flag is True for "sub", "del" or "ins"; any other value
        leaves all three False.
        """
        self["substitution"] = (type == "sub")
        self["deletion"] = (type == "del")
        self["insertion"] = (type == "ins")

    def _get_unique_key(self, delimiter=";;"):
        """Return a key not yet present in the "seq_variations" results.

        The key is the entry's "ref_id". If that is already taken, the
        delimiter and a random string are appended until the key is unused.
        """
        minimum_key = self["ref_id"]
        unique_key = minimum_key
        while unique_key in self.res_collection["seq_variations"]:
            rnd_str = GeneResult.randomString()
            unique_key = "{key}{deli}{rnd}".format(
                key=minimum_key, deli=delimiter, rnd=rnd_str)

        return unique_key
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
75
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
76
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class GeneResult(dict):
    """Dict describing a single gene hit in the standardized result format."""

    def __init__(self, res_collection, res, db_name):
        """Populate the entry from one raw hit.

        Args:
            res_collection: Result collection; ``res_collection["genes"]``
                is consulted to keep keys unique and ``res_collection
                ["databases"]`` to resolve the database key.
            res: Mapping describing the hit. Reads "sbjct_header",
                "sbjct_start", "sbjct_end", "perc_ident", "HSP_length",
                "sbjct_length", "contig_name", "query_start", "query_end",
                either "coverage" or "perc_coverage", and optionally
                "depth".
            db_name: "ResFinder" or "PointFinder".

        Raises:
            ValueError: If ``db_name`` is neither of the supported names.
        """
        self.db_name = db_name
        self["type"] = "gene"

        self["ref_id"] = PhenoDB.if_promoter_rename(res["sbjct_header"])

        if db_name == "ResFinder":
            self["name"], self.variant, self["ref_acc"] = (
                GeneResult._split_sbjct_header(self["ref_id"]))
        elif db_name == "PointFinder":
            self["name"] = self["ref_id"]

        self["ref_start_pos"] = res["sbjct_start"]
        self["ref_end_pos"] = res["sbjct_end"]
        self["identity"] = res["perc_ident"]
        self["alignment_length"] = res["HSP_length"]
        # The misspelled key is what this module emits; it is kept as is
        # because consumers of the output read it under this name.
        self["ref_gene_lenght"] = res["sbjct_length"]
        self["query_id"] = res["contig_name"]
        self["query_start_pos"] = res["query_start"]
        self["query_end_pos"] = res["query_end"]
        self["key"] = self._get_unique_gene_key(res_collection)

        # BLAST coverage formatted results
        coverage = res.get("coverage", None)
        if coverage is None:
            # KMA coverage formatted results
            coverage = res["perc_coverage"]
        else:
            coverage = float(coverage) * 100
        self["coverage"] = coverage

        depth = res.get("depth", None)
        if depth is not None:
            self["depth"] = depth

        self["ref_database"] = DatabaseHandler.get_key(res_collection,
                                                       db_name)
        self.remove_NAs()

    @staticmethod
    def _split_sbjct_header(header):
        """Split a header on "_" into (template, variant, accession).

        Everything after the second field is re-joined with "_" to form the
        accession. A header without "_" yields ``(header, None, None)``.
        """
        sbjct = header.split("_")
        template = sbjct[0]

        if len(sbjct) > 1:
            variant = sbjct[1]
            acc = "_".join(sbjct[2:])
        else:
            variant = None
            acc = None

        return (template, variant, acc)

    def remove_NAs(self):
        """Delete every entry whose value is "NA" or None."""
        # Collect first: the dict must not change size while iterated.
        na_keys = [key for key, val in self.items()
                   if val == "NA" or val is None]
        for key in na_keys:
            del self[key]

    def _get_unique_gene_key(self, res_collection, delimiter=";;"):
        """Return the key under which this hit is stored in "genes".

        The base key is "name;;variant;;ref_acc" for ResFinder and the gene
        name for PointFinder. If the base key already exists and belongs to
        a different query location (or the query id is "NA"), a random
        suffix is appended; if it refers to the same query location the
        existing key is reused.

        Raises:
            ValueError: If ``self.db_name`` is not a supported database.
                Previously this surfaced as an UnboundLocalError.
        """
        if self.db_name == "ResFinder":
            gene_key = ("{name}{deli}{var}{deli}{ref_acc}"
                        .format(deli=delimiter, var=self.variant, **self))
        elif self.db_name == "PointFinder":
            gene_key = self["name"]
        else:
            raise ValueError(
                "Unsupported database name: {!r}".format(self.db_name))

        genes = res_collection["genes"]
        if gene_key not in genes:
            return gene_key

        known = genes[gene_key]
        # "NA" is checked first so the stored hit is only compared when the
        # query id is meaningful.
        if (self["query_id"] == "NA"
                or self["query_id"] != known["query_id"]
                or self["query_start_pos"] != known["query_start_pos"]
                or self["query_end_pos"] != known["query_end_pos"]):
            return self.get_rnd_unique_gene_key(
                gene_key, res_collection, gene_key, delimiter)

        return gene_key

    def get_rnd_unique_gene_key(self, gene_key, res_collection,
                                minimum_gene_key, delimiter):
        """Append random suffixes to ``minimum_gene_key`` until unused.

        ``gene_key`` is returned unchanged if it is not in
        ``res_collection["genes"]``.
        """
        while gene_key in res_collection["genes"]:
            rnd_str = GeneResult.randomString()
            gene_key = "{key}{deli}{rnd}".format(
                key=minimum_gene_key, deli=delimiter, rnd=rnd_str)
        return gene_key

    @staticmethod
    def randomString(stringLength=4):
        """Return ``stringLength`` random lowercase ASCII letters."""
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for _ in range(stringLength))
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
176
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
177
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class PhenotypeResult(dict):
    """Dict describing the predicted phenotype for one antibiotic."""

    def __init__(self, antibiotic):
        """Create a non-resistant phenotype entry.

        ``antibiotic`` must expose ``name`` and ``classes`` attributes; the
        name is used both as the entry key and as the "resistance" value.
        """
        self.update({
            "type": "phenotype",
            "category": "amr",
            "key": antibiotic.name,
            "amr_classes": antibiotic.classes,
            "resistance": antibiotic.name,
            "resistant": False,
        })

    def set_resistant(self, res):
        """Store ``res`` as the "resistant" flag."""
        self["resistant"] = res

    def add_feature(self, res_collection, isolate, feature):
        """Cross-link this phenotype with the results matching ``feature``.

        A feature usually maps to exactly one result, but when several
        identical features were found in a sample each of them has its own
        key while sharing the same ref id, so all of them are linked.
        """
        ref_id, feat_type = PhenotypeResult.get_ref_id_and_type(feature,
                                                                isolate)
        matching_keys = PhenotypeResult.get_keys_matching_ref_id(
            ref_id, res_collection[feat_type])

        # Phenotype -> features: extend the list stored under the feature
        # type (a new list is built, the stored one is not modified).
        self[feat_type] = self.get(feat_type, []) + matching_keys

        # Features -> phenotype: register this phenotype's key on each one.
        feature_results = res_collection[feat_type]
        for matched_key in matching_keys:
            entry = feature_results[matched_key]
            linked = entry.get("phenotypes", [])
            linked.append(self["key"])
            entry["phenotypes"] = linked
            # NOTE(review): the scraped source lost its indentation; the
            # database assignment is assumed to be part of this loop, i.e.
            # it only happens when at least one result matched - confirm.
            if feat_type == "genes":
                db_key = DatabaseHandler.get_key(res_collection, "ResFinder")
            elif feat_type == "seq_variations":
                db_key = DatabaseHandler.get_key(res_collection,
                                                 "PointFinder")
            self["ref_database"] = db_key

    @staticmethod
    def get_ref_id_and_type(feature, isolate):
        """Return ``(ref_id, type)`` for a feature.

        ResGene features give type "genes" with the id looked up in the
        isolate's ``id_to_idwithvar`` table; ResMutation features give type
        "seq_variations" with their own unique id. Anything else gives
        ``(None, None)``.
        """
        if isinstance(feature, ResGene):
            id_table = isolate.resprofile.phenodb.id_to_idwithvar
            return (id_table[feature.unique_id], "genes")
        if isinstance(feature, ResMutation):
            return (feature.unique_id, "seq_variations")
        return (None, None)

    @staticmethod
    def get_keys_matching_ref_id(ref_id, res_collection):
        """Return the keys of all results whose "ref_id" equals ``ref_id``.

        ``res_collection`` is the mapping of a single result type (key ->
        result dict), not the whole collection.
        """
        return [key for key, results in res_collection.items()
                if ref_id == results["ref_id"]]
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
238
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
239
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class ResFinderResultHandler():
    """Static helpers converting ResFinder output to standardized results."""

    @staticmethod
    def load_res_profile(res_collection, isolate):
        """Add one phenotype entry per antibiotic known to the phenotype db.

        An antibiotic found in the isolate's resistance profile is flagged
        resistant and linked to every ResGene feature listed for it.
        """
        resistance = isolate.resprofile.resistance
        # Antibiotics are grouped per antibiotic class.
        for class_antibiotics in (
                isolate.resprofile.phenodb.antibiotics.values()):
            for phenodb_ab in class_antibiotics:
                phenotype = PhenotypeResult(phenodb_ab)

                if phenodb_ab in resistance:
                    phenotype.set_resistant(True)
                    for feature in resistance[phenodb_ab].features.values():
                        if isinstance(feature, ResGene):
                            phenotype.add_feature(res_collection, isolate,
                                                  feature)

                res_collection.add_class(cl="phenotypes", **phenotype)

    @staticmethod
    def standardize_results(res_collection, res, ref_db_name):
        """Store every non-excluded hit in ``res`` as a gene result.

        ``res`` maps database names to either the string "No hit found" or
        a mapping of hit id -> hit; its "excluded" entry holds the ids to
        skip. A hit whose key already exists in the collection modifies
        the stored entry instead of adding a new one.
        """
        for db_name, db in res.items():
            if db_name == "excluded" or db == "No hit found":
                continue

            for unique_id, hit_db in db.items():
                if unique_id in res["excluded"]:
                    continue

                gene_result = GeneResult(res_collection, hit_db, ref_db_name)
                already_stored = (
                    gene_result["key"] in res_collection["genes"])
                store = (res_collection.modify_class if already_stored
                         else res_collection.add_class)
                store(cl="genes", **gene_result)
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
278
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
279
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class DatabaseHandler():
    """Static helpers for the "databases" part of a result collection."""

    @staticmethod
    def load_database_metadata(name, res_collection, db_dir):
        """Register database ``name`` located in ``db_dir``.

        Version and commit are obtained from
        ``Generator.get_version_commit(db_dir)``; the entry key is
        "<name>-<version>".
        """
        version, commit = Generator.get_version_commit(db_dir)
        database_metadata = {
            "type": "database",
            "database_name": name,
            "database_version": version,
            "key": "{}-{}".format(name, version),
            "database_commit": commit,
        }
        res_collection.add_class(cl="databases", **database_metadata)

    @staticmethod
    def get_key(res_collection, name):
        """Return the key of the first database entry named ``name``.

        Returns None when no entry in ``res_collection["databases"]`` has
        that "database_name".
        """
        matches = (key for key, val in res_collection["databases"].items()
                   if val["database_name"] == name)
        return next(matches, None)
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
300
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
301
a16d245332d6 Uploaded
dcouvin
parents:
diff changeset
class PointFinderResultHandler():
    """Static helpers converting PointFinder output to standardized results."""

    @staticmethod
    def load_res_profile(res_collection, isolate):
        """Add one phenotype entry per antibiotic known to the phenotype db.

        An antibiotic found in the isolate's resistance profile is flagged
        resistant and linked to every ResMutation feature listed for it.
        """
        # For each antibiotic class
        for ab_class in isolate.resprofile.phenodb.antibiotics.keys():
            # For each antibiotic in current class
            for phenodb_ab in isolate.resprofile.phenodb.antibiotics[ab_class]:

                phenotype = PhenotypeResult(phenodb_ab)
                # Isolate is resistant towards the antibiotic
                if phenodb_ab in isolate.resprofile.resistance:
                    phenotype.set_resistant(True)

                    isolate_ab = isolate.resprofile.resistance[phenodb_ab]
                    for feature in isolate_ab.features.values():
                        if isinstance(feature, ResMutation):
                            phenotype.add_feature(res_collection, isolate,
                                                  feature)
                res_collection.add_class(cl="phenotypes", **phenotype)

    @staticmethod
    def standardize_results(res_collection, res, ref_db_name):
        """Store the gene hits and sequence variations found in ``res``.

        ``res`` maps gene names to per-gene results; its "excluded" entry
        holds the gene names / hit ids to skip. A per-gene result is either
        a string (no usable hit, skipped) or a mapping with a "mis_matches"
        list and either a "hits" mapping (BLAST) or the hit fields directly
        (KMA). ``res`` itself is not modified.
        """
        for gene_name, db in res.items():
            if gene_name == "excluded" or db == "No hit found":
                continue

            # Added to solve current PointFinder: genes can be excluded by
            # name, or be reported as a "Gene found with coverage ..."
            # message string instead of a result mapping.
            if gene_name in res["excluded"]:
                continue
            if (isinstance(db, str)
                    and db.startswith("Gene found with coverage")):
                continue

            gene_results = []

            # For BLAST results. Work on a copy: the KMA fallback below adds
            # an entry, and doing that on the caller's own (empty) "hits"
            # dict would make ``db`` contain a reference to itself.
            db_hits = dict(db.get("hits") or {})

            # For KMA results
            if not db_hits:
                db_hits[db["sbjct_header"]] = db

            for unique_id, hit_db in db_hits.items():
                if unique_id in res["excluded"]:
                    continue

                gene_result = GeneResult(res_collection, hit_db, ref_db_name)
                res_collection.add_class(cl="genes", **gene_result)
                gene_results.append(gene_result)

            for mismatch in db["mis_matches"]:
                seq_var_result = SeqVariationResult(
                    res_collection, mismatch, gene_results, ref_db_name)
                res_collection.add_class(cl="seq_variations",
                                         **seq_var_result)