Mercurial > repos > dcouvin > resfinder4
comparison resfinder/cge/resfinder.py @ 0:55051a9bc58d draft default tip
Uploaded
| author | dcouvin |
|---|---|
| date | Mon, 10 Jan 2022 20:06:07 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:55051a9bc58d |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 from __future__ import division | |
| 3 import sys | |
| 4 import os | |
| 5 import time | |
| 6 import random | |
| 7 import re | |
| 8 from argparse import ArgumentParser | |
| 9 from tabulate import tabulate | |
| 10 import collections | |
| 11 | |
| 12 from cgecore.blaster import Blaster | |
| 13 from cgecore.cgefinder import CGEFinder | |
| 14 from .output.table import TableResults | |
| 15 | |
| 16 | |
class ResFinder(CGEFinder):

    def __init__(self, db_conf_file, notes, db_path, db_path_kma=None,
                 databases=None):
        """Configure a ResFinder run.

        Loads the database configuration, the selected databases and the
        gene-to-phenotype notes, and prepares the result holders.

        :param db_conf_file: path to the database config file.
        :param notes: path to the notes.txt phenotype file.
        :param db_path: directory holding the BLAST-formatted databases.
        :param db_path_kma: directory holding the KMA-indexed databases;
            falls back to db_path when not given.
        :param databases: comma-separated database prefixes, or None to
            use every configured database.
        """
        self.db_path = db_path
        # KMA databases default to the plain database directory.
        self.db_path_kma = db_path if db_path_kma is None else db_path_kma

        self.configured_dbs = dict()
        self.kma_db_files = None
        self.load_db_config(db_conf_file=db_conf_file)

        self.databases = []
        self.load_databases(databases=databases)

        self.phenos = dict()
        self.load_notes(notes=notes)

        self.blast_results = None
| 43 | |
| 44 @staticmethod | |
| 45 def old_results_to_standard_output(result, software, version, run_date, | |
| 46 run_cmd, id, mutation=False, | |
| 47 tableresults=None): | |
| 48 """ | |
| 49 """ | |
| 50 std_results = TableResults(software, version, run_date, run_cmd, id) | |
| 51 headers = [ | |
| 52 "template_name", | |
| 53 "template_length", | |
| 54 "template_start_pos", | |
| 55 "template_end_pos", | |
| 56 "aln_length", | |
| 57 "aln_identity", | |
| 58 "aln_gaps", | |
| 59 "aln_template_string", | |
| 60 "aln_query_string", | |
| 61 "aln_homology_string", | |
| 62 "template_variant", | |
| 63 "acc_no", | |
| 64 "query_id", | |
| 65 "query_start_pos", | |
| 66 "query_end_pos", | |
| 67 "query_depth", | |
| 68 "blast_eval", | |
| 69 "blast_bitscore", | |
| 70 "pval", | |
| 71 "software_score" | |
| 72 ] | |
| 73 | |
| 74 for db_name, db in result.items(): | |
| 75 if(db_name == "excluded"): | |
| 76 continue | |
| 77 | |
| 78 if(db == "No hit found"): | |
| 79 continue | |
| 80 | |
| 81 std_results.add_table(db_name) | |
| 82 std_db = std_results.long[db_name] | |
| 83 std_db.add_headers(headers) | |
| 84 std_db.lock_headers = True | |
| 85 | |
| 86 for unique_id, hit_db in db.items(): | |
| 87 if(unique_id in result["excluded"]): | |
| 88 continue | |
| 89 # TODO: unique_id == unique_db_id | |
| 90 sbjct = hit_db["sbjct_header"].split("_") | |
| 91 template = sbjct[0] | |
| 92 variant = sbjct[1] | |
| 93 acc = "_".join(sbjct[2:]) | |
| 94 unique_db_id = ("{}_{}".format(template, acc)) | |
| 95 std_db[unique_db_id] = { | |
| 96 "template_name": template, | |
| 97 "template_variant": variant, | |
| 98 "acc_no": acc, | |
| 99 "template_length": hit_db["sbjct_length"], | |
| 100 "template_start_pos": hit_db["sbjct_start"], | |
| 101 "template_end_pos": hit_db["sbjct_end"], | |
| 102 "aln_length": hit_db["HSP_length"], | |
| 103 "aln_identity": hit_db["perc_ident"], | |
| 104 "aln_gaps": hit_db["gaps"], | |
| 105 "aln_template_string": hit_db["sbjct_string"], | |
| 106 "aln_query_string": hit_db["query_string"], | |
| 107 "aln_homology_string": hit_db["homo_string"], | |
| 108 "query_id": hit_db["contig_name"], | |
| 109 "query_start_pos": hit_db["query_start"], | |
| 110 "query_end_pos": hit_db["query_end"], | |
| 111 "query_depth": hit_db.get("query_depth", "NA"), | |
| 112 "blast_eval": hit_db.get("evalue", "NA"), | |
| 113 "blast_bitscore": hit_db.get("bit", "NA"), | |
| 114 "pval": hit_db.get("p_value", "NA"), | |
| 115 "software_score": hit_db["cal_score"] | |
| 116 } | |
| 117 | |
| 118 return std_results | |
| 119 | |
| 120 def write_results(self, out_path, result, res_type, software="ResFinder"): | |
| 121 """ | |
| 122 """ | |
| 123 if(res_type == ResFinder.TYPE_BLAST): | |
| 124 result_str = self.results_to_str( | |
| 125 res_type=res_type, results=result.results, | |
| 126 query_align=result.gene_align_query, | |
| 127 homo_align=result.gene_align_homo, | |
| 128 sbjct_align=result.gene_align_sbjct) | |
| 129 elif(res_type == ResFinder.TYPE_KMA): | |
| 130 result_str = self.results_to_str(res_type=res_type, | |
| 131 results=result) | |
| 132 | |
| 133 with open(out_path + "/{}_results_tab.txt".format(software), "w") as fh: | |
| 134 fh.write(result_str[0]) | |
| 135 with open(out_path + "/{}_results_table.txt".format(software), "w") as fh: | |
| 136 fh.write(result_str[1]) | |
| 137 with open(out_path + "/{}_results.txt".format(software), "w") as fh: | |
| 138 fh.write(result_str[2]) | |
| 139 with open(out_path + "/{}_Resistance_gene_seq.fsa".format(software), "w") as fh: | |
| 140 fh.write(result_str[3]) | |
| 141 with open(out_path + "/{}_Hit_in_genome_seq.fsa".format(software), "w") as fh: | |
| 142 fh.write(result_str[4]) | |
| 143 | |
| 144 def blast(self, inputfile, out_path, min_cov=0.9, threshold=0.6, | |
| 145 blast="blastn", allowed_overlap=0): | |
| 146 """ | |
| 147 """ | |
| 148 blast_run = Blaster(inputfile=inputfile, databases=self.databases, | |
| 149 db_path=self.db_path, out_path=out_path, | |
| 150 min_cov=min_cov, threshold=threshold, blast=blast, | |
| 151 allowed_overlap=allowed_overlap) | |
| 152 self.blast_results = blast_run.results | |
| 153 return blast_run | |
| 154 | |
    def results_to_str(self, res_type, results, query_align=None,
                       homo_align=None, sbjct_align=None):
        """Render results into the five textual output formats.

        :param res_type: ResFinder.TYPE_BLAST or ResFinder.TYPE_KMA;
            anything else terminates the program.
        :param results: hit dictionary keyed by database name, plus an
            "excluded" entry listing hit ids to skip.
        :param query_align: per-db/per-hit query alignment strings
            (BLAST only; KMA reads them from the hit dicts).
        :param homo_align: per-db/per-hit homology strings (BLAST only).
        :param sbjct_align: per-db/per-hit template alignment strings
            (BLAST only).
        :return: 5-tuple of strings: (tab-separated table, plain-text
            table, txt report with alignments, reference FASTA, hit FASTA).
        """

        if(res_type != ResFinder.TYPE_BLAST
           and res_type != ResFinder.TYPE_KMA):
            sys.exit("resfinder.py error: result method was not provided or "
                     "not recognized.")

        # Header for the tab-separated output file.
        tab_str = ("Resistance gene\tIdentity\tAlignment Length/Gene Length\t"
                   "Coverage\tPosition in reference\tContig\t"
                   "Position in contig\tPhenotype\tAccession no.\n")

        table_str = ""
        txt_str = ""
        ref_str = ""
        hit_str = ""

        # Per-database pieces collected for the txt report.
        titles = dict()
        rows = dict()
        headers = dict()
        txt_file_seq_text = dict()
        # NOTE(review): split_print is shared across all databases and is
        # never reset per db, so split hits collected for one db are
        # re-emitted while processing later dbs -- confirm this is intended.
        split_print = collections.defaultdict(list)

        for db in results:
            if(db == "excluded"):
                continue

            # Databases whose every hit is excluded are downgraded to the
            # "No hit found" sentinel before formatting.
            no_hits = True
            for hit in results[db]:
                if(hit not in results["excluded"]):
                    no_hits = False
                    break
            if(no_hits):
                results[db] = "No hit found"

            profile = str(self.configured_dbs[db][0])
            if results[db] == "No hit found":
                table_str += ("%s\n%s\n\n" % (profile, results[db]))
            else:
                titles[db] = "%s" % (profile)
                headers[db] = ["Resistance gene", "Identity",
                               "Alignment Length/Gene Length", "Coverage",
                               "Position in reference", "Contig",
                               "Position in contig", "Phenotype",
                               "Accession no."]
                table_str += ("%s\n" % (profile))
                table_str += ("Resistance gene\tIdentity\t"
                              "Alignment Length/Gene Length\tCoverage\t"
                              "Position in reference\tContig\t"
                              "Position in contig\tPhenotype\t"
                              "Accession no.\n")

                rows[db] = list()
                txt_file_seq_text[db] = list()

                for hit in results[db]:
                    if(hit in results["excluded"]):
                        continue

                    # Subject headers look like <gene>_<variant>_<acc...>;
                    # the accession itself may contain underscores.
                    res_header = results[db][hit]["sbjct_header"]
                    tmp = res_header.split("_")
                    gene = tmp[0]
                    acc = "_".join(tmp[2:])
                    ID = results[db][hit]["perc_ident"]
                    coverage = results[db][hit]["perc_coverage"]
                    sbjt_length = results[db][hit]["sbjct_length"]
                    HSP = results[db][hit]["HSP_length"]
                    positions_contig = "{}..{}".format(
                        results[db][hit]["query_start"],
                        results[db][hit]["query_end"])
                    positions_ref = "{}..{}".format(
                        results[db][hit]["sbjct_start"],
                        results[db][hit]["sbjct_end"])
                    contig_name = results[db][hit]["contig_name"]
                    pheno = self.phenos.get(gene, ("Warning: gene is missing "
                                                   "from Notes file. Please "
                                                   "inform curator."))

                    pheno = pheno.strip()

                    if "split_length" in results[db][hit]:
                        # Hit split across several contigs: remember all
                        # fragments so they can be merged after this loop.
                        total_HSP = results[db][hit]["split_length"]
                        split_print[res_header].append([gene, ID, total_HSP,
                                                        sbjt_length, coverage,
                                                        positions_ref,
                                                        contig_name,
                                                        positions_contig,
                                                        pheno, acc])
                        tab_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                    % (gene, ID, HSP, sbjt_length, coverage,
                                       positions_ref, contig_name,
                                       positions_contig, pheno, acc)
                                    )
                    else:
                        # Write the plain-text and tab-separated tables.
                        table_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s"
                                      "\n" % (gene, ID, HSP, sbjt_length,
                                              coverage, positions_ref,
                                              contig_name, positions_contig,
                                              pheno, acc)
                                      )
                        tab_str += ("%s\t%.2f\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                    % (gene, ID, HSP, sbjt_length, coverage,
                                       positions_ref, contig_name,
                                       positions_contig, pheno, acc)
                                    )

                    # Save the row for the txt result table.
                    hsp_length = "%s/%s" % (HSP, sbjt_length)
                    rows[db].append([gene, ID, hsp_length, coverage,
                                     positions_ref, contig_name,
                                     positions_contig, pheno, acc])

                    # Write the subject/reference sequence (FASTA, 60 cols).
                    if(res_type == ResFinder.TYPE_BLAST):
                        ref_seq = sbjct_align[db][hit]
                    elif(res_type == ResFinder.TYPE_KMA):
                        ref_seq = results[db][hit]["sbjct_string"]

                    ref_str += (">%s_%s\n" % (gene, acc))
                    for i in range(0, len(ref_seq), 60):
                        ref_str += ("%s\n" % (ref_seq[i:i + 60]))

                    # Header text for the txt-file alignment section.
                    sbjct_start = results[db][hit]["sbjct_start"]
                    sbjct_end = results[db][hit]["sbjct_end"]
                    text = ("%s, ID: %.2f %%, "
                            "Alignment Length/Gene Length: %s/%s, "
                            "Coverage: %s, "
                            "Positions in reference: %s..%s, Contig name: %s, "
                            "Position: %s" % (gene, ID, HSP, sbjt_length,
                                              coverage, sbjct_start, sbjct_end,
                                              contig_name, positions_contig))
                    hit_str += (">%s\n" % text)

                    # Write the query/hit sequence (FASTA, 60 cols).
                    if(res_type == ResFinder.TYPE_BLAST):
                        hit_seq = query_align[db][hit]
                    elif(res_type == ResFinder.TYPE_KMA):
                        hit_seq = results[db][hit]["query_string"]

                    for i in range(0, len(hit_seq), 60):
                        hit_str += ("%s\n" % (hit_seq[i:i + 60]))

                    # Save the (header, ref, homology, query) alignment
                    # triplet for the txt report.
                    if(res_type == ResFinder.TYPE_BLAST):
                        txt_file_seq_text[db].append((text, ref_seq,
                                                      homo_align[db][hit],
                                                      hit_seq))
                    elif(res_type == ResFinder.TYPE_KMA):
                        txt_file_seq_text[db].append(
                            (text, ref_seq, results[db][hit]["homo_string"],
                             hit_seq))

                for res in split_print:
                    # Merge the fragments of a split hit into one table row:
                    # start from the first fragment, then append the others'
                    # identity/positions/contig values.
                    gene = split_print[res][0][0]
                    ID = split_print[res][0][1]
                    HSP = split_print[res][0][2]
                    sbjt_length = split_print[res][0][3]
                    coverage = split_print[res][0][4]
                    positions_ref = split_print[res][0][5]
                    contig_name = split_print[res][0][6]
                    positions_contig = split_print[res][0][7]
                    pheno = split_print[res][0][8]
                    acc = split_print[res][0][9]

                    total_coverage = 0

                    for i in range(1, len(split_print[res])):
                        ID = "%s, %.2f" % (ID, split_print[res][i][1])
                        # NOTE(review): total_coverage is accumulated here but
                        # the row below prints the first fragment's coverage;
                        # confirm whether the sum was meant to be used.
                        total_coverage += split_print[res][i][4]
                        positions_ref = (positions_ref + ", "
                                         + split_print[res][i][5])
                        contig_name = (contig_name + ", "
                                       + split_print[res][i][6])
                        positions_contig = (positions_contig + ", "
                                            + split_print[res][i][7])

                    table_str += ("%s\t%s\t%s/%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                                  % (gene, ID, HSP, sbjt_length, coverage,
                                     positions_ref, contig_name,
                                     positions_contig, pheno, acc)
                                  )

                    hsp_length = "%s/%s" % (HSP, sbjt_length)

                    rows[db].append([gene, ID, hsp_length, coverage,
                                     positions_ref, contig_name,
                                     positions_contig, pheno, acc])

                table_str += ("\n")

        # Write the txt-file table for every db with hits.
        for db in titles:
            table = ResFinder.text_table(titles[db], headers[db], rows[db])
            txt_str += table

        # Write the txt-file alignments for every db with hits.
        for db in titles:
            txt_str += ("##################### %s #####################\n"
                        % (db))
            for text in txt_file_seq_text[db]:
                txt_str += ("%s\n\n" % (text[0]))
                for i in range(0, len(text[1]), 60):
                    txt_str += ("%s\n" % (text[1][i:i + 60]))
                    txt_str += ("%s\n" % (text[2][i:i + 60]))
                    txt_str += ("%s\n\n" % (text[3][i:i + 60]))
            txt_str += ("\n")

        return (tab_str, table_str, txt_str, ref_str, hit_str)
| 370 | |
| 371 @staticmethod | |
| 372 def text_table(title, headers, rows, table_format='psql'): | |
| 373 ''' Create text table | |
| 374 | |
| 375 USAGE: | |
| 376 >>> from tabulate import tabulate | |
| 377 >>> title = 'My Title' | |
| 378 >>> headers = ['A','B'] | |
| 379 >>> rows = [[1,2],[3,4]] | |
| 380 >>> print(text_table(title, headers, rows)) | |
| 381 +-----------+ | |
| 382 | My Title | | |
| 383 +-----+-----+ | |
| 384 | A | B | | |
| 385 +=====+=====+ | |
| 386 | 1 | 2 | | |
| 387 | 3 | 4 | | |
| 388 +-----+-----+ | |
| 389 ''' | |
| 390 # Create table | |
| 391 table = tabulate(rows, headers, tablefmt=table_format) | |
| 392 # Prepare title injection | |
| 393 width = len(table.split('\n')[0]) | |
| 394 tlen = len(title) | |
| 395 if tlen + 4 > width: | |
| 396 # Truncate oversized titles | |
| 397 tlen = width - 4 | |
| 398 title = title[:tlen] | |
| 399 spaces = width - 2 - tlen | |
| 400 left_spacer = ' ' * int(spaces / 2) | |
| 401 right_spacer = ' ' * (spaces - len(left_spacer)) | |
| 402 # Update table with title | |
| 403 table = '\n'.join(['+%s+' % ('-' * (width - 2)), | |
| 404 '|%s%s%s|' % (left_spacer, | |
| 405 title, right_spacer), | |
| 406 table, '\n']) | |
| 407 return table | |
| 408 | |
| 409 def load_notes(self, notes): | |
| 410 with open(notes, 'r') as f: | |
| 411 for line in f: | |
| 412 line = line.strip() | |
| 413 if line.startswith("#"): | |
| 414 continue | |
| 415 else: | |
| 416 tmp = line.split(":") | |
| 417 | |
| 418 self.phenos[tmp[0]] = "%s %s" % (tmp[1], tmp[2]) | |
| 419 | |
| 420 if(tmp[2].startswith("Alternate name; ")): | |
| 421 self.phenos[tmp[2][16:]] = "%s %s" % (tmp[1], tmp[2]) | |
| 422 | |
| 423 def load_databases(self, databases): | |
| 424 """ | |
| 425 """ | |
| 426 # Check if databases and config file are correct/correponds | |
| 427 if databases == '': | |
| 428 sys.exit("Input Error: No database was specified!\n") | |
| 429 elif databases is None: | |
| 430 # Choose all available databases from the config file | |
| 431 self.databases = self.configured_dbs.keys() | |
| 432 else: | |
| 433 # Handle multiple databases | |
| 434 databases = databases.split(',') | |
| 435 # Check that the ResFinder DBs are valid | |
| 436 for db_prefix in databases: | |
| 437 if db_prefix in self.configured_dbs: | |
| 438 self.databases.append(db_prefix) | |
| 439 else: | |
| 440 sys.exit("Input Error: Provided database was not " | |
| 441 "recognised! (%s)\n" % db_prefix) | |
| 442 | |
    def load_db_config(self, db_conf_file):
        """Parse the database config file and register available databases.

        Populates self.configured_dbs (db prefix -> list of names) and
        self.kma_db_files, and verifies that every database file implied
        by the config's "extensions:" comment exists on disk. Exits the
        program on any malformed or missing input.

        :param db_conf_file: path to the tab-separated config file.
        """
        extensions = []
        with open(db_conf_file) as f:
            for line in f:
                line = line.strip()

                if not line:
                    continue

                if line[0] == '#':
                    # A comment like "# extensions: fsa, ..." declares the
                    # file extensions each database must provide on disk.
                    if 'extensions:' in line:
                        extensions = [s.strip()
                                      for s in line.split('extensions:')[-1]
                                      .split(',')]
                    continue

                # Entries are exactly three tab-separated columns:
                # <db prefix> <name> <description>.
                tmp = line.split('\t')
                if len(tmp) != 3:
                    sys.exit(("Input Error: Invalid line in the database"
                              " config file!\nA proper entry requires 3 tab "
                              "separated columns!\n%s") % (line))

                db_prefix = tmp[0].strip()
                name = tmp[1].split('#')[0].strip()
                description = tmp[2]  # currently unused

                # Check that all declared db files are present.
                for ext in extensions:
                    db = "%s/%s.%s" % (self.db_path, db_prefix, ext)
                    if not os.path.exists(db):
                        sys.exit(("Input Error: The database file (%s) "
                                  "could not be found!") % (db))

                if db_prefix not in self.configured_dbs:
                    self.configured_dbs[db_prefix] = []
                self.configured_dbs[db_prefix].append(name)

        if len(self.configured_dbs) == 0:
            sys.exit("Input Error: No databases were found in the "
                     "database config file!")

        # Loading paths for KMA databases.
        # NOTE(review): self.kma_db_files is overwritten on every loop
        # iteration, so only the last drug's file list survives; also no
        # '/' separator is inserted between db_path_kma and the drug name
        # -- confirm both behaviors against the KMA call sites.
        for drug in self.configured_dbs:
            kma_db = self.db_path_kma + drug
            self.kma_db_files = [kma_db + ".b", kma_db + ".length.b",
                                 kma_db + ".name.b", kma_db + ".align.b"]
| 491 | |
| 492 | |
if __name__ == '__main__':

    ##########################################################################
    # PARSE COMMAND LINE OPTIONS
    ##########################################################################

    parser = ArgumentParser()
    parser.add_argument("-i", "--inputfile",
                        dest="inputfile",
                        help="Input file",
                        default=None)
    parser.add_argument("-1", "--fastq1",
                        help="Raw read data file 1.",
                        default=None)
    parser.add_argument("-2", "--fastq2",
                        help="Raw read data file 2 (only required if \
                              data is paired-end).",
                        default=None)
    parser.add_argument("-o", "--outputPath",
                        dest="out_path",
                        help="Path to blast output",
                        default='')

    parser.add_argument("-b", "--blastPath",
                        dest="blast_path",
                        help="Path to blast",
                        default='blastn')
    parser.add_argument("-p", "--databasePath",
                        dest="db_path",
                        help="Path to the databases",
                        default='')

    parser.add_argument("-k", "--kmaPath",
                        dest="kma_path",
                        help="Path to KMA",
                        default=None)
    parser.add_argument("-q", "--databasePathKMA",
                        dest="db_path_kma",
                        help="Path to the directories containing the \
                              KMA indexed databases. Defaults to the \
                              directory 'kma_indexing' inside the \
                              databasePath directory.",
                        default=None)

    parser.add_argument("-d", "--databases",
                        dest="databases",
                        help="Databases chosen to search in - if none \
                              are specified all are used",
                        default=None)
    # BUGFIX: coerce numeric options to float. Previously values supplied
    # on the command line arrived as strings while the defaults were
    # floats, so downstream numeric comparisons saw mixed types.
    parser.add_argument("-l", "--min_cov",
                        dest="min_cov",
                        type=float,
                        help="Minimum coverage",
                        default=0.60)
    parser.add_argument("-t", "--threshold",
                        dest="threshold",
                        type=float,
                        help="Blast threshold for identity",
                        default=0.90)
    parser.add_argument("-nano", "--nanopore",
                        action="store_true",
                        dest="nanopore",
                        help="If nanopore data is used",
                        default=False)
    args = parser.parse_args()

    ##########################################################################
    # MAIN
    ##########################################################################

    # Defining variables
    min_cov = args.min_cov
    threshold = args.threshold

    # Check if a valid database directory is provided.
    if args.db_path is None:
        sys.exit("Input Error: No database directory was provided!\n")
    elif not os.path.exists(args.db_path):
        sys.exit("Input Error: The specified database directory does not "
                 "exist!\n")
    else:
        # Check existence of the config file inside the db directory.
        db_config_file = '%s/config' % (args.db_path)
        if not os.path.exists(db_config_file):
            sys.exit("Input Error: The database config file could not be "
                     "found!")
        # Save path
        db_path = args.db_path

    # Check existence of the notes file.
    notes_path = "%s/notes.txt" % (args.db_path)
    if not os.path.exists(notes_path):
        sys.exit('Input Error: notes.txt not found! (%s)' % (notes_path))

    # At least one of assembly/raw-read input is required.
    if args.inputfile is None and args.fastq1 is None:
        sys.exit("Input Error: No Input were provided!\n")

    # Check if a valid assembly input file is provided.
    if args.inputfile:
        if not os.path.exists(args.inputfile):
            sys.exit("Input Error: Input file does not exist!\n")
        else:
            inputfile = args.inputfile
    else:
        inputfile = None

    # Check if valid raw-read input files are provided.
    if args.fastq1:

        if not os.path.exists(args.fastq1):
            sys.exit("Input Error: fastq1 file does not exist!\n")
        else:
            input_fastq1 = args.fastq1

        if args.fastq2:
            if not os.path.exists(args.fastq2):
                sys.exit("Input Error: fastq2 file does not exist!\n")
            else:
                input_fastq2 = args.fastq2
        else:
            input_fastq2 = None
    else:
        input_fastq1 = None
        input_fastq2 = None

    # Check if a valid output directory is provided.
    # BUGFIX: corrected misspelled error message ("dirctory ... exists").
    if not os.path.exists(args.out_path):
        sys.exit("Input Error: Output directory does not exist!\n")
    else:
        out_path = args.out_path

    # If input is an assembly, then use BLAST.
    if(inputfile is not None):
        finder = ResFinder(db_conf_file=db_config_file,
                           databases=args.databases,
                           db_path=db_path, notes=notes_path)

        blast_run = finder.blast(inputfile=inputfile, out_path=out_path,
                                 min_cov=min_cov, threshold=threshold,
                                 blast=args.blast_path)

        finder.write_results(out_path=out_path, result=blast_run,
                             res_type=ResFinder.TYPE_BLAST)

    # If the input is raw read data, then use KMA.
    elif(input_fastq1 is not None):
        finder = ResFinder(db_conf_file=db_config_file,
                           databases=args.databases,
                           db_path=db_path, db_path_kma=args.db_path_kma,
                           notes=notes_path)

        # If input_fastq2 is None, it is ignored by the kma method.
        if args.nanopore:
            kma_run = finder.kma(inputfile_1=input_fastq1,
                                 inputfile_2=input_fastq2,
                                 kma_add_args='-ont -md 5')
        else:
            kma_run = finder.kma(inputfile_1=input_fastq1,
                                 inputfile_2=input_fastq2)

        finder.write_results(out_path=out_path, result=kma_run,
                             res_type=ResFinder.TYPE_KMA)
