Mercurial > repos > dcouvin > resfinder4
comparison resfinder/run_resfinder.py @ 0:55051a9bc58d draft default tip
Uploaded
| author | dcouvin |
|---|---|
| date | Mon, 10 Jan 2022 20:06:07 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:55051a9bc58d |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 import sys | |
| 3 import os | |
| 4 import subprocess | |
| 5 from argparse import ArgumentParser | |
| 6 import pickle | |
| 7 | |
| 8 from cge.resfinder import ResFinder | |
| 9 from cge.pointfinder import PointFinder | |
| 10 | |
| 11 # Modules used to create the extended ResFinder output (phenotype output) | |
| 12 from cge.phenotype2genotype.isolate import Isolate | |
| 13 from cge.phenotype2genotype.res_profile import PhenoDB | |
| 14 from cge.phenotype2genotype.res_sumtable import ResSumTable | |
| 15 from cge.phenotype2genotype.res_sumtable import PanelNameError | |
| 16 from cge.out.util.generator import Generator | |
| 17 from cge.standardize_results import ResFinderResultHandler, DatabaseHandler | |
| 18 from cge.standardize_results import PointFinderResultHandler | |
| 19 | |
| 20 import json | |
| 21 # TODO list: | |
| 22 # TODO: Add input data check | |
| 23 | |
| 24 | |
| 25 # ########################################################################### # | |
| 26 # ######### FUNCTIONS ######### # | |
| 27 # ########################################################################### # | |
| 28 | |
| 29 | |
| 30 def eprint(*args, **kwargs): | |
| 31 print(*args, file=sys.stderr, **kwargs) | |
| 32 | |
| 33 # TODO: Add fix species choice | |
| 34 species_transl = {"c. jejuni": "campylobacter jejuni", | |
| 35 "c.jejuni": "campylobacter jejuni", | |
| 36 "c jejuni": "campylobacter jejuni", | |
| 37 "c. coli": "campylobacter coli", | |
| 38 "c.coli": "campylobacter coli", | |
| 39 "c coli": "campylobacter coli", | |
| 40 "e. coli": "escherichia coli", | |
| 41 "e.coli": "escherichia coli", | |
| 42 "e coli": "escherichia coli", | |
| 43 "ecoli": "escherichia coli", | |
| 44 "s. enterica": "salmonella enterica", | |
| 45 "s.enterica": "salmonella enterica", | |
| 46 "s enterica": "salmonella enterica", | |
| 47 "senterica": "salmonella enterica", | |
| 48 } | |
| 49 | |
| 50 ########################################################################## | |
| 51 # PARSE COMMAND LINE OPTIONS | |
| 52 ########################################################################## | |
| 53 | |
| 54 parser = ArgumentParser(allow_abbrev=False) | |
| 55 | |
| 56 # General options | |
| 57 parser.add_argument("-ifa", "--inputfasta", | |
| 58 help="Input fasta file.", | |
| 59 default=None) | |
| 60 parser.add_argument("-ifq", "--inputfastq", | |
| 61 help="Input fastq file(s). Assumed to be single-end fastq \ | |
| 62 if only one file is provided, and assumed to be \ | |
| 63 paired-end data if two files are provided.", | |
| 64 nargs="+", | |
| 65 default=None) | |
| 66 | |
| 67 parser.add_argument("-o", "--outputPath", | |
| 68 dest="out_path", | |
| 69 help="Path to blast output", | |
| 70 default='') | |
| 71 parser.add_argument("-b", "--blastPath", | |
| 72 dest="blast_path", | |
| 73 help="Path to blastn", | |
| 74 default='blastn') | |
| 75 parser.add_argument("-k", "--kmaPath", | |
| 76 dest="kma_path", | |
| 77 help="Path to KMA", | |
| 78 default=None) | |
| 79 parser.add_argument("-s", "--species", | |
| 80 help="Species in the sample", | |
| 81 default=None) | |
| 82 | |
| 83 # Acquired resistance options | |
| 84 parser.add_argument("-db_res", "--db_path_res", | |
| 85 help="Path to the databases for ResFinder", | |
| 86 default=None) | |
| 87 parser.add_argument("-db_res_kma", "--db_path_res_kma", | |
| 88 help="Path to the ResFinder databases indexed with KMA. \ | |
| 89 Defaults to the 'kma_indexing' directory inside the \ | |
| 90 given database directory.", | |
| 91 default=None) | |
| 92 parser.add_argument("-d", "--databases", | |
| 93 dest="databases", | |
| 94 help="Databases chosen to search in - if none is specified\ | |
| 95 all is used", | |
| 96 default=None) | |
| 97 parser.add_argument("-acq", "--acquired", | |
| 98 action="store_true", | |
| 99 dest="acquired", | |
| 100 help="Run resfinder for acquired resistance genes", | |
| 101 default=False) | |
| 102 parser.add_argument("-ao", "--acq_overlap", | |
| 103 help="Genes are allowed to overlap this number of\ | |
| 104 nucleotides. Default: 30.", | |
| 105 type=int, | |
| 106 default=30) | |
| 107 parser.add_argument("-l", "--min_cov", | |
| 108 dest="min_cov", | |
| 109 help="Minimum (breadth-of) coverage of ResFinder within the range 0-1.", | |
| 110 type=float, | |
| 111 default=0.60) | |
| 112 parser.add_argument("-t", "--threshold", | |
| 113 dest="threshold", | |
| 114 help="Threshold for identity of ResFinder within the range 0-1.", | |
| 115 type=float, | |
| 116 default=0.80) | |
| 117 parser.add_argument("-nano", "--nanopore", | |
| 118 action="store_true", | |
| 119 dest="nanopore", | |
| 120 help="If nanopore data is used", | |
| 121 default=False) | |
| 122 # Point resistance option | |
| 123 parser.add_argument("-c", "--point", | |
| 124 action="store_true", | |
| 125 dest="point", | |
| 126 help="Run pointfinder for chromosomal mutations", | |
| 127 default=False) | |
| 128 parser.add_argument("-db_point", "--db_path_point", | |
| 129 help="Path to the databases for PointFinder", | |
| 130 default=None) | |
| 131 parser.add_argument("-db_point_kma", "--db_path_point_kma", | |
| 132 help="Path to the PointFinder databases indexed with KMA. \ | |
| 133 Defaults to the 'kma_indexing' directory inside the \ | |
| 134 given database directory.", | |
| 135 default=None) | |
| 136 parser.add_argument("-g", | |
| 137 dest="specific_gene", | |
| 138 nargs='+', | |
| 139 help="Specify genes existing in the database to \ | |
| 140 search for - if none is specified all genes are \ | |
| 141 included in the search.", | |
| 142 default=None) | |
| 143 parser.add_argument("-u", "--unknown_mut", | |
| 144 dest="unknown_mutations", | |
| 145 action="store_true", | |
| 146 help="Show all mutations found even if in unknown to the\ | |
| 147 resistance database", | |
| 148 default=False) | |
| 149 parser.add_argument("-l_p", "--min_cov_point", | |
| 150 dest="min_cov_point", | |
| 151 help="Minimum (breadth-of) coverage of Pointfinder within the range 0-1. \ | |
| 152 If None is selected, the minimum coverage of \ | |
| 153 ResFinder will be used.", | |
| 154 type=float, | |
| 155 default=None) | |
| 156 parser.add_argument("-t_p", "--threshold_point", | |
| 157 dest="threshold_point", | |
| 158 help="Threshold for identity of Pointfinder within the range 0-1. \ | |
| 159 If None is selected, the minimum coverage of \ | |
| 160 ResFinder will be used.", | |
| 161 type=float, | |
| 162 default=None) | |
| 163 # Temporary option only available temporary | |
| 164 parser.add_argument("--pickle", | |
| 165 action="store_true", | |
| 166 help="Create a pickle dump of the Isolate object. \ | |
| 167 Currently needed in the CGE webserver. Dependency \ | |
| 168 and this option is being removed.", | |
| 169 default=False) | |
| 170 | |
| 171 args = parser.parse_args() | |
| 172 | |
| 173 if(args.species is not None and args.species.lower() == "other"): | |
| 174 args.species = None | |
| 175 | |
| 176 if(args.point and not args.species): | |
| 177 sys.exit("ERROR: Chromosomal point mutations cannot be located if no " | |
| 178 "species has been provided. Please provide species using the " | |
| 179 "--species option.") | |
| 180 | |
| 181 # Check if coverage/identity parameters are valid | |
| 182 if(args.min_cov > 1.0 or args.min_cov < 0.0): | |
| 183 sys.exit("ERROR: Minimum coverage above 1 or below 0 is not allowed. Please select a minimum coverage within the range 0-1 with the flag -l.") | |
| 184 | |
| 185 if(args.threshold > 1.0 or args.threshold < 0.0): | |
| 186 sys.exit("ERROR: Threshold for identity of ResFinder above 1 or below 0 is not allowed. Please select a threshold for identity within the range 0-1 with the flag -t.") | |
| 187 | |
| 188 | |
| 189 # Create a "sample" name | |
| 190 if(args.inputfasta): | |
| 191 args.inputfasta = os.path.abspath(args.inputfasta) | |
| 192 if(not os.path.isfile(args.inputfasta)): | |
| 193 sys.exit("ERROR: Input FASTA file not found: " + args.inputfasta) | |
| 194 sample_name = os.path.basename(args.inputfasta) | |
| 195 method = PointFinder.TYPE_BLAST | |
| 196 else: | |
| 197 sample_name = os.path.basename(args.inputfastq[0]) | |
| 198 method = PointFinder.TYPE_KMA | |
| 199 | |
| 200 if(args.inputfastq): | |
| 201 inputfastq_1 = args.inputfastq[0] | |
| 202 inputfastq_1 = os.path.abspath(inputfastq_1) | |
| 203 if(not os.path.isfile(inputfastq_1)): | |
| 204 sys.exit("ERROR: Input fastq file 1 not found: " + inputfastq_1) | |
| 205 if(len(args.inputfastq) == 2): | |
| 206 inputfastq_2 = args.inputfastq[1] | |
| 207 inputfastq_2 = os.path.abspath(inputfastq_2) | |
| 208 if(not os.path.isfile(inputfastq_2)): | |
| 209 sys.exit("ERROR: Input fastq file 2 not found: " + inputfastq_2) | |
| 210 else: | |
| 211 inputfastq_2 = None | |
| 212 | |
| 213 blast = args.blast_path | |
| 214 if(args.inputfasta): | |
| 215 try: | |
| 216 _ = subprocess.check_output([blast, "-h"]) | |
| 217 except FileNotFoundError as e: | |
| 218 sys.exit("ERROR: Unable to execute blastn from the path: {}" | |
| 219 .format(blast)) | |
| 220 | |
| 221 # Check KMA path cge/kma/kma | |
| 222 if(args.inputfastq): | |
| 223 if(args.kma_path is None): | |
| 224 kma = (os.path.dirname(os.path.realpath(__file__)) + "/cge/kma/kma") | |
| 225 kma = os.path.abspath(kma) | |
| 226 try: | |
| 227 _ = subprocess.check_output([kma, "-h"]) | |
| 228 except FileNotFoundError as e: | |
| 229 kma = "kma" | |
| 230 else: | |
| 231 kma = args.kma_path | |
| 232 try: | |
| 233 _ = subprocess.check_output([kma, "-h"]) | |
| 234 except FileNotFoundError as e: | |
| 235 sys.exit("ERROR: Unable to execute kma from the path: {}".format(kma)) | |
| 236 else: | |
| 237 kma = None | |
| 238 | |
| 239 db_path_point = None | |
| 240 | |
| 241 if(args.species): | |
| 242 args.species = args.species.lower() | |
| 243 | |
| 244 fixed_species = species_transl.get(args.species, None) | |
| 245 if(fixed_species): | |
| 246 args.species = fixed_species | |
| 247 | |
| 248 tmp_list = args.species.split() | |
| 249 if(len(tmp_list) != 1 and len(tmp_list) != 2): | |
| 250 sys.exit("ERROR: Species name must contain 1 or 2 names.") | |
| 251 | |
| 252 # Check Poinfinder database | |
| 253 if(args.point): | |
| 254 if(len(tmp_list) == 2): | |
| 255 point_species = "_".join(tmp_list) | |
| 256 else: | |
| 257 point_species = tmp_list[0] | |
| 258 | |
| 259 if(args.db_path_point is None and args.point): | |
| 260 db_path_point = (os.path.dirname( | |
| 261 os.path.realpath(__file__)) + "/db_pointfinder") | |
| 262 elif(args.db_path_point is not None): | |
| 263 db_path_point = args.db_path_point | |
| 264 | |
| 265 db_path_point = os.path.abspath(db_path_point) | |
| 266 point_dbs = PointFinder.get_db_names(db_path_point) | |
| 267 | |
| 268 # Check if a database for species exists | |
| 269 if(point_species not in point_dbs and args.point): | |
| 270 # If not db for species is found check if db for genus is found | |
| 271 # and use that instead | |
| 272 if(tmp_list[0] in point_dbs): | |
| 273 point_species = tmp_list[0] | |
| 274 else: | |
| 275 sys.exit("ERROR: species '%s' (%s) does not seem to exist" | |
| 276 " as a PointFinder database." | |
| 277 % (args.species, point_species)) | |
| 278 | |
| 279 db_path_point_root = db_path_point | |
| 280 db_path_point = db_path_point + "/" + point_species | |
| 281 | |
| 282 # Check output directory | |
| 283 args.out_path = os.path.abspath(args.out_path) | |
| 284 os.makedirs(args.out_path, exist_ok=True) | |
| 285 | |
| 286 if args.acquired is False and args.point is False: | |
| 287 sys.exit("Please specify to look for acquired resistance genes, " | |
| 288 "chromosomal mutaitons or both!\n") | |
| 289 | |
| 290 # Check ResFinder database | |
| 291 if(args.db_path_res is None): | |
| 292 args.db_path_res = (os.path.dirname( | |
| 293 os.path.realpath(__file__)) + "/db_resfinder") | |
| 294 args.db_path_res = os.path.abspath(args.db_path_res) | |
| 295 if(not os.path.exists(args.db_path_res)): | |
| 296 sys.exit("Could not locate ResFinder database path: %s" | |
| 297 % args.db_path_res) | |
| 298 | |
| 299 if(args.db_path_res_kma is None and args.acquired and args.inputfastq): | |
| 300 args.db_path_res_kma = args.db_path_res | |
| 301 if(not os.path.exists(args.db_path_res_kma)): | |
| 302 sys.exit("Could not locate ResFinder database index path: %s" | |
| 303 % args.db_path_res_kma) | |
| 304 | |
| 305 min_cov = float(args.min_cov) | |
| 306 | |
| 307 # Initialise result dict | |
| 308 init_software_result = {"software_name": "ResFinder"} | |
| 309 git_path = os.path.abspath(os.path.dirname(__file__)) | |
| 310 std_result = Generator.init_software_result(name="ResFinder", gitdir=git_path) | |
| 311 | |
| 312 if(args.acquired): | |
| 313 DatabaseHandler.load_database_metadata("ResFinder", std_result, | |
| 314 args.db_path_res) | |
| 315 if(args.point): | |
| 316 DatabaseHandler.load_database_metadata("PointFinder", std_result, | |
| 317 db_path_point_root) | |
| 318 ########################################################################## | |
| 319 # ResFinder | |
| 320 ########################################################################## | |
| 321 | |
| 322 if args.acquired is True: | |
| 323 | |
| 324 databases = args.databases | |
| 325 threshold = float(args.threshold) | |
| 326 | |
| 327 if(args.inputfasta): | |
| 328 out_res_blast = args.out_path + "/resfinder_blast" | |
| 329 os.makedirs(out_res_blast, exist_ok=True) | |
| 330 if(args.inputfastq): | |
| 331 out_res_kma = args.out_path + "/resfinder_kma" | |
| 332 os.makedirs(out_res_kma, exist_ok=True) | |
| 333 | |
| 334 db_path_res = args.db_path_res | |
| 335 | |
| 336 # Check if valid database is provided | |
| 337 if(db_path_res is None): | |
| 338 db_path_res = (os.path.dirname(os.path.realpath(__file__)) | |
| 339 + "/db_resfinder") | |
| 340 | |
| 341 if not os.path.exists(db_path_res): | |
| 342 sys.exit("Input Error: The specified database directory does not " | |
| 343 "exist!\nProvided path: " + str(db_path_res)) | |
| 344 else: | |
| 345 # Check existence of config file | |
| 346 db_config_file = '%s/config' % (db_path_res) | |
| 347 if not os.path.exists(db_config_file): | |
| 348 sys.exit("Input Error: The database config file could not be " | |
| 349 "found!") | |
| 350 | |
| 351 # Check existence of notes file | |
| 352 notes_path = "%s/notes.txt" % (db_path_res) | |
| 353 if not os.path.exists(notes_path): | |
| 354 sys.exit('Input Error: notes.txt not found! (%s)' % (notes_path)) | |
| 355 | |
| 356 blast_results = None | |
| 357 kma_results = None | |
| 358 | |
| 359 # Actually running ResFinder (for acquired resistance) | |
| 360 acquired_finder = ResFinder(db_conf_file=db_config_file, | |
| 361 databases=args.databases, db_path=db_path_res, | |
| 362 notes=notes_path, | |
| 363 db_path_kma=args.db_path_res_kma) | |
| 364 | |
| 365 if(args.inputfasta): | |
| 366 blast_results = acquired_finder.blast(inputfile=args.inputfasta, | |
| 367 out_path=out_res_blast, | |
| 368 min_cov=min_cov, | |
| 369 threshold=threshold, | |
| 370 blast=blast, | |
| 371 allowed_overlap=args.acq_overlap) | |
| 372 | |
| 373 # DEPRECATED | |
| 374 # use std_result | |
| 375 new_std_res = ResFinder.old_results_to_standard_output( | |
| 376 blast_results.results, software="ResFinder", version="4.1.0", | |
| 377 run_date="fake_run_date", run_cmd="Fake run cmd", | |
| 378 id=sample_name) | |
| 379 | |
| 380 # DEPRECATED | |
| 381 # use std_result | |
| 382 acquired_finder.write_results(out_path=args.out_path, | |
| 383 result=blast_results, | |
| 384 res_type=ResFinder.TYPE_BLAST) | |
| 385 | |
| 386 ResFinderResultHandler.standardize_results(std_result, | |
| 387 blast_results.results, | |
| 388 "ResFinder") | |
| 389 #DEBUG | |
| 390 # print("STD RESULT:\n{}".format(json.dumps(std_result))) | |
| 391 | |
| 392 else: | |
| 393 if args.nanopore: | |
| 394 kma_run = acquired_finder.kma(inputfile_1=inputfastq_1, | |
| 395 inputfile_2=inputfastq_2, | |
| 396 out_path=out_res_kma, | |
| 397 db_path_kma=args.db_path_res_kma, | |
| 398 databases=acquired_finder.databases, | |
| 399 min_cov=min_cov, | |
| 400 threshold=args.threshold, | |
| 401 kma_path=kma, | |
| 402 sample_name="", | |
| 403 kma_cge=True, | |
| 404 kma_apm="p", | |
| 405 kma_1t1=True, | |
| 406 kma_add_args='-ont -md 5') | |
| 407 else: | |
| 408 kma_run = acquired_finder.kma(inputfile_1=inputfastq_1, | |
| 409 inputfile_2=inputfastq_2, | |
| 410 out_path=out_res_kma, | |
| 411 db_path_kma=args.db_path_res_kma, | |
| 412 databases=acquired_finder.databases, | |
| 413 min_cov=min_cov, | |
| 414 threshold=args.threshold, | |
| 415 kma_path=kma, | |
| 416 sample_name="", | |
| 417 kma_cge=True, | |
| 418 kma_apm="p", | |
| 419 kma_1t1=True) | |
| 420 | |
| 421 # DEPRECATED | |
| 422 # use std_result | |
| 423 new_std_res = ResFinder.old_results_to_standard_output( | |
| 424 kma_run.results, software="ResFinder", version="4.1.0", | |
| 425 run_date="fake_run_date", run_cmd="Fake run cmd", | |
| 426 id=sample_name) | |
| 427 | |
| 428 # DEPRECATED | |
| 429 # use std_result | |
| 430 acquired_finder.write_results(out_path=args.out_path, | |
| 431 result=kma_run.results, | |
| 432 res_type=ResFinder.TYPE_KMA) | |
| 433 | |
| 434 ResFinderResultHandler.standardize_results(std_result, | |
| 435 kma_run.results, | |
| 436 "ResFinder") | |
| 437 #DEBUG | |
| 438 # print("STD RESULT:\n{}".format(json.dumps(std_result))) | |
| 439 | |
| 440 ########################################################################## | |
| 441 # PointFinder | |
| 442 ########################################################################## | |
| 443 | |
| 444 if args.point is True and args.species: | |
| 445 | |
| 446 if(args.inputfasta): | |
| 447 out_point = os.path.abspath(args.out_path + "/pointfinder_blast") | |
| 448 os.makedirs(out_point, exist_ok=True) | |
| 449 if(args.inputfastq): | |
| 450 out_point = os.path.abspath(args.out_path + "/pointfinder_kma") | |
| 451 os.makedirs(out_point, exist_ok=True) | |
| 452 | |
| 453 if args.min_cov_point is None: | |
| 454 min_cov_point = args.min_cov | |
| 455 else: | |
| 456 min_cov_point = args.min_cov_point | |
| 457 if(args.min_cov_point > 1.0 or args.min_cov_point < 0.0): | |
| 458 sys.exit("ERROR: Minimum coverage above 1 or below 0 is not allowed. Please select a minimum coverage within the range 0-1 with the flag -l_p.") | |
| 459 | |
| 460 if args.threshold_point is None: | |
| 461 threshold_point = args.threshold | |
| 462 else: | |
| 463 threshold_point = args.threshold_point | |
| 464 if(args.threshold_point > 1.0 or args.threshold_point < 0.0): | |
| 465 sys.exit("ERROR: Threshold for identity of PointFinder above 1 or below 0 is not allowed. Please select a threshold for identity within the range 0-1 with the flag -t_p.") | |
| 466 | |
| 467 finder = PointFinder(db_path=db_path_point, species=point_species, | |
| 468 gene_list=args.specific_gene) | |
| 469 | |
| 470 if(args.inputfasta): | |
| 471 blast_run = finder.blast(inputfile=args.inputfasta, | |
| 472 out_path=out_point, | |
| 473 # min_cov=min_cov_point, | |
| 474 min_cov=0.01, | |
| 475 threshold=threshold_point, | |
| 476 blast=blast, | |
| 477 cut_off=False) | |
| 478 results = blast_run.results | |
| 479 | |
| 480 else: | |
| 481 | |
| 482 method = PointFinder.TYPE_KMA | |
| 483 | |
| 484 kma_run = finder.kma(inputfile_1=inputfastq_1, | |
| 485 inputfile_2=inputfastq_2, | |
| 486 out_path=out_point, | |
| 487 db_path_kma=db_path_point, | |
| 488 databases=[point_species], | |
| 489 min_cov=0.01, | |
| 490 # min_cov=min_cov_point, | |
| 491 threshold=threshold_point, | |
| 492 kma_path=kma, | |
| 493 sample_name="", | |
| 494 kma_cge=True, | |
| 495 kma_apm="p", | |
| 496 kma_1t1=True) | |
| 497 | |
| 498 results = kma_run.results | |
| 499 | |
| 500 if(args.specific_gene): | |
| 501 results = PointFinder.discard_unwanted_results( | |
| 502 results=results, wanted=args.specific_gene) | |
| 503 | |
| 504 if(method == PointFinder.TYPE_BLAST): | |
| 505 results_pnt = finder.find_best_seqs(results, min_cov) | |
| 506 else: | |
| 507 results_pnt = results[finder.species] | |
| 508 if(results_pnt == "No hit found"): | |
| 509 results_pnt = {} | |
| 510 else: | |
| 511 results_pnt["excluded"] = results["excluded"] | |
| 512 | |
| 513 # DEPRECATED | |
| 514 # use std_result | |
| 515 new_std_pnt = finder.old_results_to_standard_output( | |
| 516 result=results_pnt, software="ResFinder", version="4.1.0", | |
| 517 run_date="fake_run_date", run_cmd="Fake run cmd", | |
| 518 id=sample_name) | |
| 519 | |
| 520 # DEPRECATED | |
| 521 # use std_result | |
| 522 finder.write_results(out_path=args.out_path, result=results, | |
| 523 res_type=method, unknown_flag=args.unknown_mutations, | |
| 524 min_cov=min_cov_point, perc_iden=threshold_point) | |
| 525 | |
| 526 #DEBUG | |
| 527 # print("POINT RES:\n{}".format(json.dumps(results_pnt))) | |
| 528 | |
| 529 PointFinderResultHandler.standardize_results(std_result, | |
| 530 results_pnt, | |
| 531 "PointFinder") | |
| 532 | |
| 533 #DEBUG | |
| 534 # print("STD RESULT:\n{}".format(json.dumps(std_result))) | |
| 535 ########################################################################## | |
| 536 # Phenotype to genotype | |
| 537 ########################################################################## | |
| 538 | |
| 539 # Load genotype to phenotype database | |
| 540 if(db_path_point): | |
| 541 point_file = db_path_point + "/resistens-overview.txt" | |
| 542 else: | |
| 543 point_file = None | |
| 544 | |
| 545 res_pheno_db = PhenoDB( | |
| 546 abclassdef_file=(args.db_path_res + "/antibiotic_classes.txt"), | |
| 547 acquired_file=args.db_path_res + "/phenotypes.txt", point_file=point_file) | |
| 548 # Isolate object store results | |
| 549 isolate = Isolate(name=sample_name) | |
| 550 if(args.acquired): | |
| 551 isolate.load_finder_results(std_table=std_result, | |
| 552 phenodb=res_pheno_db, | |
| 553 type="genes") | |
| 554 # isolate.load_finder_results(std_table=std_result, | |
| 555 # phenodb=res_pheno_db) | |
| 556 # isolate.load_finder_results(std_table=new_std_res, | |
| 557 # phenodb=res_pheno_db) | |
| 558 if(args.point): | |
| 559 isolate.load_finder_results(std_table=std_result, | |
| 560 phenodb=res_pheno_db, | |
| 561 type="seq_variations") | |
| 562 # isolate.load_finder_results_old(std_table=new_std_pnt, | |
| 563 # phenodb=res_pheno_db) | |
| 564 # isolate.load_pointfinder_tab(args.out_path + "/PointFinder_results.txt", | |
| 565 # res_pheno_db) | |
| 566 isolate.calc_res_profile(res_pheno_db) | |
| 567 if(args.acquired): | |
| 568 ResFinderResultHandler.load_res_profile(std_result, isolate) | |
| 569 if(args.point): | |
| 570 PointFinderResultHandler.load_res_profile(std_result, isolate) | |
| 571 | |
| 572 | |
| 573 #TODO | |
| 574 std_result_file = "{}/std_format_under_development.json".format(args.out_path) | |
| 575 with open(std_result_file, 'w') as fh: | |
| 576 fh.write(json.dumps(std_result)) | |
| 577 | |
| 578 # Create and write the downloadable tab file | |
| 579 pheno_profile_str = isolate.profile_to_str_table(header=True) | |
| 580 # TODO: REMOVE THE NEED FOR THE PICKLED FILE | |
| 581 if(args.pickle): | |
| 582 isolate_pickle = open("{}/isolate.p".format(args.out_path), "wb") | |
| 583 pickle.dump(isolate, isolate_pickle, protocol=2) | |
| 584 | |
| 585 pheno_table_file = args.out_path + '/pheno_table.txt' | |
| 586 with open(pheno_table_file, 'w') as fh: | |
| 587 fh.write(pheno_profile_str) | |
| 588 | |
| 589 if(args.species is not None): | |
| 590 # Apply AMR panel | |
| 591 input_amr_panels = args.db_path_res + "/phenotype_panels.txt" | |
| 592 res_sum_table = ResSumTable(pheno_profile_str) | |
| 593 res_sum_table.load_amr_panels(input_amr_panels) | |
| 594 | |
| 595 try: | |
| 596 panel_profile_str = res_sum_table.get_amr_panel_str( | |
| 597 panel_name_raw=args.species, header=True) | |
| 598 # If specified species does not have an associated panel, just ignore it | |
| 599 # and exit. | |
| 600 except PanelNameError: | |
| 601 eprint("Warning: No panel was detected for the species: {}" | |
| 602 .format(args.species)) | |
| 603 sys.exit() | |
| 604 | |
| 605 amr_panel_filename = args.species.replace(" ", "_") | |
| 606 | |
| 607 panel_tabel_file = (pheno_table_file[:-4] + "_" + amr_panel_filename | |
| 608 + ".txt") | |
| 609 with open(panel_tabel_file, "w") as fh: | |
| 610 fh.write(panel_profile_str) | |
| 611 | |
| 612 sys.exit() |
