Mercurial > repos > jankanis > blast2html
comparison blast2html.py @ 116:f5066973029a
refactor
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Mon, 14 Jul 2014 15:47:48 +0200 |
parents | 0c2a03f9740b |
children | 7f3f8c10f44b |
comparison
equal
deleted
inserted
replaced
115:0c2a03f9740b | 116:f5066973029a |
---|---|
236 | 236 |
237 colors = ('black', 'blue', 'green', 'magenta', 'red') | 237 colors = ('black', 'blue', 'green', 'magenta', 'red') |
238 | 238 |
239 max_scale_labels = 10 | 239 max_scale_labels = 10 |
240 | 240 |
241 def __init__(self, input, templatedir, templatename, dbname, genelinks={}): | 241 def __init__(self, input, templatedir, templatename, genelinks): |
242 self.input = input | 242 self.input = input |
243 self.templatename = templatename | 243 self.templatename = templatename |
244 self.dbname = dbname | |
245 self.genelinks = genelinks | 244 self.genelinks = genelinks |
246 | 245 |
247 self.blast = objectify.parse(self.input).getroot() | 246 self.blast = objectify.parse(self.input).getroot() |
248 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) | 247 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) |
249 self.environment = jinja2.Environment(loader=self.loader, | 248 self.environment = jinja2.Environment(loader=self.loader, |
357 display_nolink: boolean, if false don't display anything if no link can be created. Default True. | 356 display_nolink: boolean, if false don't display anything if no link can be created. Default True. |
358 """ | 357 """ |
359 | 358 |
360 db = hit.getroottree().getroot().BlastOutput_db | 359 db = hit.getroottree().getroot().BlastOutput_db |
361 | 360 |
362 if isinstance(self.genelinks, six.string_types): | 361 template = self.genelinks[db].template |
363 template = self.genelinks | |
364 else: | |
365 template = self.genelinks[db].template | |
366 | 362 |
367 if text is None: | 363 if text is None: |
368 if text_from == 'hitid': | 364 if text_from == 'hitid': |
369 text = hitid(hit) | 365 text = hitid(hit) |
370 elif text_from == 'dbname': | 366 elif text_from == 'dbname': |
371 text = self.dbname or self.genelinks[db].dbname or 'Gene Bank' | 367 text = self.genelinks[db].dbname |
372 else: | 368 else: |
373 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) | 369 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) |
374 | 370 |
375 if template is None: | 371 if template is None: |
376 return text if display_nolink else '' | 372 return text if display_nolink else '' |
389 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else '' | 385 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else '' |
390 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) | 386 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) |
391 | 387 |
392 | 388 |
393 genelinks_entry = namedtuple('genelinks_entry', 'dbname template') | 389 genelinks_entry = namedtuple('genelinks_entry', 'dbname template') |
394 def read_genelinks(dir): | 390 def read_blastdb(dir, default): |
395 links = defaultdict(lambda: genelinks_entry(None, None)) | 391 links = defaultdict(lambda: default) |
396 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. | 392 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. |
397 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) | 393 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) |
398 # reversed, so blastdb.loc will take precedence | 394 # reversed, so blastdb.loc will take precedence |
399 for f in reversed(files): | 395 for f in reversed(files): |
400 try: | 396 try: |
402 for l in f.readlines(): | 398 for l in f.readlines(): |
403 if l.strip().startswith('#'): | 399 if l.strip().startswith('#'): |
404 continue | 400 continue |
405 line = l.rstrip('\n').split('\t') | 401 line = l.rstrip('\n').split('\t') |
406 try: | 402 try: |
407 links[line[2]] = genelinks_entry(dbname=line[3], template=line[4]) | 403 links[line[2]] = genelinks_entry(dbname=line[3] or default.dbname, template=line[4]) |
408 except IndexError: | 404 except IndexError: |
409 continue | 405 continue |
410 f.close() | 406 f.close() |
411 except OSError: | 407 except OSError: |
412 continue | 408 continue |
420 | 416 |
421 def main(): | 417 def main(): |
422 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') | 418 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') |
423 | 419 |
424 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", | 420 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", |
425 usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE]".format(sys.argv[0])) | 421 usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0])) |
426 input_group = parser.add_mutually_exclusive_group(required=True) | 422 input_group = parser.add_mutually_exclusive_group(required=True) |
427 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), | 423 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), |
428 help='The input Blast XML file, same as -i/--input') | 424 help='The input Blast XML file, same as -i/--input') |
429 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), | 425 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), |
430 help='The input Blast XML file') | 426 help='The input Blast XML file') |
436 # jinja later tries to re-open the template file, but we don't | 432 # jinja later tries to re-open the template file, but we don't |
437 # care too much. | 433 # care too much. |
438 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, | 434 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, |
439 help='The template file to use. Defaults to blast_html.html.jinja') | 435 help='The template file to use. Defaults to blast_html.html.jinja') |
440 | 436 |
441 parser.add_argument('--dbname', type=str, default=None, | 437 parser.add_argument('--dbname', type=str, default='Gene Bank', |
442 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") | 438 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") |
443 dblink_group = parser.add_mutually_exclusive_group() | 439 parser.add_argument('--genelink-template', metavar='URL_TEMPLATE', |
444 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', | 440 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
445 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 441 help="""A link template to link hits to a gene bank webpage. The template string is a |
446 help="""A link template to link hits to a gene bank webpage. The template string is a | 442 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, |
447 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, | 443 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be |
448 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be | 444 replaced by the Nth element of the id or defline, where '|' is the field separator. |
449 replaced by the Nth element of the id or defline, where '|' is the field separator. | 445 |
450 | 446 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
451 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 447 which is a link to the NCBI nucleotide database.""") |
452 which is a link to the NCBI nucleotide database.""") | 448 |
453 | 449 parser.add_argument('--db-config-dir', |
454 dblink_group.add_argument('--db-config-dir', | 450 help="""The directory where databases are configured in blastdb*.loc files. These files |
455 help="""The directory where databases are configured in blastdb*.loc files. These files | 451 are consulted for creating a gene bank link. The files should conform to the format that |
456 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines | 452 Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored), |
457 starting with '#' ignored), where the third field of a line should be a database path and the fourth | 453 with two extra fields. The third field of a line should be a database path and the fourth |
458 a genebank link template conforming to the --genelink-template option syntax. | 454 a genebank link template conforming to the --genelink-template option syntax. Entries in |
459 | 455 these config files override links specified using --genelink-template and --dbname.""") |
460 This option is incompatible with --genelink-template.""") | |
461 | 456 |
462 args = parser.parse_args() | 457 args = parser.parse_args() |
463 if args.input == None: | 458 if args.input == None: |
464 args.input = args.positional_arg | 459 args.input = args.positional_arg |
465 if args.input == None: | 460 if args.input == None: |
489 templatedir, templatename = path.split(args.template.name) | 484 templatedir, templatename = path.split(args.template.name) |
490 args.template.close() | 485 args.template.close() |
491 if not templatedir: | 486 if not templatedir: |
492 templatedir = '.' | 487 templatedir = '.' |
493 | 488 |
489 defaultentry = genelinks_entry(args.dbname, args.genelink_template) | |
494 if args.db_config_dir is None: | 490 if args.db_config_dir is None: |
495 genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None)) | 491 genelinks = defaultdict(lambda: defaultentry) |
496 elif not path.isdir(args.db_config_dir): | 492 elif not path.isdir(args.db_config_dir): |
497 parser.error('db-config-dir does not exist or is not a directory') | 493 parser.error('db-config-dir does not exist or is not a directory') |
498 else: | 494 else: |
499 genelinks = read_genelinks(args.db_config_dir) | 495 genelinks = read_blastdb(args.db_config_dir, default=defaultentry) |
500 | 496 |
501 b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks) | 497 b = BlastVisualize(args.input, templatedir, templatename, genelinks=genelinks) |
502 b.render(args.output) | 498 b.render(args.output) |
503 args.output.close() | 499 args.output.close() |
504 | 500 |
505 | 501 |
506 if __name__ == '__main__': | 502 if __name__ == '__main__': |