Mercurial > repos > jankanis > blast2html
comparison blast2html.py @ 115:0c2a03f9740b
make external gene bank name configurable
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Mon, 14 Jul 2014 15:01:32 +0200 |
parents | e17aae23cc1c |
children | f5066973029a |
comparison
equal
deleted
inserted
replaced
114:4f0ed3b5ae46 | 115:0c2a03f9740b |
---|---|
13 import warnings | 13 import warnings |
14 import six, codecs, io | 14 import six, codecs, io |
15 from six.moves import builtins | 15 from six.moves import builtins |
16 from os import path | 16 from os import path |
17 from itertools import repeat | 17 from itertools import repeat |
18 from collections import defaultdict | 18 from collections import defaultdict, namedtuple |
19 import glob | 19 import glob |
20 import argparse | 20 import argparse |
21 from lxml import objectify | 21 from lxml import objectify |
22 import jinja2 | 22 import jinja2 |
23 | 23 |
236 | 236 |
237 colors = ('black', 'blue', 'green', 'magenta', 'red') | 237 colors = ('black', 'blue', 'green', 'magenta', 'red') |
238 | 238 |
239 max_scale_labels = 10 | 239 max_scale_labels = 10 |
240 | 240 |
241 def __init__(self, input, templatedir, templatename, genelinks={}): | 241 def __init__(self, input, templatedir, templatename, dbname, genelinks={}): |
242 self.input = input | 242 self.input = input |
243 self.templatename = templatename | 243 self.templatename = templatename |
244 self.dbname = dbname | |
244 self.genelinks = genelinks | 245 self.genelinks = genelinks |
245 | 246 |
246 self.blast = objectify.parse(self.input).getroot() | 247 self.blast = objectify.parse(self.input).getroot() |
247 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) | 248 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) |
248 self.environment = jinja2.Environment(loader=self.loader, | 249 self.environment = jinja2.Environment(loader=self.loader, |
346 # FIXME: is this the correct formula vv? | 347 # FIXME: is this the correct formula vv? |
347 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 | 348 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 |
348 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) | 349 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) |
349 | 350 |
350 @filter | 351 @filter |
351 def genelink(self, hit, text=None, clas=None, display_nolink=True): | 352 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True): |
352 """Create a html link from a hit node to a configured gene bank webpage. | 353 """Create a html link from a hit node to a configured gene bank webpage. |
353 text: The text of the link, defaults to the hit_id | 354 text: The text of the link. If not set applies text_from. |
354 clas: extra css classes that will be added to the <a> element | 355 text_from: string, if text is not specified, take it from specified source. Either 'hitid' (default) or 'dbname'. |
356 cssclass: extra css classes that will be added to the <a> element | |
355 display_nolink: boolean, if false don't display anything if no link can be created. Default True. | 357 display_nolink: boolean, if false don't display anything if no link can be created. Default True. |
356 """ | 358 """ |
357 | 359 |
358 if text is None: | |
359 text = hitid(hit) | |
360 | |
361 db = hit.getroottree().getroot().BlastOutput_db | 360 db = hit.getroottree().getroot().BlastOutput_db |
362 | 361 |
363 if isinstance(self.genelinks, six.string_types): | 362 if isinstance(self.genelinks, six.string_types): |
364 template = self.genelinks | 363 template = self.genelinks |
365 else: | 364 else: |
366 template = self.genelinks.get(db) | 365 template = self.genelinks[db].template |
366 | |
367 if text is None: | |
368 if text_from == 'hitid': | |
369 text = hitid(hit) | |
370 elif text_from == 'dbname': | |
371 text = self.dbname or self.genelinks[db].dbname or 'Gene Bank' | |
372 else: | |
373 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) | |
374 | |
367 if template is None: | 375 if template is None: |
368 return text if display_nolink else '' | 376 return text if display_nolink else '' |
377 | |
369 args = dict(id=hitid(hit).split('|'), | 378 args = dict(id=hitid(hit).split('|'), |
370 fullid=hitid(hit), | 379 fullid=hitid(hit), |
371 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'), | 380 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'), |
372 fulldefline=str(hit.Hit_def).split(' ', 1)[0], | 381 fulldefline=str(hit.Hit_def).split(' ', 1)[0], |
373 accession=str(hit.Hit_accession)) | 382 accession=str(hit.Hit_accession)) |
375 link = template.format(**args) | 384 link = template.format(**args) |
376 except Exception as e: | 385 except Exception as e: |
377 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) | 386 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) |
378 return text if display_nolink else '' | 387 return text if display_nolink else '' |
379 | 388 |
380 classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else '' | 389 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else '' |
381 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) | 390 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) |
382 | 391 |
383 | 392 |
393 genelinks_entry = namedtuple('genelinks_entry', 'dbname template') | |
384 def read_genelinks(dir): | 394 def read_genelinks(dir): |
385 links = {} | 395 links = defaultdict(lambda: genelinks_entry(None, None)) |
386 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. | 396 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. |
387 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) | 397 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) |
388 # reversed, so blastdb.loc will take precedence | 398 # reversed, so blastdb.loc will take precedence |
389 for f in reversed(files): | 399 for f in reversed(files): |
390 try: | 400 try: |
392 for l in f.readlines(): | 402 for l in f.readlines(): |
393 if l.strip().startswith('#'): | 403 if l.strip().startswith('#'): |
394 continue | 404 continue |
395 line = l.rstrip('\n').split('\t') | 405 line = l.rstrip('\n').split('\t') |
396 try: | 406 try: |
397 links[line[2]] = line[3] | 407 links[line[2]] = genelinks_entry(dbname=line[3], template=line[4]) |
398 except IndexError: | 408 except IndexError: |
399 continue | 409 continue |
400 f.close() | 410 f.close() |
401 except OSError: | 411 except OSError: |
402 continue | 412 continue |
425 # handle the errors. This introduces a small race condition when | 435 # handle the errors. This introduces a small race condition when |
426 # jinja later tries to re-open the template file, but we don't | 436 # jinja later tries to re-open the template file, but we don't |
427 # care too much. | 437 # care too much. |
428 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, | 438 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, |
429 help='The template file to use. Defaults to blast_html.html.jinja') | 439 help='The template file to use. Defaults to blast_html.html.jinja') |
430 | 440 |
441 parser.add_argument('--dbname', type=str, default=None, | |
442 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") | |
431 dblink_group = parser.add_mutually_exclusive_group() | 443 dblink_group = parser.add_mutually_exclusive_group() |
432 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', | 444 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', |
433 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 445 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
434 help="""A link template to link hits to a gene bank webpage. The template string is a | 446 help="""A link template to link hits to a gene bank webpage. The template string is a |
435 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, | 447 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, |
436 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be | 448 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be |
437 replaced by the Nth element of the id or defline, where '|' is the field separator. | 449 replaced by the Nth element of the id or defline, where '|' is the field separator. |
438 | 450 |
439 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 451 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
440 which is a link to the NCBI nucleotide database.""") | 452 which is a link to the NCBI nucleotide database.""") |
441 | 453 |
442 dblink_group.add_argument('--db-config-dir', | 454 dblink_group.add_argument('--db-config-dir', |
443 help="""The directory where databases are configured in blastdb*.loc files. These files | 455 help="""The directory where databases are configured in blastdb*.loc files. These files |
444 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines | 456 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines |
445 starting with '#' ignored), where the third field of a line should be a database path and the fourth | 457 starting with '#' ignored), where the third field of a line should be a database path and the fourth |
446 a genebank link template conforming to the --genelink-template option syntax. | 458 a genebank link template conforming to the --genelink-template option syntax. |
471 # self.write(i) | 483 # self.write(i) |
472 # args.output.writelines = fixed_writelines | 484 # args.output.writelines = fixed_writelines |
473 | 485 |
474 args.output.close() | 486 args.output.close() |
475 args.output = io.open(args.output.name, 'w', encoding='utf-8') | 487 args.output = io.open(args.output.name, 'w', encoding='utf-8') |
476 | 488 |
477 templatedir, templatename = path.split(args.template.name) | 489 templatedir, templatename = path.split(args.template.name) |
478 args.template.close() | 490 args.template.close() |
479 if not templatedir: | 491 if not templatedir: |
480 templatedir = '.' | 492 templatedir = '.' |
481 | 493 |
482 if args.db_config_dir is None: | 494 if args.db_config_dir is None: |
483 genelinks = args.genelink_template | 495 genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None)) |
484 elif not path.isdir(args.db_config_dir): | 496 elif not path.isdir(args.db_config_dir): |
485 parser.error('db-config-dir does not exist or is not a directory') | 497 parser.error('db-config-dir does not exist or is not a directory') |
486 else: | 498 else: |
487 genelinks = read_genelinks(args.db_config_dir) | 499 genelinks = read_genelinks(args.db_config_dir) |
488 | 500 |
489 b = BlastVisualize(args.input, templatedir, templatename, genelinks) | 501 b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks) |
490 b.render(args.output) | 502 b.render(args.output) |
491 args.output.close() | 503 args.output.close() |
492 | 504 |
493 | 505 |
494 if __name__ == '__main__': | 506 if __name__ == '__main__': |