Mercurial > repos > jankanis > blast2html
comparison blast2html.py @ 115:0c2a03f9740b
make external gene bank name configurable
| author | Jan Kanis <jan.code@jankanis.nl> |
|---|---|
| date | Mon, 14 Jul 2014 15:01:32 +0200 |
| parents | e17aae23cc1c |
| children | f5066973029a |
Comparison legend: equal, deleted, inserted, replaced
| 114:4f0ed3b5ae46 | 115:0c2a03f9740b |
|---|---|
| 13 import warnings | 13 import warnings |
| 14 import six, codecs, io | 14 import six, codecs, io |
| 15 from six.moves import builtins | 15 from six.moves import builtins |
| 16 from os import path | 16 from os import path |
| 17 from itertools import repeat | 17 from itertools import repeat |
| 18 from collections import defaultdict | 18 from collections import defaultdict, namedtuple |
| 19 import glob | 19 import glob |
| 20 import argparse | 20 import argparse |
| 21 from lxml import objectify | 21 from lxml import objectify |
| 22 import jinja2 | 22 import jinja2 |
| 23 | 23 |
| 236 | 236 |
| 237 colors = ('black', 'blue', 'green', 'magenta', 'red') | 237 colors = ('black', 'blue', 'green', 'magenta', 'red') |
| 238 | 238 |
| 239 max_scale_labels = 10 | 239 max_scale_labels = 10 |
| 240 | 240 |
| 241 def __init__(self, input, templatedir, templatename, genelinks={}): | 241 def __init__(self, input, templatedir, templatename, dbname, genelinks={}): |
| 242 self.input = input | 242 self.input = input |
| 243 self.templatename = templatename | 243 self.templatename = templatename |
| 244 self.dbname = dbname | |
| 244 self.genelinks = genelinks | 245 self.genelinks = genelinks |
| 245 | 246 |
| 246 self.blast = objectify.parse(self.input).getroot() | 247 self.blast = objectify.parse(self.input).getroot() |
| 247 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) | 248 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) |
| 248 self.environment = jinja2.Environment(loader=self.loader, | 249 self.environment = jinja2.Environment(loader=self.loader, |
| 346 # FIXME: is this the correct formula vv? | 347 # FIXME: is this the correct formula vv? |
| 347 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 | 348 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 |
| 348 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) | 349 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) |
| 349 | 350 |
| 350 @filter | 351 @filter |
| 351 def genelink(self, hit, text=None, clas=None, display_nolink=True): | 352 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True): |
| 352 """Create a html link from a hit node to a configured gene bank webpage. | 353 """Create a html link from a hit node to a configured gene bank webpage. |
| 353 text: The text of the link, defaults to the hit_id | 354 text: The text of the link. If not set applies text_from. |
| 354 clas: extra css classes that will be added to the <a> element | 355 text_from: string, if text is not specified, take it from specified source. Either 'hitid' (default) or 'dbname'. |
| 356 cssclass: extra css classes that will be added to the <a> element | |
| 355 display_nolink: boolean, if false don't display anything if no link can be created. Default True. | 357 display_nolink: boolean, if false don't display anything if no link can be created. Default True. |
| 356 """ | 358 """ |
| 357 | 359 |
| 358 if text is None: | |
| 359 text = hitid(hit) | |
| 360 | |
| 361 db = hit.getroottree().getroot().BlastOutput_db | 360 db = hit.getroottree().getroot().BlastOutput_db |
| 362 | 361 |
| 363 if isinstance(self.genelinks, six.string_types): | 362 if isinstance(self.genelinks, six.string_types): |
| 364 template = self.genelinks | 363 template = self.genelinks |
| 365 else: | 364 else: |
| 366 template = self.genelinks.get(db) | 365 template = self.genelinks[db].template |
| 366 | |
| 367 if text is None: | |
| 368 if text_from == 'hitid': | |
| 369 text = hitid(hit) | |
| 370 elif text_from == 'dbname': | |
| 371 text = self.dbname or self.genelinks[db].dbname or 'Gene Bank' | |
| 372 else: | |
| 373 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) | |
| 374 | |
| 367 if template is None: | 375 if template is None: |
| 368 return text if display_nolink else '' | 376 return text if display_nolink else '' |
| 377 | |
| 369 args = dict(id=hitid(hit).split('|'), | 378 args = dict(id=hitid(hit).split('|'), |
| 370 fullid=hitid(hit), | 379 fullid=hitid(hit), |
| 371 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'), | 380 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'), |
| 372 fulldefline=str(hit.Hit_def).split(' ', 1)[0], | 381 fulldefline=str(hit.Hit_def).split(' ', 1)[0], |
| 373 accession=str(hit.Hit_accession)) | 382 accession=str(hit.Hit_accession)) |
| 375 link = template.format(**args) | 384 link = template.format(**args) |
| 376 except Exception as e: | 385 except Exception as e: |
| 377 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) | 386 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) |
| 378 return text if display_nolink else '' | 387 return text if display_nolink else '' |
| 379 | 388 |
| 380 classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else '' | 389 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else '' |
| 381 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) | 390 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) |
| 382 | 391 |
| 383 | 392 |
| 393 genelinks_entry = namedtuple('genelinks_entry', 'dbname template') | |
| 384 def read_genelinks(dir): | 394 def read_genelinks(dir): |
| 385 links = {} | 395 links = defaultdict(lambda: genelinks_entry(None, None)) |
| 386 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. | 396 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. |
| 387 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) | 397 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) |
| 388 # reversed, so blastdb.loc will take precedence | 398 # reversed, so blastdb.loc will take precedence |
| 389 for f in reversed(files): | 399 for f in reversed(files): |
| 390 try: | 400 try: |
| 392 for l in f.readlines(): | 402 for l in f.readlines(): |
| 393 if l.strip().startswith('#'): | 403 if l.strip().startswith('#'): |
| 394 continue | 404 continue |
| 395 line = l.rstrip('\n').split('\t') | 405 line = l.rstrip('\n').split('\t') |
| 396 try: | 406 try: |
| 397 links[line[2]] = line[3] | 407 links[line[2]] = genelinks_entry(dbname=line[3], template=line[4]) |
| 398 except IndexError: | 408 except IndexError: |
| 399 continue | 409 continue |
| 400 f.close() | 410 f.close() |
| 401 except OSError: | 411 except OSError: |
| 402 continue | 412 continue |
| 425 # handle the errors. This introduces a small race condition when | 435 # handle the errors. This introduces a small race condition when |
| 426 # jinja later tries to re-open the template file, but we don't | 436 # jinja later tries to re-open the template file, but we don't |
| 427 # care too much. | 437 # care too much. |
| 428 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, | 438 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, |
| 429 help='The template file to use. Defaults to blast_html.html.jinja') | 439 help='The template file to use. Defaults to blast_html.html.jinja') |
| 430 | 440 |
| 441 parser.add_argument('--dbname', type=str, default=None, | |
| 442 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") | |
| 431 dblink_group = parser.add_mutually_exclusive_group() | 443 dblink_group = parser.add_mutually_exclusive_group() |
| 432 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', | 444 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', |
| 433 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 445 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
| 434 help="""A link template to link hits to a gene bank webpage. The template string is a | 446 help="""A link template to link hits to a gene bank webpage. The template string is a |
| 435 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, | 447 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, |
| 436 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be | 448 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be |
| 437 replaced by the Nth element of the id or defline, where '|' is the field separator. | 449 replaced by the Nth element of the id or defline, where '|' is the field separator. |
| 438 | 450 |
| 439 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', | 451 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', |
| 440 which is a link to the NCBI nucleotide database.""") | 452 which is a link to the NCBI nucleotide database.""") |
| 441 | 453 |
| 442 dblink_group.add_argument('--db-config-dir', | 454 dblink_group.add_argument('--db-config-dir', |
| 443 help="""The directory where databases are configured in blastdb*.loc files. These files | 455 help="""The directory where databases are configured in blastdb*.loc files. These files |
| 444 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines | 456 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines |
| 445 starting with '#' ignored), where the third field of a line should be a database path and the fourth | 457 starting with '#' ignored), where the third field of a line should be a database path and the fourth |
| 446 a genebank link template conforming to the --genelink-template option syntax. | 458 a genebank link template conforming to the --genelink-template option syntax. |
| 471 # self.write(i) | 483 # self.write(i) |
| 472 # args.output.writelines = fixed_writelines | 484 # args.output.writelines = fixed_writelines |
| 473 | 485 |
| 474 args.output.close() | 486 args.output.close() |
| 475 args.output = io.open(args.output.name, 'w', encoding='utf-8') | 487 args.output = io.open(args.output.name, 'w', encoding='utf-8') |
| 476 | 488 |
| 477 templatedir, templatename = path.split(args.template.name) | 489 templatedir, templatename = path.split(args.template.name) |
| 478 args.template.close() | 490 args.template.close() |
| 479 if not templatedir: | 491 if not templatedir: |
| 480 templatedir = '.' | 492 templatedir = '.' |
| 481 | 493 |
| 482 if args.db_config_dir is None: | 494 if args.db_config_dir is None: |
| 483 genelinks = args.genelink_template | 495 genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None)) |
| 484 elif not path.isdir(args.db_config_dir): | 496 elif not path.isdir(args.db_config_dir): |
| 485 parser.error('db-config-dir does not exist or is not a directory') | 497 parser.error('db-config-dir does not exist or is not a directory') |
| 486 else: | 498 else: |
| 487 genelinks = read_genelinks(args.db_config_dir) | 499 genelinks = read_genelinks(args.db_config_dir) |
| 488 | 500 |
| 489 b = BlastVisualize(args.input, templatedir, templatename, genelinks) | 501 b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks) |
| 490 b.render(args.output) | 502 b.render(args.output) |
| 491 args.output.close() | 503 args.output.close() |
| 492 | 504 |
| 493 | 505 |
| 494 if __name__ == '__main__': | 506 if __name__ == '__main__': |
