Mercurial > repos > jankanis > blast2html
diff blast2html.py @ 115:0c2a03f9740b
make external gene bank name configurable
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Mon, 14 Jul 2014 15:01:32 +0200 |
parents | e17aae23cc1c |
children | f5066973029a |
line wrap: on
line diff
```diff
--- a/blast2html.py Wed Jul 09 15:20:38 2014 +0200
+++ b/blast2html.py Mon Jul 14 15:01:32 2014 +0200
@@ -15,7 +15,7 @@
 from six.moves import builtins
 from os import path
 from itertools import repeat
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 import glob
 import argparse
 from lxml import objectify
@@ -238,9 +238,10 @@
 
     max_scale_labels = 10
 
-    def __init__(self, input, templatedir, templatename, genelinks={}):
+    def __init__(self, input, templatedir, templatename, dbname, genelinks={}):
         self.input = input
         self.templatename = templatename
+        self.dbname = dbname
         self.genelinks = genelinks
 
         self.blast = objectify.parse(self.input).getroot()
@@ -348,24 +349,32 @@
                        ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))))
 
     @filter
-    def genelink(self, hit, text=None, clas=None, display_nolink=True):
+    def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):
         """Create a html link from a hit node to a configured gene bank webpage.
 
-        text: The text of the link, defaults to the hit_id
-        clas: extra css classes that will be added to the <a> element
+        text: The text of the link. If not set applies text_from.
+        text_from: string, if text is not specified, take it from specified source. Either 'hitid' (default) or 'dbname'.
+        cssclass: extra css classes that will be added to the <a> element
         display_nolink: boolean, if false don't display anything if no link can be created. Default True.
         """
-        if text is None:
-            text = hitid(hit)
-
         db = hit.getroottree().getroot().BlastOutput_db
 
         if isinstance(self.genelinks, six.string_types):
             template = self.genelinks
         else:
-            template = self.genelinks.get(db)
+            template = self.genelinks[db].template
+
+        if text is None:
+            if text_from == 'hitid':
+                text = hitid(hit)
+            elif text_from == 'dbname':
+                text = self.dbname or self.genelinks[db].dbname or 'Gene Bank'
+            else:
+                raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from))
+
         if template is None:
             return text if display_nolink else ''
+
         args = dict(id=hitid(hit).split('|'),
                     fullid=hitid(hit),
                     defline=str(hit.Hit_def).split(' ', 1)[0].split('|'),
@@ -377,12 +386,13 @@
             warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e))
             return text if display_nolink else ''
 
-        classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else ''
+        classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else ''
         return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
 
 
+genelinks_entry = namedtuple('genelinks_entry', 'dbname template')
 def read_genelinks(dir):
-    links = {}
+    links = defaultdict(lambda: genelinks_entry(None, None))
     # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc.
     files = sorted(glob.glob(path.join(dir, 'blastdb*.loc')))
     # reversed, so blastdb.loc will take precedence
@@ -394,7 +404,7 @@
                 continue
             line = l.rstrip('\n').split('\t')
             try:
-                links[line[2]] = line[3]
+                links[line[2]] = genelinks_entry(dbname=line[3], template=line[4])
             except IndexError:
                 continue
         f.close()
@@ -427,7 +437,9 @@
     # care too much.
     parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                         help='The template file to use. Defaults to blast_html.html.jinja')
-    
+
+    parser.add_argument('--dbname', type=str, default=None,
+                        help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
     dblink_group = parser.add_mutually_exclusive_group()
     dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE',
                               default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
@@ -438,7 +450,7 @@
                               The default is
                               'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
                               which is a link to the NCBI nucleotide database.""")
-    
+
     dblink_group.add_argument('--db-config-dir',
                               help="""The directory where databases are configured in blastdb*.loc files. These files are
                               consulted for creating a gene bank link. The files should be tab-separated tables (with lines
@@ -473,20 +485,20 @@
         args.output.close()
         args.output = io.open(args.output.name, 'w', encoding='utf-8')
 
-    
+
     templatedir, templatename = path.split(args.template.name)
     args.template.close()
     if not templatedir:
         templatedir = '.'
 
     if args.db_config_dir is None:
-        genelinks = args.genelink_template
+        genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None))
     elif not path.isdir(args.db_config_dir):
         parser.error('db-config-dir does not exist or is not a directory')
     else:
         genelinks = read_genelinks(args.db_config_dir)
 
-    b = BlastVisualize(args.input, templatedir, templatename, genelinks)
+    b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks)
     b.render(args.output)
     args.output.close()
 
```