Mercurial > repos > jankanis > blast2html
comparison blast2html.py @ 120:2729c2326235
Fix for Rikilt issue 13
Hit e-value and identity% should be taken from the hsp with the highest
bit score. Previously each of these values was calculated independently.
Also use arrays for cover calculation instead of python lists and
refactor the hit_info() code a bit.
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Thu, 31 Jul 2014 16:14:36 +0200 |
parents | 591dc9c24824 |
children | 5f104d05aa23 |
comparison
equal
deleted
inserted
replaced
119:591dc9c24824 | 120:2729c2326235 |
---|---|
14 import six, codecs, io | 14 import six, codecs, io |
15 from six.moves import builtins | 15 from six.moves import builtins |
16 from os import path | 16 from os import path |
17 from itertools import repeat | 17 from itertools import repeat |
18 from collections import defaultdict, namedtuple | 18 from collections import defaultdict, namedtuple |
19 from array import array | |
19 import glob | 20 import glob |
20 import argparse | 21 import argparse |
21 from lxml import objectify | 22 from lxml import objectify |
22 import jinja2 | 23 import jinja2 |
23 | 24 |
325 query_length = blastxml_len(result) | 326 query_length = blastxml_len(result) |
326 | 327 |
327 for hit in hits(result): | 328 for hit in hits(result): |
328 hsps = hit.Hit_hsps.Hsp | 329 hsps = hit.Hit_hsps.Hsp |
329 | 330 |
330 cover = [False] * query_length | 331 cover = array('B', [0]) * query_length |
331 for hsp in hsps: | 332 for hsp in hsps: |
332 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp)) | 333 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = array('B', [1]) * blastxml_len(hsp) |
333 cover_count = cover.count(True) | 334 cover_count = cover.count(1) |
334 | 335 |
335 def hsp_val(path): | 336 best_hsp = max(hsps, key=lambda h: h['Hsp_bit-score']) |
336 return (float(hsp[path]) for hsp in hsps) | |
337 | 337 |
338 yield dict(hit = hit, | 338 yield dict(hit = hit, |
339 title = firsttitle(hit), | 339 title = firsttitle(hit), |
340 maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))), | 340 maxscore = format(float(best_hsp['Hsp_bit-score']), '.1f'), |
341 totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))), | 341 e_value = format(float(best_hsp.Hsp_evalue), '.4'), |
342 cover = "{0:.0%}".format(cover_count / query_length), | |
343 e_value = "{0:.4}".format(float(min(hsp_val('Hsp_evalue')))), | |
344 # FIXME: is this the correct formula vv? | |
345 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 | 342 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 |
346 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) | 343 ident = format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'), |
344 totalscore = format(sum(hsp['Hsp_bit-score'] for hsp in hsps), '.1f'), | |
345 cover = format(cover_count / query_length, '.0%'), | |
346 ) | |
347 | 347 |
348 @filter | 348 @filter |
349 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True): | 349 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True): |
350 """Create a html link from a hit node to a configured gene bank webpage. | 350 """Create a html link from a hit node to a configured gene bank webpage. |
351 text: The text of the link. If not set applies text_from. | 351 text: The text of the link. If not set applies text_from. |