Mercurial > repos > jankanis > blast2html
changeset 120:2729c2326235
Fix for Rikilt issue 13
Hit e-value and identity% should be taken from the hsp with the highest
bit score. Previously each of these values was calculated independently.
Also use arrays for cover calculation instead of python lists and
refactor the hit_info() code a bit.
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Thu, 31 Jul 2014 16:14:36 +0200 |
parents | 591dc9c24824 |
children | 5f104d05aa23 |
files | blast2html.py |
diffstat | 1 files changed, 11 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/blast2html.py Thu Jul 31 13:26:34 2014 +0200 +++ b/blast2html.py Thu Jul 31 16:14:36 2014 +0200 @@ -16,6 +16,7 @@ from os import path from itertools import repeat from collections import defaultdict, namedtuple +from array import array import glob import argparse from lxml import objectify @@ -327,23 +328,22 @@ for hit in hits(result): hsps = hit.Hit_hsps.Hsp - cover = [False] * query_length + cover = array('B', [0]) * query_length for hsp in hsps: - cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp)) - cover_count = cover.count(True) + cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = array('B', [1]) * blastxml_len(hsp) + cover_count = cover.count(1) - def hsp_val(path): - return (float(hsp[path]) for hsp in hsps) + best_hsp = max(hsps, key=lambda h: h['Hsp_bit-score']) yield dict(hit = hit, title = firsttitle(hit), - maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))), - totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))), - cover = "{0:.0%}".format(cover_count / query_length), - e_value = "{0:.4}".format(float(min(hsp_val('Hsp_evalue')))), - # FIXME: is this the correct formula vv? + maxscore = format(float(best_hsp['Hsp_bit-score']), '.1f'), + e_value = format(float(best_hsp.Hsp_evalue), '.4'), # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 - ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) + ident = format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'), + totalscore = format(sum(hsp['Hsp_bit-score'] for hsp in hsps), '.1f'), + cover = format(cover_count / query_length, '.0%'), + ) @filter def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):