# HG changeset patch # User Jan Kanis # Date 1406816076 -7200 # Node ID 2729c2326235cf92fdbf99642a19b7bb3283fe28 # Parent 591dc9c248247023e04e5b1dca3f6be0731db935 Fix for Rikilt issue 13 Hit e-value and identity% should be taken from the hsp with the highest bit score. Previously each of these values was calculated independently. Also use arrays for cover calculation instead of python lists and refactor the hit_info() code a bit. diff -r 591dc9c24824 -r 2729c2326235 blast2html.py --- a/blast2html.py Thu Jul 31 13:26:34 2014 +0200 +++ b/blast2html.py Thu Jul 31 16:14:36 2014 +0200 @@ -16,6 +16,7 @@ from os import path from itertools import repeat from collections import defaultdict, namedtuple +from array import array import glob import argparse from lxml import objectify @@ -327,23 +328,22 @@ for hit in hits(result): hsps = hit.Hit_hsps.Hsp - cover = [False] * query_length + cover = array('B', [0]) * query_length for hsp in hsps: - cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp)) - cover_count = cover.count(True) + cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = array('B', [1]) * blastxml_len(hsp) + cover_count = cover.count(1) - def hsp_val(path): - return (float(hsp[path]) for hsp in hsps) + best_hsp = max(hsps, key=lambda h: h['Hsp_bit-score']) yield dict(hit = hit, title = firsttitle(hit), - maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))), - totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))), - cover = "{0:.0%}".format(cover_count / query_length), - e_value = "{0:.4}".format(float(min(hsp_val('Hsp_evalue')))), - # FIXME: is this the correct formula vv? + maxscore = format(float(best_hsp['Hsp_bit-score']), '.1f'), + e_value = format(float(best_hsp.Hsp_evalue), '.4'), # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 - ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) + ident = format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'), + totalscore = format(sum(hsp['Hsp_bit-score'] for hsp in hsps), '.1f'), + cover = format(cover_count / query_length, '.0%'), + ) @filter def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):