diff blast2html.py @ 120:2729c2326235

Fix for Rikilt issue 13 Hit e-value and identity% should be taken from the hsp with the highest bit score. Previously each of these values was calculated independently. Also use arrays for cover calculation instead of python lists and refactor the hit_info() code a bit.
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 31 Jul 2014 16:14:36 +0200
parents 591dc9c24824
children 5f104d05aa23
line wrap: on
line diff
--- a/blast2html.py	Thu Jul 31 13:26:34 2014 +0200
+++ b/blast2html.py	Thu Jul 31 16:14:36 2014 +0200
@@ -16,6 +16,7 @@
 from os import path
 from itertools import repeat
 from collections import defaultdict, namedtuple
+from array import array
 import glob
 import argparse
 from lxml import objectify
@@ -327,23 +328,22 @@
         for hit in hits(result):
             hsps = hit.Hit_hsps.Hsp
 
-            cover = [False] * query_length
+            cover = array('B', [0]) * query_length
             for hsp in hsps:
-                cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp))
-            cover_count = cover.count(True)
+                cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = array('B', [1]) * blastxml_len(hsp)
+            cover_count = cover.count(1)
 
-            def hsp_val(path):
-                return (float(hsp[path]) for hsp in hsps)
+            best_hsp = max(hsps, key=lambda h: h['Hsp_bit-score'])
 
             yield dict(hit = hit,
                        title = firsttitle(hit),
-                       maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))),
-                       totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
-                       cover = "{0:.0%}".format(cover_count / query_length),
-                       e_value = "{0:.4}".format(float(min(hsp_val('Hsp_evalue')))),
-                       # FIXME: is this the correct formula vv?
+                       maxscore = format(float(best_hsp['Hsp_bit-score']), '.1f'),
+                       e_value = format(float(best_hsp.Hsp_evalue), '.4'),
                        # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
-                       ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))))
+                       ident = format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'),
+                       totalscore = format(sum(hsp['Hsp_bit-score'] for hsp in hsps), '.1f'),
+                       cover = format(cover_count / query_length, '.0%'),
+                  )
 
     @filter
     def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):