diff blast2html.py @ 69:0c4ac210068b

handle reverse matches
author Jan Kanis <jan.code@jankanis.nl>
date Wed, 18 Jun 2014 14:12:00 +0200
parents fa8a93bdefd7
children 371cd585e459
line wrap: on
line diff
--- a/blast2html.py	Wed Jun 18 12:25:37 2014 +0200
+++ b/blast2html.py	Wed Jun 18 14:12:00 2014 +0200
@@ -87,29 +87,37 @@
     
     step = 60
 
-    def split(txt):
-        return [txt[i:i+step] for i in range(0, len(txt), step)]
-
     qfrom = int(hsp['Hsp_query-from'])
     qto = int(hsp['Hsp_query-to'])
+    qframe = int(hsp['Hsp_query-frame'])
     hfrom = int(hsp['Hsp_hit-from'])
     hto = int(hsp['Hsp_hit-to'])
+    hframe = int(hsp['Hsp_hit-frame'])
     qseq = hsp.Hsp_qseq.text
     midline = hsp.Hsp_midline.text
     hseq = hsp.Hsp_hseq.text
+
+    if not qframe in [1, -1]:
+        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe))
+        qframe = -1 if qframe < 0 else 1
+    if not hframe in [1, -1]:
+        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe))
+        hframe = -1 if hframe < 0 else 1
     
-    offset = 0
+    def split(txt):
+        return [txt[i:i+step] for i in range(0, len(txt), step)]
+
     for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), step)):
         yield (
-            "Query  {:>7}  {}  {}\n".format(qfrom+offset, qs, qfrom+offset+len(qs)-1) +
+            "Query  {:>7}  {}  {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
             "       {:7}  {}\n".format('', mid) +
-            "Subject{:>7}  {}  {}".format(hfrom+offset, hs, hfrom+offset+len(hs)-1)
+            "Subject{:>7}  {}  {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
         )
         
-    if qfrom+len(qseq)-1 != qto:
+    if qfrom+(len(qseq)-1)*qframe != qto:
         warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format(
             nodeid(hsp), qfrom, qto, len(qseq)))
-    if hfrom+len(hseq)-1 != hto:
+    if hfrom+(len(hseq)-1)*hframe != hto:
         warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format(
             nodeid(hsp), hfrom, hto, len(hseq)))