Mercurial > repos > jankanis > blast2html
changeset 69:0c4ac210068b
handle reverse matches
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Wed, 18 Jun 2014 14:12:00 +0200 |
parents | fa8a93bdefd7 |
children | 0ef071bba164 |
files | blast2html.py test-data/blast xml example1.html |
diffstat | 2 files changed, 50 insertions(+), 42 deletions(-) [+] |
line wrap: on
line diff
--- a/blast2html.py Wed Jun 18 12:25:37 2014 +0200 +++ b/blast2html.py Wed Jun 18 14:12:00 2014 +0200 @@ -87,29 +87,37 @@ step = 60 - def split(txt): - return [txt[i:i+step] for i in range(0, len(txt), step)] - qfrom = int(hsp['Hsp_query-from']) qto = int(hsp['Hsp_query-to']) + qframe = int(hsp['Hsp_query-frame']) hfrom = int(hsp['Hsp_hit-from']) hto = int(hsp['Hsp_hit-to']) + hframe = int(hsp['Hsp_hit-frame']) qseq = hsp.Hsp_qseq.text midline = hsp.Hsp_midline.text hseq = hsp.Hsp_hseq.text + + if not qframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe)) + qframe = -1 if qframe < 0 else 1 + if not hframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe)) + hframe = -1 if hframe < 0 else 1 - offset = 0 + def split(txt): + return [txt[i:i+step] for i in range(0, len(txt), step)] + for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), step)): yield ( - "Query {:>7} {} {}\n".format(qfrom+offset, qs, qfrom+offset+len(qs)-1) + + "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + " {:7} {}\n".format('', mid) + - "Subject{:>7} {} {}".format(hfrom+offset, hs, hfrom+offset+len(hs)-1) + "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) ) - if qfrom+len(qseq)-1 != qto: + if qfrom+(len(qseq)-1)*qframe != qto: warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( nodeid(hsp), qfrom, qto, len(qseq))) - if hfrom+len(hseq)-1 != hto: + if hfrom+(len(hseq)-1)*hframe != hto: warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( nodeid(hsp), hfrom, hto, len(hseq)))
--- a/test-data/blast xml example1.html Wed Jun 18 12:25:37 2014 +0200 +++ b/test-data/blast xml example1.html Wed Jun 18 14:12:00 2014 +0200 @@ -6667,7 +6667,7 @@ <pre class=alignmentgraphic>Query 2 GTCCGTCG 9 |||||||| -Subject 177 GTCCGTCG 184</pre> +Subject 177 GTCCGTCG 170</pre> </div> <div class=hotspot id=hotspot1-26-3> <p class=range> @@ -6715,7 +6715,7 @@ <pre class=alignmentgraphic>Query 2 GTCCGTC 8 ||||||| -Subject 2048 GTCCGTC 2054</pre> +Subject 2048 GTCCGTC 2042</pre> </div> <div class=hotspot id=hotspot1-26-5> <p class=range> @@ -6853,7 +6853,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1634 CGTGAAGA 1641</pre> +Subject 1634 CGTGAAGA 1627</pre> </div> <div class=hotspot id=hotspot1-28-3> <p class=range> @@ -6994,7 +6994,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1634 CGTGAAGA 1641</pre> +Subject 1634 CGTGAAGA 1627</pre> </div> <div class=hotspot id=hotspot1-29-3> <p class=range> @@ -7111,7 +7111,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 298 TCGTGAAGA 306</pre> +Subject 298 TCGTGAAGA 290</pre> </div> <div class=hotspot id=hotspot1-31-2> <p class=range> @@ -7672,7 +7672,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-37-2> <p class=range> @@ -7813,7 +7813,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1569 TGAAGAG 1575</pre> +Subject 1569 TGAAGAG 1563</pre> </div> </div> @@ -7924,7 +7924,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1569 TGAAGAG 1575</pre> +Subject 1569 TGAAGAG 1563</pre> </div> </div> @@ -8392,7 +8392,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1634 CGTGAAGA 1641</pre> +Subject 1634 CGTGAAGA 1627</pre> </div> <div class=hotspot id=hotspot1-45-3> <p class=range> @@ -8551,7 +8551,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1634 CGTGAAGA 1641</pre> +Subject 1634 CGTGAAGA 1627</pre> </div> <div class=hotspot id=hotspot1-46-3> <p class=range> @@ -8716,7 +8716,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1569 TGAAGAG 1575</pre> +Subject 1569 TGAAGAG 1563</pre> </div> </div> @@ -8809,7 +8809,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1569 TGAAGAG 1575</pre> +Subject 1569 TGAAGAG 1563</pre> </div> </div> @@ -8878,7 +8878,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1634 CGTGAAGA 1641</pre> +Subject 1634 CGTGAAGA 1627</pre> </div> <div class=hotspot id=hotspot1-49-3> <p class=range> @@ -9043,7 +9043,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1569 TGAAGAG 1575</pre> +Subject 1569 TGAAGAG 1563</pre> </div> </div> @@ -9088,7 +9088,7 @@ <pre class=alignmentgraphic>Query 4 CCGTCGTGA 12 ||||||||| -Subject 19 CCGTCGTGA 27</pre> +Subject 19 CCGTCGTGA 11</pre> </div> </div> @@ -9178,7 +9178,7 @@ <pre class=alignmentgraphic>Query 4 CCGTCGTGA 12 ||||||||| -Subject 19 CCGTCGTGA 27</pre> +Subject 19 CCGTCGTGA 11</pre> </div> </div> @@ -9268,7 +9268,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-55-2> <p class=range> @@ -9385,7 +9385,7 @@ <pre class=alignmentgraphic>Query 8 CGTGAAGA 15 |||||||| -Subject 1587 CGTGAAGA 1594</pre> +Subject 1587 CGTGAAGA 1580</pre> </div> <div class=hotspot id=hotspot1-56-3> <p class=range> @@ -9550,7 +9550,7 @@ <pre class=alignmentgraphic>Query 10 TGAAGAG 16 ||||||| -Subject 1533 TGAAGAG 1539</pre> +Subject 1533 TGAAGAG 1527</pre> </div> </div> @@ -9814,7 +9814,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-61-2> <p class=range> @@ -9907,7 +9907,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-62-2> <p class=range> @@ -10000,7 +10000,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-63-2> <p class=range> @@ -10093,7 +10093,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-64-2> <p class=range> @@ -10186,7 +10186,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-65-2> <p class=range> @@ -10279,7 +10279,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-66-2> <p class=range> @@ -10348,7 +10348,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-67-2> <p class=range> @@ -10441,7 +10441,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-68-2> <p class=range> @@ -10534,7 +10534,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-69-2> <p class=range> @@ -10627,7 +10627,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-70-2> <p class=range> @@ -10768,7 +10768,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-71-2> <p class=range> @@ -11263,7 +11263,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-76-2> <p class=range> @@ -11380,7 +11380,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-77-2> <p class=range> @@ -11473,7 +11473,7 @@ <pre class=alignmentgraphic>Query 7 TCGTGAAGA 15 ||||||||| -Subject 305 TCGTGAAGA 313</pre> +Subject 305 TCGTGAAGA 297</pre> </div> <div class=hotspot id=hotspot1-78-2> <p class=range> @@ -12961,7 +12961,7 @@ <pre class=alignmentgraphic>Query 5 CGTCGTGA 12 |||||||| -Subject 52 CGTCGTGA 59</pre> +Subject 52 CGTCGTGA 45</pre> </div> </div>