Mercurial > repos > jankanis > blast2html
diff blast2html.py @ 69:0c4ac210068b
handle reverse matches
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Wed, 18 Jun 2014 14:12:00 +0200 |
parents | fa8a93bdefd7 |
children | 371cd585e459 |
line wrap: on
line diff
--- a/blast2html.py Wed Jun 18 12:25:37 2014 +0200 +++ b/blast2html.py Wed Jun 18 14:12:00 2014 +0200 @@ -87,29 +87,37 @@ step = 60 - def split(txt): - return [txt[i:i+step] for i in range(0, len(txt), step)] - qfrom = int(hsp['Hsp_query-from']) qto = int(hsp['Hsp_query-to']) + qframe = int(hsp['Hsp_query-frame']) hfrom = int(hsp['Hsp_hit-from']) hto = int(hsp['Hsp_hit-to']) + hframe = int(hsp['Hsp_hit-frame']) qseq = hsp.Hsp_qseq.text midline = hsp.Hsp_midline.text hseq = hsp.Hsp_hseq.text + + if not qframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe)) + qframe = -1 if qframe < 0 else 1 + if not hframe in [1, -1]: + warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe)) + hframe = -1 if hframe < 0 else 1 - offset = 0 + def split(txt): + return [txt[i:i+step] for i in range(0, len(txt), step)] + for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), step)): yield ( - "Query {:>7} {} {}\n".format(qfrom+offset, qs, qfrom+offset+len(qs)-1) + + "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + " {:7} {}\n".format('', mid) + - "Subject{:>7} {} {}".format(hfrom+offset, hs, hfrom+offset+len(hs)-1) + "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) ) - if qfrom+len(qseq)-1 != qto: + if qfrom+(len(qseq)-1)*qframe != qto: warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( nodeid(hsp), qfrom, qto, len(qseq))) - if hfrom+len(hseq)-1 != hto: + if hfrom+(len(hseq)-1)*hframe != hto: warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( nodeid(hsp), hfrom, hto, len(hseq)))