Mercurial > repos > jankanis > blast2html
changeset 5:1df2bfce5c24
first features are working, partial match table
| author | Jan Kanis <jan.code@jankanis.nl> | 
|---|---|
| date | Wed, 07 May 2014 18:49:54 +0200 | 
| parents | 34211f5b83fd | 
| children | d20ce91e1297 | 
| files | visualise.html.jinja visualise.py | 
| diffstat | 2 files changed, 235 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
{#- Extracted from the changeset diff: --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualise.html.jinja Wed May 07 18:49:54 2014 +0200 (@@ -0,0 +1,149 @@) -#}
{#-
  Template for the BLAST report page.

  Context variables read by this template:
    blast         object indexed with "BlastOutput_query-len"
    params        iterable of (label, value) pairs for the header table
    hits          sequence; only its length is shown
    colors        five CSS colour values, indexed 0..4, for the legend
    match_colors  iterable of objects with a `colors` attribute, itself a
                  list of (width, css-colour) pairs, one table row per item
-#}
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">

    <title>Blast output</title>

    <style>
      body {
        color: #333333;
        font-family: Verdana,Arial,Sans-Serif;
      }


      #content {
        margin: 0 2em;
        padding: 0.5em;
        border: 1px solid #888888;
        background-color: #d3dff5;
      }

      h1, h2, h3, h4, h5, h6 {
        color: #2A6979;
        font-family: arial,verdana,sans-serif;
        letter-spacing: -1px;
        margin: 1.2em 0 0.3em;
      }

      h1 {
        border-bottom: 1px solid #CCCCCC;
        font-size: 150%;
        padding-bottom: 0.1em;
      }

      h2 {
        font-size: 120%;
        font-weight: bold;
      }

      h4.graphicHeader {
        color: black;
        letter-spacing: 0;
        font-weight: bold;
      }

      .headerdata {
        font-size: 90%;
      }
      .headerdata .param {
        font-weight: bold;
        text-align: right;
        padding: 0 1em;
      }

      .graphicInfo {
        background-color: #eeeeee;
        border: 1px solid #cccccc;
        padding: 1em;
        text-align: center;
      }

      .graphic {
        background-color: white;
        border: 2px solid black;
        padding: .5em;
        margin: auto;
      }

      .centered {
        margin-left: auto;
        margin-right: auto;
      }

      table.legend {
        color: white;
        font-weight: bold;
        margin: 0 auto;
        width: 40em;
        border-spacing: 0;
      }
      table.legend td {
        width: 20%;
        padding: 0;
        margin: 0;
        border: none;
      }

      table.matchresult {
        height: 5px;
        width: 40em;
        margin: 5px auto;
      }
    </style>

  </head>

  <body>
    <div id=content>
      <h1>Nucleotide Sequence ({{blast["BlastOutput_query-len"]}} letters)</h1>

      <div id=header>

        <table class=headerdata>
          {% for param, value in params %}
          <tr><td class=param>{{param}}</td><td>{{value}}</td></tr>
          {% endfor %}
        </table>

      </div>

      <div id=graphics>
        <h2>Graphic Summary</h2>

        <div class=graphicInfo>
          <h3 class=centered>Distribution of {{hits|length}} Blast Hits on the Query Sequence</h3>
          <div class=graphic>
            <h4 class=graphicHeader>Color key for alignment scores</h4>
            {# The "<" and ">" in the labels are written as entities so they are not parsed as markup. #}
            <table class=legend><tr>
              <td style="background-color: {{colors[0]}}">&lt;40</td>
              <td style="background-color: {{colors[1]}}">40-50</td>
              <td style="background-color: {{colors[2]}}">50-80</td>
              <td style="background-color: {{colors[3]}}">80-200</td>
              <td style="background-color: {{colors[4]}}">&gt;=200</td>
            </tr></table>

            {# One single-row table per item; each cell is one run of equally coloured positions. #}
            {% for line in match_colors %}
            <table class=matchresult><tr>
              {% for match in line.colors %}
              <td width={{match[0]}} style="background-color: {{match[1]}}"> </td>
              {% endfor %}
            </tr></table>
            {% endfor %}

            {# NOTE(review): looks like a development placeholder - confirm before removing. #}
            <p>hoi</p>

          </div>
        </div>


      </div>

    </div>
  </body>
</html>
#!/usr/bin/env python3

# Copyright The Hyve B.V. 2014
# License: GPL version 3 or higher

# Extracted from the changeset diff:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/visualise.py Wed May 07 18:49:54 2014 +0200
#   @@ -0,0 +1,86 @@

"""Render a BLAST XML result as an HTML page on standard output.

The XML file is parsed once at import time into the module-level ``blast``
object; ``main()`` renders ``visualise.html.jinja`` with data taken from it.
"""

import sys
import warnings
from itertools import groupby, repeat
from lxml import objectify
import jinja2


# Marker stored in the per-position table for query positions that no HSP
# covers. It has to fit in a byte and must not be a valid index into
# ``colors``.
_UNCOVERED = 255


def color_idx(length):
    """Return the index into ``colors`` for an alignment of size ``length``.

    The buckets are <40, 40-50, 50-80, 80-200 and >=200, matching the legend
    in the template.
    """
    # NOTE(review): the template legend is titled "alignment scores" but every
    # caller in this file passes 'Hsp_align-len' - confirm which is intended.
    if length < 40:
        return 0
    elif length < 50:
        return 1
    elif length < 80:
        return 2
    elif length < 200:
        return 3
    return 4


colors = ['black', 'blue', 'green', 'magenta', 'red']

blast = objectify.parse('blast xml example1.xml').getroot()
loader = jinja2.FileSystemLoader(searchpath='.')
# Values such as the query definition come straight from the BLAST XML and are
# written into HTML, so let Jinja escape them.
environment = jinja2.Environment(loader=loader, autoescape=True)
# Map a length to its CSS colour name. This used to index ``match_colors``
# (a function), which raised TypeError as soon as the filter was applied.
environment.filters['color'] = lambda length: colors[color_idx(length)]


def _first_iteration_hits():
    """Return the ``Hit`` elements of the first iteration as a list.

    An iteration without any ``Hit`` child yields an empty list instead of
    raising AttributeError.
    """
    hits_element = blast.BlastOutput_iterations.Iteration.Iteration_hits
    return list(getattr(hits_element, 'Hit', []))


def _longest_hsp(hit):
    """Return the largest 'Hsp_align-len' among the HSPs of ``hit``."""
    return max(int(hsp['Hsp_align-len']) for hsp in hit.Hit_hsps.Hsp)


def match_colors():
    """
    An iterator that yields, for every hit, a dict with two keys:

    ``colors``
        list of ``(count, color)`` pairs that run-length encode the query
        from left to right; ``color`` is an entry of the module-level
        ``colors`` list, or ``'none'`` for positions no HSP covers.
    ``link``
        ``"#hit"`` followed by the text of the hit's ``Hit_num``.

    Hits are ordered by their longest HSP, longest first.
    """

    query_length = int(blast["BlastOutput_query-len"])
    # sort hits by longest hotspot first
    hits = sorted(_first_iteration_hits(), key=_longest_hsp, reverse=True)

    for hit in hits:
        # sort hotspots from short to long, so we can overwrite index colors of
        # short matches with those of long ones.
        hotspots = sorted(hit.Hit_hsps.Hsp,
                          key=lambda hsp: int(hsp['Hsp_align-len']))
        table = bytearray([_UNCOVERED]) * query_length
        for hsp in hotspots:
            # BLAST XML coordinates are 1-based and inclusive, so positions
            # first..last map to the half-open slice [first - 1, last).
            # Order the two ends in case they are given in descending order.
            first, last = sorted((int(hsp['Hsp_query-from']),
                                  int(hsp['Hsp_query-to'])))
            # Clamp to the query: a slice assignment that runs past the end
            # would silently lengthen the table.
            start = max(first - 1, 0)
            stop = min(last, query_length)
            if stop > start:
                table[start:stop] = repeat(color_idx(hsp['Hsp_align-len']),
                                           stop - start)

        # Run-length encode the table. An empty table (zero-length query)
        # simply gives an empty list.
        matches = [
            (sum(1 for _ in run),
             colors[idx] if idx != _UNCOVERED else 'none')
            for idx, run in groupby(table)
        ]

        yield dict(colors=matches, link="#hit"+hit.Hit_num.text)


def main():
    """Render ``visualise.html.jinja`` with the parsed BLAST data to stdout.

    Emits a warning when the XML holds more than one ``Iteration`` element,
    because only the first one is shown.
    """
    template = environment.get_template('visualise.html.jinja')

    params = (('Query ID', blast["BlastOutput_query-ID"]),
              ('Query definition', blast["BlastOutput_query-def"]),
              ('Query length', blast["BlastOutput_query-len"]),
              ('Program', blast.BlastOutput_version),
              ('Database', blast.BlastOutput_db),
              )

    if len(blast.BlastOutput_iterations.Iteration) > 1:
        warnings.warn("Multiple 'Iteration' elements found, showing only the first")

    sys.stdout.write(template.render(blast=blast,
                                     hits=_first_iteration_hits(),
                                     colors=colors,
                                     match_colors=match_colors(),
                                     params=params))


# Only render when executed as a script, so importing the module does not
# write a page to stdout.
if __name__ == '__main__':
    main()
