Mercurial > repos > jankanis > blast2html
changeset 19:67ddcb807b7d
make it work with multiple queries
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Tue, 13 May 2014 18:06:36 +0200 |
parents | 4434ffab721a |
children | 53cd304c5f26 |
files | blast_html.html.jinja blast_html.py |
diffstat | 2 files changed, 250 insertions(+), 221 deletions(-) [+] |
line wrap: on
line diff
--- a/blast_html.html.jinja Tue May 13 15:26:20 2014 +0200 +++ b/blast_html.html.jinja Tue May 13 18:06:36 2014 +0200 @@ -2,6 +2,7 @@ <html> <head> <meta charset="UTF-8"> + <meta name=generator content="blast_html; see ..."> <title>Blast output</title> @@ -97,12 +98,12 @@ margin: auto; } - .centered, #defline, div.legend, div.tablewrapper { + .centered, .defline, div.legend, div.tablewrapper { margin-left: auto; margin-right: auto; } - #defline { + .defline { background-color: white; border: 1px solid black; margin: .5em auto; @@ -338,197 +339,213 @@ <body> <div id=content> - <h1>Nucleotide Sequence ({{length}} letters)</h1> - <section class=header> - - <table class=headerdata> - {% for param, value in params %} - <tr><td class=param>{{param}}:</td><td>{{value}}</td></tr> - {% endfor %} - </table> - - </section> - - {% if not (blast.BlastOutput_iterations.findall('Iteration') and - blast.BlastOutput_iterations.Iteration.Iteration_hits.findall('Hit')) %} + {% if not blast.BlastOutput_iterations.findall('Iteration') %} <section class=nodata> - <h2>No Results</h2> - <div class=grey> - No Matches - </div> - </section> - {% else %} - - - - <section class=graphics> - <h2>Graphic Summary</h2> - + <h1>No data</h1> <div class=grey> - <h3 class=centered>Distribution of {{hits|length}} Blast Hits on the Query Sequence</h3> - - <div id=defline>Mouse-over to show defline and scores, click to show alignments</div> - - <div class=graphic> - <h4 class=darkHeader>Color key for alignment scores</h4> - <div class=legend><div class=graphicrow> - <div class=graphicitem style="background-color: {{colors[0]}}"><40</div> - <div class=graphicitem style="background-color: {{colors[1]}}">40–50</div> - <div class=graphicitem style="background-color: {{colors[2]}}">50–80</div> - <div class=graphicitem style="background-color: {{colors[3]}}">80–200</div> - <div class=graphicitem style="background-color: {{colors[4]}}">200≤</div> - </div></div> - <div style="clear: left"></div> - - <div class=tablewrapper> - - <div class=scale> - <div>query:</div> - <div class=graphicrow> - {% for s in queryscale %} - <div class=graphicitem style="width: {{s.width}}%"> - <div>{{s.label}}</div> - </div> - {% endfor %} - </div> - <div style="clear: left"></div> - </div> - - {% for line in match_colors %} - <a class=matchresult - href="{{line.link}}" - onmouseover='document.getElementById("defline").innerHTML="{{line.defline|js_string_escape}}"' - onmouseout='document.getElementById("defline").innerHTML="Mouse-over to show defline and scores, click to show alignments"' - title="{{line.defline}}"> - <div class="matchrow graphicrow"> - {% for match in line.colors %} - <div class="matchitem graphicitem" - style="background-color: {{match[1]}}; width: {{match[0]}}%"></div> - {% endfor %} - </div> - </a> - - {% endfor %} - </div> - </div> + No matches </div> </section> - + {% else %} + {% for result in blast.BlastOutput_iterations.Iteration %} + + <section class=match> - <section class=descriptions> - <h2>Descriptions</h2> + <h1>Nucleotide Sequence ({{result|len}} letters)</h1> + + <section class=header> + + <table class=headerdata> + {% for param, value in params %} + <tr><td class=param>{{param}}:</td><td>{{value}}</td></tr> + {% endfor %} + </table> + + </section> + + {% set hits = result|hits %} + {% if not hits %} + <section> + <h2>No Hits</h2> + <div class=grey> + This query did not match anywhere + </div> + </section> + {% else %} + + <section class=graphics> + <h2>Graphic Summary</h2> + + <div class=grey> + <h3 class=centered>Distribution of {{result|length}} Blast Hits on the Query Sequence</h3> + + <div class=defline id=defline{{result['Iteration_iter-num']}}> + Mouse-over to show defline and scores, click to show alignments + </div> - <div class=grey><div class=white> - <h4 class=darkHeader>Sequences producing significant alignments:</h4> + <div class=graphic> + <h4 class=darkHeader>Color key for alignment scores</h4> + <div class=legend><div class=graphicrow> + <div class=graphicitem style="background-color: {{colors[0]}}"><40</div> + <div class=graphicitem style="background-color: {{colors[1]}}">40–50</div> + <div class=graphicitem style="background-color: {{colors[2]}}">50–80</div> + <div class=graphicitem style="background-color: {{colors[3]}}">80–200</div> + <div class=graphicitem style="background-color: {{colors[4]}}">200≤</div> + </div></div> + <div style="clear: left"></div> + + <div class=tablewrapper> + + <div class=scale> + <div>query:</div> + <div class=graphicrow> + {% for s in result|queryscale %} + <div class=graphicitem style="width: {{s.width}}%"> + <div>{{s.label}}</div> + </div> + {% endfor %} + </div> + <div style="clear: left"></div> + </div> + + {% for line in result|match_colors %} + <a class=matchresult + href="{{line.link}}" + onmouseover='document.getElementById("defline{{result['Iteration_iter-num']}}").innerHTML="{{line.defline|js_string_escape}}"' + onmouseout='document.getElementById("defline{{result['Iteration_iter-num']}}").innerHTML="Mouse-over to show defline and scores, click to show alignments"' + title="{{line.defline}}"> + <div class="matchrow graphicrow"> + {% for hit in line.colors %} + <div class="matchitem graphicitem" + style="background-color: {{hit[1]}}; width: {{hit[0]}}%"></div> + {% endfor %} + </div> + </a> + + {% endfor %} + </div> + </div> + </div> + </section> + + + + <section class=descriptions> + <h2>Descriptions</h2> + + <div class=grey><div class=white> + <h4 class=darkHeader>Sequences producing significant alignments:</h4> - <table class=descriptiontable> - <col/><col/><col/><col/><col/><col/><col/> - <tr> - <th>Description</th> - <th>Max score</th> - <th>Total score</th> - <th>Query cover</th> - <th>E value</th> - <th>Ident</th> - <th>Accession</th> - </tr> - {% for hit in hit_info %} - <tr> - <td><div><a href="#hit{{hit.link_id}}" - title="{{hit.title}}" - id="description{{hit.link_id}}"> - {{hit.title}} - </a></div></td> - <td>{{hit.maxscore}}</td> - <td>{{hit.totalscore}}</td> - <td>{{hit.cover}}</td> - <td>{{hit.e_value}}</td> - <td>{{hit.ident}}</td> - <td><a href="{{genelink(hit.hit|hitid)}}">{{hit.accession}}</a></td> - </tr> - {% endfor %} - </table> - - </div></div> - </section> + <table class=descriptiontable> + <col/><col/><col/><col/><col/><col/><col/> + <tr> + <th>Description</th> + <th>Max score</th> + <th>Total score</th> + <th>Query cover</th> + <th>E value</th> + <th>Ident</th> + <th>Accession</th> + </tr> + {% for hit in result|hit_info %} + <tr> + <td><div><a href="#hit{{hit.link_id}}" + title="{{hit.title}}" + id="description{{hit.link_id}}"> + {{hit.title}} + </a></div></td> + <td>{{hit.maxscore}}</td> + <td>{{hit.totalscore}}</td> + <td>{{hit.cover}}</td> + <td>{{hit.e_value}}</td> + <td>{{hit.ident}}</td> + <td><a href="{{genelink(hit.hit|hitid)}}">{{hit.accession}}</a></td> + </tr> + {% endfor %} + </table> + + </div></div> + </section> + + + + <section class=alignments> + <h2>Alignments</h2> + + <div class=grey><div class=white> + {% for hit in hits %} + <div class=alignment id=hit{{hit.Hit_num}}> + + <div class=linkheader> + <div class=right><a href="#description{{hit.Hit_num}}">Descriptions</a></div> + <a class=linkheader href="{{genelink(hit|hitid)}}">GenBank</a> + <a class=linkheader href="{{genelink(hit|hitid, 'graph')}}">Graphics</a> + </div> - + <div class=title> + <p class=hittitle>{{hit|firsttitle}}</p> + <p class=titleinfo> + <span class=b>Sequence ID:</span> <a href="{{genelink(hit|hitid)}}">{{hit|seqid}}</a> + <span class=b>Length:</span> {{hit.Hit_len}} + <span class=b>Number of Matches:</span> {{hit.Hit_hsps.Hsp|length}} + </p> + </div> + + {% if hit|othertitles|length %} + <a class=showmoretitles onclick="toggle_visibility('moretitles{{hit.Hit_num|js_string_escape}}'); return false;" href=''> + See {{hit|othertitles|length}} more title(s) + </a> + + <div class=moretitles id=moretitles{{hit.Hit_num}} style="display: none"> + {% for title in hit|othertitles %} + <div class=title> + <p class=hittitle>{{title.title}}</p> + <p class=titleinfo> + <span class=b>Sequence ID:</span> <a href="{{genelink(title.hitid)}}">{{title.id}}</a> + </p> + </div> + {% endfor %} + </div> + {% endif %} - <section class=alignments> - <h2>Alignments</h2> - - <div class=grey><div class=white> - {% for hit in hits %} - <div class=alignment id=hit{{hit.Hit_num}}> - - <div class=linkheader> - <div class=right><a href="#description{{hit.Hit_num}}">Descriptions</a></div> - <a class=linkheader href="{{genelink(hit|hitid)}}">GenBank</a> - <a class=linkheader href="{{genelink(hit|hitid, 'graph')}}">Graphics</a> - </div> - - <div class=title> - <p class=hittitle>{{hit|firsttitle}}</p> - <p class=titleinfo> - <span class=b>Sequence ID:</span> <a href="{{genelink(hit|hitid)}}">{{hit|seqid}}</a> - <span class=b>Length:</span> {{hit.Hit_len}} - <span class=b>Number of Matches:</span> {{hit.Hit_hsps.Hsp|length}} - </p> + {% for hsp in hit.Hit_hsps.Hsp %} + <div class=hotspot> + <p class=range> + <span class=range>Range {{hsp.Hsp_num}}: {{hsp['Hsp_hit-from']}} to {{hsp['Hsp_hit-to']}}</span> + <a class=range href="{{genelink(hit|hitid, 'genbank', hsp)}}">GenBank</a> + <a class=range href="{{genelink(hit|hitid, 'graph', hsp)}}">Graphics</a> + </p> + + <table class=hotspotstable> + <tr> + <th>Score</th><th>Expect</th><th>Identities</th><th>Gaps</th><th>Strand</th> + </tr> + <tr> + <td>{{hsp['Hsp_bit-score']|fmt('.1f')}} bits({{hsp.Hsp_score}})</td> + <td>{{hsp.Hsp_evalue|fmt('.1f')}}</td> + <td>{{ hsp.Hsp_identity }}/{{ hsp|len }}({{ + (hsp.Hsp_identity/hsp|len) |fmt('.0%') }})</td> + <td>{{ hsp.Hsp_gaps }}/{{ hsp|len + }}({{ (hsp.Hsp_gaps / hsp|len) | fmt('.0%') }})</td> + <td>{{ hsp['Hsp_query-frame']|asframe }}/{{ hsp['Hsp_hit-frame']|asframe }}</td> + </tr> + </table> + + <pre class=alignmentgraphic>{{hsp|alignment_pre}}</pre> + </div> + {% endfor %} + </div> - {% if hit|othertitles|length %} - <a class=showmoretitles onclick="toggle_visibility('moretitles{{hit.Hit_num|js_string_escape}}'); return false;" href=''> - See {{hit|othertitles|length}} more title(s) - </a> - - <div class=moretitles id=moretitles{{hit.Hit_num}} style="display: none"> - {% for title in hit|othertitles %} - <div class=title> - <p class=hittitle>{{title.title}}</p> - <p class=titleinfo> - <span class=b>Sequence ID:</span> <a href="{{genelink(title.hitid)}}">{{title.id}}</a> - </p> - </div> - {% endfor %} - </div> - {% endif %} - - {% for hsp in hit.Hit_hsps.Hsp %} - <div class=hotspot> - <p class=range> - <span class=range>Range {{hsp.Hsp_num}}: {{hsp['Hsp_hit-from']}} to {{hsp['Hsp_hit-to']}}</span> - <a class=range href="{{genelink(hit|hitid, 'genbank', hsp)}}">GenBank</a> - <a class=range href="{{genelink(hit|hitid, 'graph', hsp)}}">Graphics</a> - </p> + {% endfor %} + </div></div> + </section> + {% endif %} - <table class=hotspotstable> - <tr> - <th>Score</th><th>Expect</th><th>Identities</th><th>Gaps</th><th>Strand</th> - </tr> - <tr> - <td>{{hsp['Hsp_bit-score']|fmt('.1f')}} bits({{hsp.Hsp_score}})</td> - <td>{{hsp.Hsp_evalue|fmt('.1f')}}</td> - <td>{{ hsp.Hsp_identity }}/{{ hsp|len }}({{ - (hsp.Hsp_identity/hsp|len) |fmt('.0%') }})</td> - <td>{{ hsp.Hsp_gaps }}/{{ hsp|len - }}({{ (hsp.Hsp_gaps / hsp|len) | fmt('.0%') }})</td> - <td>{{ hsp['Hsp_query-frame']|asframe }}/{{ hsp['Hsp_hit-frame']|asframe }}</td> - </tr> - </table> - - <pre class=alignmentgraphic>{{hsp|alignment_pre}}</pre> - </div> - {% endfor %} - - </div> - - {% endfor %} - </div></div> - </section> - - {% endif %} + {% endfor %} + {% endif %} </div> </body> </html>
--- a/blast_html.py Tue May 13 15:26:20 2014 +0200 +++ b/blast_html.py Tue May 13 18:06:36 2014 +0200 @@ -19,11 +19,11 @@ "Decorator to register a function as filter in the current jinja environment" if isinstance(func_or_name, str): def inner(func): - _filters[func_or_name] = func + _filters[func_or_name] = func.__name__ return func return inner else: - _filters[func_or_name.__name__] = func_or_name + _filters[func_or_name.__name__] = func_or_name.__name__ return func_or_name @@ -78,8 +78,13 @@ ) @filter('len') -def hsplen(node): - return int(node['Hsp_align-len']) +def blastxml_len(node): + if node.tag == 'Hsp': + return int(node['Hsp_align-len']) + elif node.tag == 'Iteration': + return int(node['Iteration_query-len']) + raise Exception("Unknown XML node type: "+node.tag) + @filter def asframe(frame): @@ -134,6 +139,13 @@ return value +@filter +def hits(result): + # sort hits by longest hotspot first + return sorted(result.Iteration_hits.findall('Hit'), + key=lambda h: max(blastxml_len(hsp) for hsp in h.Hit_hsps.Hsp), + reverse=True) + class BlastVisualize: @@ -151,15 +163,15 @@ self.environment = jinja2.Environment(loader=self.loader, lstrip_blocks=True, trim_blocks=True, autoescape=True) - self.environment.filters.update(_filters) - self.environment.filters['color'] = lambda length: match_colors[color_idx(length)] + self._addfilters(self.environment) + - self.query_length = int(self.blast["BlastOutput_query-len"]) - self.hits = self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit - # sort hits by longest hotspot first - self.ordered_hits = sorted(self.hits, - key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp), - reverse=True) + def _addfilters(self, environment): + for filtername, funcname in _filters.items(): + try: + environment.filters[filtername] = getattr(self, funcname) + except AttributeError: + environment.filters[filtername] = globals()[funcname] def render(self, output): template = self.environment.get_template(self.templatename) @@ -171,41 +183,38 @@ ('Database', self.blast.BlastOutput_db), ) - if len(self.blast.BlastOutput_iterations.Iteration) > 1: - warnings.warn("Multiple 'Iteration' elements found, showing only the first") - output.write(template.render(blast=self.blast, - length=self.query_length, - hits=self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, + iterations=self.blast.BlastOutput_iterations.Iteration, colors=self.colors, - match_colors=self.match_colors(), - queryscale=self.queryscale(), - hit_info=self.hit_info(), + # match_colors=self.match_colors(), + # hit_info=self.hit_info(), genelink=genelink, params=params)) - - def match_colors(self): + @filter + def match_colors(self, result): """ An iterator that yields lists of length-color pairs. """ - percent_multiplier = 100 / self.query_length + query_length = blastxml_len(result) + + percent_multiplier = 100 / query_length - for hit in self.hits: + for hit in hits(result): # sort hotspots from short to long, so we can overwrite index colors of # short matches with those of long ones. - hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp)) - table = bytearray([255]) * self.query_length + hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: blastxml_len(hsp)) + table = bytearray([255]) * query_length for hsp in hotspots: frm = hsp['Hsp_query-from'] - 1 to = int(hsp['Hsp_query-to']) - table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm) + table[frm:to] = repeat(color_idx(blastxml_len(hsp)), to - frm) matches = [] last = table[0] count = 0 - for i in range(self.query_length): + for i in range(query_length): if table[i] == last: count += 1 continue @@ -216,25 +225,28 @@ yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit)) - - def queryscale(self): - skip = math.ceil(self.query_length / self.max_scale_labels) - percent_multiplier = 100 / self.query_length - for i in range(1, self.query_length+1): + @filter + def queryscale(self, result): + query_length = blastxml_len(result) + skip = math.ceil(query_length / self.max_scale_labels) + percent_multiplier = 100 / query_length + for i in range(1, query_length+1): if i % skip == 0: yield dict(label = i, width = skip * percent_multiplier) - if self.query_length % skip != 0: - yield dict(label = self.query_length, width = (self.query_length % skip) * percent_multiplier) - + if query_length % skip != 0: + yield dict(label = query_length, width = (query_length % skip) * percent_multiplier) - def hit_info(self): + @filter + def hit_info(self, result): - for hit in self.ordered_hits: + query_length = blastxml_len(result) + + for hit in hits(result): hsps = hit.Hit_hsps.Hsp - cover = [False] * self.query_length + cover = [False] * query_length for hsp in hsps: - cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp)) + cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp)) cover_count = cover.count(True) def hsp_val(path): @@ -245,10 +257,10 @@ link_id = hit.Hit_num, maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))), totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))), - cover = "{:.0%}".format(cover_count / self.query_length), + cover = "{:.0%}".format(cover_count / query_length), e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))), # FIXME: is this the correct formula vv? - ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))), + ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))), accession = hit.Hit_accession) def main():