Mercurial > repos > jankanis > blast2html
annotate visualise.py @ 5:1df2bfce5c24
first features are working, partial match table
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Wed, 07 May 2014 18:49:54 +0200 |
parents | |
children | 9e7927673089 |
rev | line source |
---|---|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
2 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
3 # Copyright The Hyve B.V. 2014 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
4 # License: GPL version 3 or higher |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
5 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
6 import sys |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
7 import warnings |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
8 from itertools import repeat |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
9 from lxml import objectify |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
10 import jinja2 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
11 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
12 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def color_idx(length):
    """
    Map an alignment length onto an index into the color scale.

    Lengths below 40, 50, 80 and 200 give index 0, 1, 2 and 3
    respectively; a length of 200 or more gives index 4.
    """
    # Exclusive upper bound of each bucket, in ascending order; the
    # position of the first bound that exceeds `length` is the index.
    for index, upper_bound in enumerate((40, 50, 80, 200)):
        if length < upper_bound:
            return index
    return 4
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
23 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
24 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
# Color names, indexed by the bucket number that color_idx() returns.
colors = ['black', 'blue', 'green', 'magenta', 'red']

# NOTE: the input file name is hard-coded and the document is parsed as a
# side effect of importing/running this module.
blast = objectify.parse('blast xml example1.xml').getroot()

# Templates are looked up relative to the current working directory.
loader = jinja2.FileSystemLoader(searchpath='.')
environment = jinja2.Environment(loader=loader)

# Template filter turning an alignment length into a color name.  It must
# index the `colors` list: `match_colors` is a generator function and
# subscripting it raises TypeError.
environment.filters['color'] = lambda length: colors[color_idx(length)]
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
31 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def match_colors():
    """
    Generate the data for the graphical match overview, one item per hit.

    Hits of the first 'Iteration' element are visited in order of their
    longest hotspot (HSP), longest first.  For every hit a dict is
    yielded with two keys:

    colors -- a list of (run_length, color) pairs covering the query from
              its first to its last position; color is one of the names
              in `colors`, or 'none' where no hotspot covers the query
    link   -- the fragment "#hit<Hit_num>" for that hit
    """
    # Local import: the module only imports `repeat` from itertools.
    from itertools import groupby

    def align_len(hsp):
        return int(hsp['Hsp_align-len'])

    hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
    # Convert once to a plain int; it is used as a size and as a bound.
    query_length = int(blast["BlastOutput_query-len"])
    # sort hits by longest hotspot first
    hits = sorted(hits,
                  key=lambda h: max(align_len(hsp) for hsp in h.Hit_hsps.Hsp),
                  reverse=True)

    for hit in hits:
        # sort hotspots from short to long, so we can overwrite index colors
        # of short matches with those of long ones.
        hotspots = sorted(hit.Hit_hsps.Hsp, key=align_len)
        # One byte per query position; 255 marks "not covered by any hotspot".
        table = bytearray([255]) * query_length
        for hsp in hotspots:
            # Hsp_query-from/-to are 1-based and inclusive, and may be given
            # in descending order.  Converted to a 0-based half-open slice
            # this is [low - 1, high): the end must NOT be decremented, or
            # the last aligned position stays uncolored.
            low, high = sorted((int(hsp['Hsp_query-from']),
                                int(hsp['Hsp_query-to'])))
            start = max(low - 1, 0)
            end = min(high, query_length)
            if start >= end:
                continue
            # Replacement has exactly the slice's length, so the table
            # never grows or shrinks.
            table[start:end] = repeat(color_idx(align_len(hsp)), end - start)

        # Run-length encode the table.  An empty table (query length 0)
        # simply gives an empty list.
        matches = [(sum(1 for _ in run),
                    colors[index] if index != 255 else 'none')
                   for index, run in groupby(table)]

        yield dict(colors=matches, link="#hit" + hit.Hit_num.text)
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
65 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
66 |
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def main():
    """
    Render the 'visualise.html.jinja' template for the parsed BLAST
    output and write the resulting text to standard output.

    A warning is issued when more than one 'Iteration' element is
    present; the hits handed to the template are those reached through
    ``blast.BlastOutput_iterations.Iteration``.
    """
    template = environment.get_template('visualise.html.jinja')

    # Label/value pairs passed to the template as 'params'.
    labels = ('Query ID',
              'Query definition',
              'Query length',
              'Program',
              'Database')
    values = (blast["BlastOutput_query-ID"],
              blast["BlastOutput_query-def"],
              blast["BlastOutput_query-len"],
              blast.BlastOutput_version,
              blast.BlastOutput_db)
    params = tuple(zip(labels, values))

    iteration = blast.BlastOutput_iterations.Iteration
    if not len(iteration) <= 1:
        warnings.warn("Multiple 'Iteration' elements found, showing only the first")

    context = dict(blast=blast,
                   hits=iteration.Iteration_hits.Hit,
                   colors=colors,
                   match_colors=match_colors(),
                   params=params)
    html = template.render(**context)
    sys.stdout.write(html)


# Executed unconditionally when the module is loaded.
main()