Mercurial > repos > jankanis > blast2html
annotate blast2html.py @ 70:0ef071bba164
Modify test data to include a negative frame sequence that is split in multiple lines
| author | Jan Kanis <jan.code@jankanis.nl> |
|---|---|
| date | Wed, 18 Jun 2014 14:22:57 +0200 |
| parents | 0c4ac210068b |
| children | 371cd585e459 |
| rev | line source |
|---|---|
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
| 50 | 2 # -*- coding: utf-8 -*- |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
3 |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
4 # Copyright The Hyve B.V. 2014 |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
5 # License: GPL version 3 or higher |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
6 |
| 50 | 7 from __future__ import unicode_literals |
| 8 | |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
9 import sys |
|
7
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
10 import math |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
11 import warnings |
|
18
4434ffab721a
add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents:
16
diff
changeset
|
12 from os import path |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
13 from itertools import repeat |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
14 import argparse |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
15 from lxml import objectify |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
16 import jinja2 |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
17 |
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
18 |
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
19 |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
# Registry of template filters: maps the name a filter has inside the
# templates to the name of the Python function that implements it.
_filters = {}


def filter(func_or_name):
    """Decorator to register a function as filter in the current jinja environment.

    Used bare (``@filter``) the function is registered under its own name.
    Called with a string (``@filter('name')``) it returns a decorator that
    registers the function under that name. The decorated function itself
    is always returned unchanged.

    NOTE: inside this module the name shadows the builtin ``filter``.
    """
    if not isinstance(func_or_name, str):
        # Bare usage: the argument is the function being decorated.
        _filters[func_or_name.__name__] = func_or_name.__name__
        return func_or_name

    def register(func):
        # Only the function *name* is stored, not the function object.
        _filters[func_or_name] = func.__name__
        return func

    return register
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
31 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
32 |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def color_idx(length):
    """Map *length* onto one of five classes, numbered 0 to 4.

    The class boundaries are 40, 50, 80 and 200: a value below 40 gives 0,
    a value of 200 or more gives 4.
    (Presumably the result indexes BlastVisualize.colors, which has five
    entries -- confirm against the templates.)
    """
    for index, upper_bound in enumerate((40, 50, 80, 200)):
        if length < upper_bound:
            return index
    return 4
|
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
43 |
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
44 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def fmt(val, fmt):
    """Convert *val* to a float and render it using the format spec *fmt*."""
    number = float(val)
    return format(number, fmt)
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
47 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
48 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def firsttitle(hit):
    """Return the part of the hit's Hit_def text that precedes the first '>'.

    When the text contains no '>' the whole text is returned.
    """
    definition = hit.Hit_def.text
    head, _, _ = definition.partition('>')
    return head
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
51 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
52 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def othertitles(hit):
    """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles.

    The Hit_def text is split on '>'. The first piece is skipped; every
    following piece is read as "<full id> <title>". Returns a list with one
    dict per piece, with the keys:

    * ``fullid`` -- the text up to the first space
    * ``title``  -- the text after the first space ('' if there is none)
    * ``hitid``  -- the second '|'-separated field of the full id
    * ``id``     -- everything after the second '|' of the full id

    Ids that do not have enough '|'-separated fields fall back to the full
    id, the same way the hitid and seqid filters treat unexpected Hit_id
    values, instead of raising ValueError.
    """
    entries = hit.Hit_def.text.split('>')

    titles = []
    for entry in entries[1:]:
        # partition never fails: an entry without a space gets an empty title.
        fullid, _, title = entry.partition(' ')
        id_fields = fullid.split('|', 2)
        titles.append(dict(id=id_fields[2] if len(id_fields) >= 3 else fullid,
                           hitid=id_fields[1] if len(id_fields) >= 2 else fullid,
                           fullid=fullid,
                           title=title))
    return titles
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
66 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
67 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def hitid(hit):
    """Return the second '|'-separated field of the hit's Hit_id text.

    If the text contains no '|' at all, the complete text is returned.
    """
    full_id = hit.Hit_id.text
    fields = full_id.split('|', 2)
    return fields[1] if len(fields) > 1 else full_id
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
74 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
75 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def seqid(hit):
    """Return everything after the second '|' of the hit's Hit_id text.

    If the text has fewer than two '|' separators, the complete text is
    returned.
    """
    full_id = hit.Hit_id.text
    fields = full_id.split('|', 2)
    return fields[2] if len(fields) > 2 else full_id
|
c8347745bbad
use Iteration_message tag; also work with unexpected Hit_id values
Jan Kanis <jan.code@jankanis.nl>
parents:
23
diff
changeset
|
82 |
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
83 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
84 @filter |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def alignment_pre(hsp):
    """Create the preformatted alignment blocks.

    Generator. The query sequence, midline and hit sequence of *hsp* are cut
    into chunks of at most 60 columns. For every chunk one string of three
    lines is yielded: the query row, the midline row and the subject row.
    The sequence rows carry the start and end coordinate of the chunk.

    Gap characters ('-') take up a column in the alignment but no position
    in the sequence, so they are not counted when coordinates are computed.
    All three rows use a prefix of the same width so that the columns line
    up in preformatted output.

    Once all blocks have been yielded, a warning is issued if the computed
    end coordinate of the query or of the hit differs from the Hsp_query-to
    or Hsp_hit-to value. A warning is also issued, before the first block,
    for a frame other than 1 or -1; such a frame is then treated as -1 if
    negative and as 1 otherwise.
    """

    step = 60

    qfrom = int(hsp['Hsp_query-from'])
    qto = int(hsp['Hsp_query-to'])
    qframe = int(hsp['Hsp_query-frame'])
    hfrom = int(hsp['Hsp_hit-from'])
    hto = int(hsp['Hsp_hit-to'])
    hframe = int(hsp['Hsp_hit-frame'])
    qseq = hsp.Hsp_qseq.text
    midline = hsp.Hsp_midline.text
    hseq = hsp.Hsp_hseq.text

    if qframe not in (1, -1):
        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}".format(nodeid(hsp), qframe))
        qframe = -1 if qframe < 0 else 1
    if hframe not in (1, -1):
        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}".format(nodeid(hsp), hframe))
        hframe = -1 if hframe < 0 else 1

    def split(txt):
        return [txt[i:i+step] for i in range(0, len(txt), step)]

    def residues(txt):
        # Number of sequence positions covered by txt, i.e. without the gaps.
        return len(txt) - txt.count('-')

    # Coordinates of the first residue of the chunk that is being formatted.
    qpos = qfrom
    hpos = hfrom
    for qs, mid, hs in zip(split(qseq), split(midline), split(hseq)):
        qlen = residues(qs)
        hlen = residues(hs)
        yield (
            "Query  {:>7}  {}  {}\n".format(qpos, qs, qpos+(qlen-1)*qframe) +
            "       {:7}  {}\n".format('', mid) +
            "Subject{:>7}  {}  {}".format(hpos, hs, hpos+(hlen-1)*hframe)
        )
        qpos += qlen*qframe
        hpos += hlen*hframe

    # Consistency checks against the end coordinates given in the input.
    if qfrom+(residues(qseq)-1)*qframe != qto:
        warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format(
            nodeid(hsp), qfrom, qto, residues(qseq)))
    if hfrom+(residues(hseq)-1)*hframe != hto:
        warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format(
            nodeid(hsp), hfrom, hto, residues(hseq)))
|
19c48f2ec775
wrap alignments if they are too long
Jan Kanis <jan.code@jankanis.nl>
parents:
53
diff
changeset
|
123 |
|
19c48f2ec775
wrap alignments if they are too long
Jan Kanis <jan.code@jankanis.nl>
parents:
53
diff
changeset
|
124 |
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
125 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
126 @filter('len') |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
def blastxml_len(node):
    """Return the length stored in a BlastXML node as an int.

    For an 'Hsp' node this is its 'Hsp_align-len' value, for an 'Iteration'
    node its 'Iteration_query-len' value. Any other tag raises an Exception.
    """
    tag = node.tag
    if tag == 'Hsp':
        length_field = 'Hsp_align-len'
    elif tag == 'Iteration':
        length_field = 'Iteration_query-len'
    else:
        raise Exception("Unknown XML node type: "+node.tag)
    return int(node[length_field])
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
133 |
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
134 @filter |
|
53
4217bb9cf1d3
depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents:
51
diff
changeset
|
def nodeid(node):
    """Return an id string for an Hsp, Hit or Iteration node.

    The id consists of the iteration number, followed by the hit number for
    Hit and Hsp nodes, followed by the hsp number for Hsp nodes, joined with
    '-'. The enclosing Hit / Iteration is reached by going two parents up.

    Raises ValueError for any other kind of node.
    """
    # Collected innermost first; reversed when the id is assembled.
    numbers = []
    if node.tag == 'Hsp':
        numbers.append(node.Hsp_num.text)
        node = node.getparent().getparent()
        assert node.tag == 'Hit'
    if node.tag == 'Hit':
        numbers.append(node.Hit_num.text)
        node = node.getparent().getparent()
        assert node.tag == 'Iteration'
    if node.tag != 'Iteration':
        raise ValueError("The nodeid filter can only be applied to Hsp, Hit or Iteration nodes in a BlastXML document")
    numbers.append(node['Iteration_iter-num'].text)
    return '-'.join(reversed(numbers))
|
4217bb9cf1d3
depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents:
51
diff
changeset
|
149 |
|
4217bb9cf1d3
depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents:
51
diff
changeset
|
150 |
|
4217bb9cf1d3
depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents:
51
diff
changeset
|
151 @filter |
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
def asframe(frame):
    """Return 'Plus' for a frame equal to 1 and 'Minus' for a frame equal to -1.

    Any other value raises an Exception.
    """
    for value, label in ((1, 'Plus'), (-1, 'Minus')):
        if frame == value:
            return label
    raise Exception("frame should be either +1 or -1")
|
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
158 |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
def genelink(hit, type='genbank', hsp=None):
    """Return the URL of the NCBI nucleotide page for a hit.

    hit  -- the id to link to, either as a string or as a Hit node; a node
            is converted to its id with hitid().
    type -- value of the ``report`` query parameter.
    hsp  -- optional Hsp; when given, its 'Hsp_hit-from' and 'Hsp_hit-to'
            values are appended as the ``from`` and ``to`` parameters.
    """
    if not isinstance(hit, str):
        hit = hitid(hit)
    link = "http://www.ncbi.nlm.nih.gov/nucleotide/{}?report={}&log$=nuclalign".format(hit, type)
    # Identity test on purpose: unlike `!= None` it cannot be influenced by
    # comparison operators overloaded on the hsp object.
    if hsp is not None:
        link += "&from={}&to={}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
    return link
| 166 | |
| 167 | |
# javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
# The html escapes ('>', '<', '&', '=', '-', ';') have been removed, since
# html escaping is already being performed by the template engine.

# (character, replacement) pairs. The backslash has to stay first: the
# replacements are applied one after another and every replacement text
# itself contains a backslash.
_base_js_escapes = (
    ('\\', r'\u005C'),
    ('\'', r'\u0027'),
    ('"', r'\u0022'),
    # U+2028 and U+2029 are line terminators to javascript engines that
    # predate ES2019 and would end a string literal early. Html escaping
    # leaves them untouched, so they have to be escaped here.
    ('\u2028', r'\u2028'),
    ('\u2029', r'\u2029'),
)

# Escape every ASCII character with a value less than 32. This is
# needed, among other things, to prevent html parsers from jumping out of
# javascript parsing mode.
_js_escapes = (_base_js_escapes +
               tuple(('%c' % z, '\\u%04X' % z) for z in range(32)))
| 190 | |
| 191 @filter | |
def js_string_escape(value):
    """Escape *value* for use inside a javascript string literal.

    *value* is converted with str() and each pair of _js_escapes is then
    applied in order. The result is only safe within javascript string
    literals, not in general javascript snippets.
    """
    escaped = str(value)
    for raw, replacement in _js_escapes:
        escaped = escaped.replace(raw, replacement)
    return escaped
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
203 |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
204 @filter |
|
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
def hits(result):
    """Return the Hit nodes found under result.Iteration_hits, sorted so that
    the hit whose longest Hsp is longest comes first.

    The length of an Hsp is determined with blastxml_len().
    """
    def longest_hsp(hit):
        return max(blastxml_len(hsp) for hsp in hit.Hit_hsps.Hsp)

    found = result.Iteration_hits.findall('Hit')
    return sorted(found, key=longest_hsp, reverse=True)
|
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
210 |
|
7
9e7927673089
intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents:
5
diff
changeset
|
211 |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
212 |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
213 class BlastVisualize: |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
214 |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
215 colors = ('black', 'blue', 'green', 'magenta', 'red') |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
216 |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
217 max_scale_labels = 10 |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
218 |
|
18
4434ffab721a
add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents:
16
diff
changeset
|
def __init__(self, input, templatedir, templatename):
    """Parse the BlastXML input and set up the jinja environment.

    input        -- passed to lxml's objectify.parse(); the root of the
                    parsed document is stored as self.blast.
    templatedir  -- search path of the jinja FileSystemLoader.
    templatename -- template name, stored for use by render().
    """
    self.input = input
    self.templatename = templatename

    document = objectify.parse(self.input)
    self.blast = document.getroot()

    self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
    environment_options = dict(lstrip_blocks=True, trim_blocks=True, autoescape=True)
    self.environment = jinja2.Environment(loader=self.loader, **environment_options)

    # Make the functions registered with @filter available to the templates.
    self._addfilters(self.environment)
|
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
229 |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
230 |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
def _addfilters(self, environment):
    """Install every filter registered in _filters into *environment*.

    _filters maps a filter name to a function name. The function is looked
    up as an attribute of this object first and, if there is no such
    attribute, as a module-level name.
    """
    for filtername, funcname in _filters.items():
        # Only the attribute lookup is guarded, so that an AttributeError
        # raised while storing the filter is not mistaken for "this object
        # has no such method".
        try:
            func = getattr(self, funcname)
        except AttributeError:
            func = globals()[funcname]
        environment.filters[filtername] = func
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
237 |
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
def render(self, output):
    """Render the template named self.templatename and write the result to *output*.

    The template receives the parsed document (``blast``), its Iteration
    nodes (``iterations``), the colour names (``colors``), the genelink
    function and ``params``, a tuple of (label, value) pairs taken from the
    BlastOutput node.
    """
    template = self.environment.get_template(self.templatename)
    blast = self.blast

    params = (
        ('Query ID', blast["BlastOutput_query-ID"]),
        ('Query definition', blast["BlastOutput_query-def"]),
        ('Query length', blast["BlastOutput_query-len"]),
        ('Program', blast.BlastOutput_version),
        ('Database', blast.BlastOutput_db),
    )

    context = dict(
        blast=blast,
        iterations=blast.BlastOutput_iterations.Iteration,
        colors=self.colors,
        genelink=genelink,
        params=params,
    )
    output.write(template.render(**context))
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
255 |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
@filter
def match_colors(self, result):
    """
    Yield, for every hit of *result*, the coloured segments of its match bar.

    Each yielded item is a dict with the keys ``colors``, ``hit`` and
    ``defline``. ``colors`` is a list of ``(width, color)`` pairs that
    together span the query: ``width`` is the segment length as a
    percentage of the query length and ``color`` is an entry of
    ``self.colors``, or ``'transparent'`` for stretches no hsp covers.
    """

    query_length = blastxml_len(result)
    percent_multiplier = 100 / query_length

    def segment(length, idx):
        # 255 is the marker for "not covered by any hsp".
        color = 'transparent' if idx == 255 else self.colors[idx]
        return (length * percent_multiplier, color)

    for hit in hits(result):
        # One colour index per query position, initially all uncovered.
        table = bytearray([255]) * query_length

        # Paint the hsps from short to long, so that the colour of a long
        # match overwrites that of a shorter one where they overlap.
        for hsp in sorted(hit.Hit_hsps.Hsp, key=lambda h: blastxml_len(h)):
            start = hsp['Hsp_query-from'] - 1
            stop = int(hsp['Hsp_query-to'])
            table[start:stop] = repeat(color_idx(blastxml_len(hsp)), stop - start)

        # Collapse the per-position table into runs of equal colour index.
        matches = []
        run_idx = table[0]
        run_length = 0
        for pos in range(query_length):
            current = table[pos]
            if current != run_idx:
                matches.append(segment(run_length, run_idx))
                run_idx = current
                run_length = 0
            run_length += 1
        matches.append(segment(run_length, run_idx))

        yield dict(colors=matches, hit=hit, defline=firsttitle(hit))
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
289 |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
@filter
def queryscale(self, result):
    """
    Yield the labels of the query ruler for *result*.

    Each item is a dict with ``label`` (a query position) and ``width``
    (the length of the ruler segment ending at that label, as a percentage
    of the query length). Labels are placed every ``skip`` positions,
    where ``skip`` is the query length divided by
    ``self.max_scale_labels``, rounded up. When the query length is not a
    multiple of ``skip`` a final, narrower segment labelled with the query
    length itself is added.
    """
    query_length = blastxml_len(result)
    skip = math.ceil(query_length / self.max_scale_labels)
    percent_multiplier = 100 / query_length

    # Step directly from one label position to the next instead of
    # testing every single query position with a modulo.
    for i in range(skip, query_length + 1, skip):
        yield dict(label=i, width=skip * percent_multiplier)

    remainder = query_length % skip
    if remainder != 0:
        yield dict(label=query_length,
                   width=remainder * percent_multiplier)
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
301 |
|
19
67ddcb807b7d
make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents:
18
diff
changeset
|
@filter
def hit_info(self, result):
    """
    Yield one summary dict per hit of *result*.

    The dict holds the hit itself (``hit``), its first title (``title``),
    the highest and the summed bit score over all hsps (``maxscore``,
    ``totalscore``), the fraction of query positions covered by at least
    one hsp (``cover``), the lowest e-value (``e_value``), the lowest
    identity ratio among the hsps (``ident``) and the hit accession
    (``accession``). All numbers are pre-formatted as strings.
    """

    query_length = blastxml_len(result)

    for hit in hits(result):
        hsps = hit.Hit_hsps.Hsp

        # Mark every query position that is covered by some hsp.
        cover = [False] * query_length
        for hsp in hsps:
            start = hsp['Hsp_query-from'] - 1
            stop = int(hsp['Hsp_query-to'])
            # The replacement has to be exactly as long as the slice it
            # replaces: assigning a different number of items would
            # resize ``cover`` and skew the coverage figure. The hsp
            # length from blastxml_len() is therefore not used here.
            cover[start:stop] = repeat(True, stop - start)
        cover_count = cover.count(True)

        def hsp_val(key):
            # The value of field ``key`` of every hsp of this hit, as floats.
            return (float(hsp[key]) for hsp in hsps)

        yield dict(hit = hit,
                   title = firsttitle(hit),
                   maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))),
                   totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
                   cover = "{:.0%}".format(cover_count / query_length),
                   e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))),
                   # FIXME: is this the correct formula vv?
                   ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))),
                   accession = hit.Hit_accession)
|
10
2fbdf2eb27b4
All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents:
7
diff
changeset
|
327 |
|
27
4e6ac737ba17
improve the galaxy html stripping warning; make sure the tool can find the template from within galaxy
Jan Kanis <jan.code@jankanis.nl>
parents:
24
diff
changeset
|
328 |
|
5
1df2bfce5c24
first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff
changeset
|
def main():
    """
    Command line entry point: convert a BLAST XML file into an html page.

    The input file can be given either positionally or with -i/--input;
    the output goes to -o/--output or to standard output. The template
    defaults to ``blast2html.html.jinja`` next to this script and can be
    overridden with --template. argparse reports a usage error and exits
    when no input is given.
    """
    default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')

    parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
                                     usage="{} [-i] INPUT [-o OUTPUT]".format(sys.argv[0]))
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file, same as -i/--input')
    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
                             help='The input Blast XML file')
    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
                        help='The output html file')
    # We just want the file name here, so jinja can open the file
    # itself. But it is easier to just use a FileType so argparse can
    # handle the errors. This introduces a small race condition when
    # jinja later tries to re-open the template file, but we don't
    # care too much.
    parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                        help='The template file to use. Defaults to blast2html.html.jinja')

    args = parser.parse_args()
    # The positional argument and -i/--input are interchangeable.
    if args.input is None:
        args.input = args.positional_arg
    if args.input is None:
        parser.error('no input specified')

    # Only the location of the template is needed; the file object that
    # argparse opened was just a check and can be closed again.
    templatedir, templatename = path.split(args.template.name)
    args.template.close()
    if not templatedir:
        templatedir = '.'

    b = BlastVisualize(args.input, templatedir, templatename)
    b.render(args.output)
|
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
362 |
|
11
7660519f2dc9
proper layout for alignments, added some links
Jan Kanis <jan.code@jankanis.nl>
parents:
10
diff
changeset
|
363 |
|
12
a459c754cdb5
add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents:
11
diff
changeset
|
# Run the converter only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|
11
7660519f2dc9
proper layout for alignments, added some links
Jan Kanis <jan.code@jankanis.nl>
parents:
10
diff
changeset
|
366 |
