annotate blast2html.py @ 120:2729c2326235

Fix for Rikilt issue 13: the hit e-value and identity% should be taken from the hsp with the highest bit score. Previously each of these values was calculated independently. Also use arrays for the cover calculation instead of python lists, and refactor the hit_info() code a bit.
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 31 Jul 2014 16:14:36 +0200
parents 591dc9c24824
children 5f104d05aa23
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
1 #!/usr/bin/env python3
50
bfc82a8aa3c9 unicodify python sources
Jan Kanis <jan.code@jankanis.nl>
parents: 27
diff changeset
2 # -*- coding: utf-8 -*-
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
3
75
4d2c25baf5a3 Fix rounding errors
Jan Kanis <jan.code@jankanis.nl>
parents: 74
diff changeset
4 # Actually this program works with both python 2 and 3, tested against python 2.6
73
67b1a319c6dc First go at 2.6 compatibility
Jan Kanis <jan.code@jankanis.nl>
parents: 72
diff changeset
5
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
6 # Copyright The Hyve B.V. 2014
72
6ecbfebb9dd9 doc changes
Jan Kanis <jan.code@jankanis.nl>
parents: 71
diff changeset
7 # License: GPL version 3 or (at your option) any higher version
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
8
73
67b1a319c6dc First go at 2.6 compatibility
Jan Kanis <jan.code@jankanis.nl>
parents: 72
diff changeset
9 from __future__ import unicode_literals, division
50
bfc82a8aa3c9 unicodify python sources
Jan Kanis <jan.code@jankanis.nl>
parents: 27
diff changeset
10
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
11 import sys
7
9e7927673089 intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents: 5
diff changeset
12 import math
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
13 import warnings
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
14 import six, codecs, io
75
4d2c25baf5a3 Fix rounding errors
Jan Kanis <jan.code@jankanis.nl>
parents: 74
diff changeset
15 from six.moves import builtins
18
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
16 from os import path
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
17 from itertools import repeat
115
0c2a03f9740b make external gene bank name configurable
Jan Kanis <jan.code@jankanis.nl>
parents: 110
diff changeset
18 from collections import defaultdict, namedtuple
120
2729c2326235 Fix for Rikilt issue 13
Jan Kanis <jan.code@jankanis.nl>
parents: 119
diff changeset
19 from array import array
99
8f02008a5f20 look at all blast*.loc files; python2.6 compat fix
Jan Kanis <jan.code@jankanis.nl>
parents: 98
diff changeset
20 import glob
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
21 import argparse
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
22 from lxml import objectify
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
23 import jinja2
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
24
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
25 builtin_str = str
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
26 str = six.text_type
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
27
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
28
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
29
76
7d0d46168fd5 Format all numbers in a predictable way
Jan Kanis <jan.code@jankanis.nl>
parents: 75
diff changeset
# Registry filled by the @filter decorator below: maps a filter name to the
# *name* of the module-level function implementing it.
_filters = dict(float='float')
def filter(func_or_name):
    """Decorator to register a function as filter in the current jinja environment

    Can be used bare (``@filter``), in which case the function is recorded
    under its own name, or with a string argument (``@filter('name')``), in
    which case the function is recorded under that name. Only the function's
    ``__name__`` is stored in ``_filters``; the decorated function itself is
    returned unchanged.
    """
    if not isinstance(func_or_name, six.string_types):
        # Bare usage: the argument is the function being decorated.
        _filters[func_or_name.__name__] = func_or_name.__name__
        return func_or_name

    # Called with a filter name: hand back the actual decorator.
    def register(func):
        _filters[func_or_name] = func.__name__
        return func
    return register
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
41
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
42
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
def color_idx(length):
    """Return the bucket index (0-4) for the given length.

    Buckets are delimited by the exclusive upper bounds 40, 50, 80 and 200;
    anything of 200 or more falls in bucket 4.
    """
    for index, upper_bound in enumerate((40, 50, 80, 200)):
        if length < upper_bound:
            return index
    return 4
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
53
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
54 @filter
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def fmt(val, fmt):
    """Convert val to a float and render it with the format spec fmt."""
    number = float(val)
    return format(number, fmt)
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
57
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
58 @filter
76
7d0d46168fd5 Format all numbers in a predictable way
Jan Kanis <jan.code@jankanis.nl>
parents: 75
diff changeset
def numfmt(val):
    """Format numbers in decimal notation, but without excessive trailing 0's.
    Default python float formatting will use scientific notation for some values,
    or append trailing zeros with the 'f' format type, and the number of digits differs
    between python 2 and 3.

    Whole numbers are rendered without a decimal point; other values are
    rendered with at most 10 decimals and at least one.
    """
    fraction = math.modf(val)[0]
    if fraction == 0:
        return str(int(val))
    # round to 10 to get identical representations in python 2 and 3
    text = format(round(val, 10), '.10f').rstrip('0')
    # Stripping zeros can leave a bare trailing point (e.g. '0.'); keep one digit.
    return (text + '0') if text.endswith('.') else text
7d0d46168fd5 Format all numbers in a predictable way
Jan Kanis <jan.code@jankanis.nl>
parents: 75
diff changeset
72
7d0d46168fd5 Format all numbers in a predictable way
Jan Kanis <jan.code@jankanis.nl>
parents: 75
diff changeset
73 @filter
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def firsttitle(hit):
    """Return the part of hit.Hit_def that precedes the first '>' separator."""
    definition = str(hit.Hit_def)
    first, _, _ = definition.partition('>')
    return first
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
76
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
77 @filter
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def othertitles(hit):
    """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles.

    The definition is split on '>'; the first piece is skipped. Every
    following piece is expected to look like ``"<id> <title>"``. For each
    piece an ``argparse.Namespace`` is returned with attributes ``Hit_id``,
    ``Hit_def`` (the title), ``Hit_accession`` (always the empty string) and
    ``getroottree`` (borrowed from ``hit``).

    A piece that consists of an id only (no space, hence no title) yields an
    empty title instead of raising ValueError.

    Returns a list, empty when the definition contains no '>'.
    """
    id_titles = str(hit.Hit_def).split('>')

    titles = []
    for entry in id_titles[1:]:
        # partition() never fails to unpack: when there is no space the whole
        # entry is the id and the title is ''. For entries that do contain a
        # space the result is identical to split(' ', 1).
        hit_id, _, title = entry.partition(' ')
        titles.append(argparse.Namespace(Hit_id = hit_id,
                                         Hit_def = title,
                                         Hit_accession = '',
                                         getroottree = hit.getroottree))
    return titles
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
90
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
91 @filter
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def hitid(hit):
    """Return the hit's Hit_id converted to a string."""
    identifier = hit.Hit_id
    return str(identifier)
24
c8347745bbad use Iteration_message tag; also work with unexpected Hit_id values
Jan Kanis <jan.code@jankanis.nl>
parents: 23
diff changeset
94
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
95
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
96 @filter
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def alignment_pre(hsp):
    """Create the preformatted alignment blocks

    Generator that yields one text block per ``linewidth`` alignment columns
    of the given Hsp node. Each block has three lines (Query, midline,
    Subject); the sequence lines carry the start and end coordinate of the
    residues shown on that line.

    The row labels are padded to a common width so the three rows line up,
    and gap columns ('-') are not counted as sequence positions when the
    coordinates are computed.

    Emits warnings (via ``warnings.warn``) when a frame is not 1 or -1, or
    when the from/to coordinates do not agree with the number of residues
    in the aligned sequence. The coordinate checks only run once the
    generator has been fully consumed.
    """

    # line break length
    linewidth = 60

    qfrom = int(hsp['Hsp_query-from'])
    qto = int(hsp['Hsp_query-to'])
    qframe = int(hsp['Hsp_query-frame'])
    hfrom = int(hsp['Hsp_hit-from'])
    hto = int(hsp['Hsp_hit-to'])
    hframe = int(hsp['Hsp_hit-frame'])

    qseq = hsp.Hsp_qseq.text
    midline = hsp.Hsp_midline.text
    hseq = hsp.Hsp_hseq.text

    # Only the direction of the frame is used below; anything other than
    # +1/-1 is reported and then reduced to its sign.
    if qframe not in (1, -1):
        warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_query-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), qframe))
        qframe = -1 if qframe < 0 else 1
    if hframe not in (1, -1):
        warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_hit-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), hframe))
        hframe = -1 if hframe < 0 else 1

    def split(txt):
        return [txt[i:i+linewidth] for i in range(0, len(txt), linewidth)]

    def residues(txt):
        # Gap columns do not consume a position in the underlying sequence.
        return len(txt) - txt.count('-')

    # Running coordinate of the first residue on the next line.
    qpos, hpos = qfrom, hfrom
    for qs, mid, hs in zip(split(qseq), split(midline), split(hseq)):
        qcount, hcount = residues(qs), residues(hs)
        # Label column is 7 wide ('Subject'), coordinate column is 7 wide,
        # so every row's sequence text starts in the same column.
        yield (
            "{0:<7}{1:>7} {2} {3}\n".format('Query', qpos, qs, qpos+(qcount-1)*qframe) +
            "{0:<7}{1:7} {2}\n".format('', '', mid) +
            "{0:<7}{1:>7} {2} {3}".format('Subject', hpos, hs, hpos+(hcount-1)*hframe)
        )
        qpos += qcount*qframe
        hpos += hcount*hframe

    if qfrom+(residues(qseq)-1)*qframe != qto:
        warnings.warn("Error in BlastXML input: Hsp node {0} qseq length mismatch: from {1} to {2} length {3}".format(
            nodeid(hsp), qfrom, qto, len(qseq)))
    if hfrom+(residues(hseq)-1)*hframe != hto:
        warnings.warn("Error in BlastXML input: Hsp node {0} hseq length mismatch: from {1} to {2} length {3}".format(
            nodeid(hsp), hfrom, hto, len(hseq)))
19c48f2ec775 wrap alignments if they are too long
Jan Kanis <jan.code@jankanis.nl>
parents: 53
diff changeset
137
19c48f2ec775 wrap alignments if they are too long
Jan Kanis <jan.code@jankanis.nl>
parents: 53
diff changeset
138
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
139
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
140 @filter('len')
19
67ddcb807b7d make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents: 18
diff changeset
def blastxml_len(node):
    """Return the length recorded on a BlastXML node as an int.

    For an 'Hsp' node this is its 'Hsp_align-len' child, for an 'Iteration'
    node its 'Iteration_query-len' child. Any other tag raises Exception.
    """
    tag = node.tag
    if tag == 'Hsp':
        field = 'Hsp_align-len'
    elif tag == 'Iteration':
        field = 'Iteration_query-len'
    else:
        raise Exception("Unknown XML node type: "+node.tag)
    return int(node[field])
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
147
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
148 @filter
53
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
def nodeid(node):
    """Build a dash-separated id for an Hsp, Hit or Iteration node.

    The id consists of the iteration number, followed by the hit number for
    Hit and Hsp nodes, followed by the hsp number for Hsp nodes, e.g.
    '1-2-3'. Parent nodes are reached by going up two levels at a time
    (skipping the container element in between).

    Raises ValueError when the node is of any other type.
    """
    # Collected innermost-first, reversed when joining.
    parts = []
    if node.tag == 'Hsp':
        parts.append(node.Hsp_num.text)
        node = node.getparent().getparent()
        assert node.tag == 'Hit'
    if node.tag == 'Hit':
        parts.append(node.Hit_num.text)
        node = node.getparent().getparent()
        assert node.tag == 'Iteration'
    if node.tag != 'Iteration':
        raise ValueError("The nodeid filter can only be applied to Hsp, Hit or Iteration nodes in a BlastXML document")
    parts.append(node['Iteration_iter-num'].text)
    return '-'.join(reversed(parts))
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
163
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
164
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
165 @filter
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
def asframe(frame):
    """Translate a frame of 1 or -1 into 'Plus' or 'Minus'.

    Any other value raises Exception.
    """
    for value, label in ((1, 'Plus'), (-1, 'Minus')):
        if frame == value:
            return label
    raise Exception("frame should be either +1 or -1")
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
172
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
173 # def genelink(hit, type='genbank', hsp=None):
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
174 # if not isinstance(hit, six.string_types):
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
175 # hit = hitid(hit)
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
176 # link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
177 # if hsp != None:
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
178 # link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
179 # return link
16
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
180
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
181
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
182 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
183 # I've removed the html escapes, since html escaping is already being performed by the template engine.
7
9e7927673089 intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents: 5
diff changeset
184
74
03e044b5bcc2 fix escaping of javascript literals
Jan Kanis <jan.code@jankanis.nl>
parents: 73
diff changeset
185 # The r'\u0027' syntax doesn't work the way we need to in python 2.6 with unicode_literals
16
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
# Characters that always need escaping inside a javascript string literal,
# paired with their \uXXXX escape. The backslash must stay first so that the
# backslashes introduced by later replacements are not escaped again.
# The html-significant characters (> < & = - ;) are deliberately not listed.
_base_js_escapes = tuple(
    (char, '\\u%04X' % ord(char))
    for char in ('\\', '\'', '"', u'\u2028', u'\u2029')
)

# Escape every ASCII character with a value less than 32. This is
# needed, among other things, to prevent html parsers from jumping out
# of javascript parsing mode.
_js_escapes = _base_js_escapes + tuple(
    ('%c' % code, '\\u%04X' % code) for code in range(32)
)
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
205
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
206 @filter
db7e4ee3be03 fix validation, reindent
Jan Kanis <jan.code@jankanis.nl>
parents: 15
diff changeset
def js_string_escape(value):
    """
    Javascript string literal escape. Note that this only escapes data
    for embedding within javascript string literals, not in general
    javascript snippets.

    The value is converted to a string first; every (character, escape)
    pair in _js_escapes is then applied in order.
    """
    escaped = str(value)
    for raw, replacement in _js_escapes:
        escaped = escaped.replace(raw, replacement)
    return escaped
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
220
19
67ddcb807b7d make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents: 18
diff changeset
@filter
def hits(result):
    """Return the list of Hit elements of one iteration (possibly empty)."""
    hit_container = result.Iteration_hits
    # findall instead of attribute access, so an iteration without any Hit
    # elements gives an empty list
    return hit_container.findall('Hit')
19
67ddcb807b7d make it work with multiple queries
Jan Kanis <jan.code@jankanis.nl>
parents: 18
diff changeset
225
110
e17aae23cc1c report query parameters for each query
Jan Kanis <jan.code@jankanis.nl>
parents: 109
diff changeset
@filter('params')
def result_params(iteration):
    """Return (label, value) pairs describing the query of one iteration."""
    query_number = iteration['Iteration_iter-num']
    query_id = iteration['Iteration_query-ID']
    definition_line = iteration['Iteration_query-def']
    length = blastxml_len(iteration)
    return (
        ('Query number', query_number),
        ('Query ID', query_id),
        ('Definition line', definition_line),
        ('Length', length),
    )
7
9e7927673089 intermediate commit before converting some tables to divs
Jan Kanis <jan.code@jankanis.nl>
parents: 5
diff changeset
232
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
233
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
class BlastVisualize:
    """Render a parsed BLAST XML result to html through a jinja2 template.

    On construction the input is parsed with objectify, a jinja2
    environment is created for the template directory, and every entry of
    the module-level _filters mapping is registered as a template filter
    (presumably _filters is filled by the @filter decorator -- confirm at
    its definition).
    """

    # Color names, indexed by the value color_idx() gives for a hsp. The
    # value 255 in a color table means "no match" and is drawn transparent.
    colors = ('black', 'blue', 'green', 'magenta', 'red')

    # Controls the label spacing of the query ruler, see queryscale().
    max_scale_labels = 10

    def __init__(self, input, templatedir, templatename, genelinks):
        """
        input: the Blast XML input, handed to objectify.parse()
        templatedir: directory in which jinja2 looks for templates
        templatename: name of the template to render
        genelinks: mapping from database name to an entry with 'dbname'
            and 'template' attributes, used by genelink()
        """
        self.input = input
        self.templatename = templatename
        self.genelinks = genelinks

        self.blast = objectify.parse(self.input).getroot()
        self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
        self.environment = jinja2.Environment(loader=self.loader,
                                              lstrip_blocks=True, trim_blocks=True, autoescape=True)

        self._addfilters(self.environment)

    def _addfilters(self, environment):
        """Register all filters named in _filters on the jinja2 environment.

        Each function name is looked up as an attribute of this instance
        first, then as a module global, and finally as a builtin.
        """
        for filtername, funcname in _filters.items():
            try:
                environment.filters[filtername] = getattr(self, funcname)
            except AttributeError:
                try:
                    environment.filters[filtername] = globals()[funcname]
                except KeyError:
                    environment.filters[filtername] = getattr(builtins, funcname)

    def render(self, output):
        """Render the template and dump the streamed result to output."""
        template = self.environment.get_template(self.templatename)

        params = (('Program', self.blast.BlastOutput_version),
                  ('Database', self.blast.BlastOutput_db),
                  )

        result = template.stream(blast=self.blast,
                                 iterations=self.blast.BlastOutput_iterations.Iteration,
                                 colors=self.colors,
                                 params=params)

        result.dump(output)

    @filter
    def match_colors(self, result):
        """
        An iterator that yields, for each hit of the iteration, a dict with
        colors: a list of (width in percent of the query, color name) pairs
            describing consecutive stretches of the query,
        hit: the hit element,
        defline: firsttitle(hit).
        """

        query_length = blastxml_len(result)

        percent_multiplier = 100 / query_length

        def segment(count, color):
            # 255 marks query positions that are not covered by any hsp
            name = self.colors[color] if color != 255 else 'transparent'
            return (count * percent_multiplier, name)

        for hit in hits(result):
            # sort hotspots from short to long, so we can overwrite index colors of
            # short matches with those of long ones.
            hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: blastxml_len(hsp))
            table = bytearray([255]) * query_length
            for hsp in hotspots:
                frm = hsp['Hsp_query-from'] - 1
                to = int(hsp['Hsp_query-to'])
                table[frm:to] = repeat(color_idx(blastxml_len(hsp)), to - frm)

            # run-length encode the color table
            matches = []
            last = table[0]
            count = 0
            for i in range(query_length):
                if table[i] == last:
                    count += 1
                    continue
                matches.append(segment(count, last))
                last = table[i]
                count = 1
            matches.append(segment(count, last))

            yield dict(colors=matches, hit=hit, defline=firsttitle(hit))

    @filter
    def queryscale(self, result):
        """
        An iterator that yields the labels of the query ruler as dicts with
        'label' (a query position) and 'width' (in percent of the query
        length). A label is placed every ceil(length / max_scale_labels)
        positions, followed by one for the end of the query if the length
        is not a multiple of that distance.
        """
        query_length = blastxml_len(result)
        # int(...) because math.ceil returns a float in python 2
        skip = int(math.ceil(query_length / self.max_scale_labels))
        percent_multiplier = 100 / query_length
        # step from label to label instead of testing every query position
        for i in range(skip, query_length + 1, skip):
            yield dict(label=i, width=skip * percent_multiplier)
        if query_length % skip != 0:
            yield dict(label=query_length,
                       width=(query_length % skip) * percent_multiplier)

    @filter
    def hit_info(self, result):
        """
        An iterator that yields a summary dict for each hit of the
        iteration. Bit score, e-value and identity are all taken from the
        hsp with the highest bit score. Keys: hit, title, maxscore,
        e_value, ident, totalscore (sum of all hsp bit scores) and cover
        (fraction of query positions covered by at least one hsp). All
        numbers are preformatted strings.
        """

        query_length = blastxml_len(result)

        for hit in hits(result):
            hsps = hit.Hit_hsps.Hsp

            cover = array('B', [0]) * query_length
            for hsp in hsps:
                frm = int(hsp['Hsp_query-from']) - 1
                to = int(hsp['Hsp_query-to'])
                # Size the replacement by the query span (as match_colors
                # does) and not by blastxml_len(hsp): a slice assignment
                # with a different length would resize the cover array.
                cover[frm:to] = array('B', [1]) * (to - frm)
            cover_count = cover.count(1)

            best_hsp = max(hsps, key=lambda h: float(h['Hsp_bit-score']))

            yield dict(hit=hit,
                       title=firsttitle(hit),
                       maxscore=format(float(best_hsp['Hsp_bit-score']), '.1f'),
                       e_value=format(float(best_hsp.Hsp_evalue), '.4'),
                       # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
                       ident=format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'),
                       totalscore=format(sum(float(hsp['Hsp_bit-score']) for hsp in hsps), '.1f'),
                       cover=format(cover_count / query_length, '.0%'),
                       )

    @filter
    def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):
        """Create a html link from a hit node to a configured gene bank webpage.
        text: The text of the link. If not set applies text_from.
        text_from: string, if text is not specified, take it from specified source. Either 'hitid' (default) or 'dbname'.
        cssclass: extra css classes that will be added to the <a> element
        display_nolink: boolean, if false don't display anything if no link can be created. Default True.

        Returns the link as jinja2 Markup. If no template is configured for
        the database, or formatting the template fails (a warning is
        issued in that case), returns the plain text, or '' if
        display_nolink is false.
        Raises ValueError for an unknown text_from value.
        """

        db = hit.getroottree().getroot().BlastOutput_db

        template = self.genelinks[db].template

        if text is None:
            if text_from == 'hitid':
                text = hitid(hit)
            elif text_from == 'dbname':
                text = self.genelinks[db].dbname
            else:
                raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from))

        if template is None:
            return text if display_nolink else ''

        fullid = hitid(hit)
        # the part of the definition line before the first space
        fulldefline = str(hit.Hit_def).split(' ', 1)[0]
        args = dict(id=fullid.split('|'),
                    fullid=fullid,
                    defline=fulldefline.split('|'),
                    fulldefline=fulldefline,
                    accession=str(hit.Hit_accession))
        try:
            link = template.format(**args)
        except Exception as e:
            # explicit field indices: auto-numbered '{}' fields are not supported by python 2.6
            warnings.warn('Error in formatting gene bank link {0} with {1}: {2}'.format(template, args, e))
            return text if display_nolink else ''

        classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else ''
        return jinja2.Markup("<a {0}target=\"_top\" href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
385
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
386
115
0c2a03f9740b make external gene bank name configurable
Jan Kanis <jan.code@jankanis.nl>
parents: 110
diff changeset
# Link configuration for one database: the display name of the gene bank and
# the url template for links to it.
genelinks_entry = namedtuple('genelinks_entry', 'dbname template')


def read_blastdb(dir, default):
    """Read the gene bank link templates from the blastdb*.loc files in dir.

    Lines starting with '#' are skipped. The other lines are split on tabs:
    the third field is the database name used as key, the fourth the gene
    bank display name (default.dbname if empty) and the fifth the link
    template. Lines with fewer than five fields and files that cannot be
    read are ignored.

    dir: directory that contains the .loc files
    default: the genelinks_entry returned for databases that are not
        configured

    Returns a defaultdict mapping database names to genelinks_entry
    tuples. A warning is issued if no templates were found at all.
    """
    links = defaultdict(lambda: default)
    # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc.
    files = sorted(glob.glob(path.join(dir, 'blastdb*.loc')))
    # reversed, so blastdb.loc is read last and will take precedence
    for filename in reversed(files):
        try:
            # glob already returns paths that include dir; joining dir onto
            # them again would break for a relative dir.
            with open(filename) as locfile:
                for l in locfile:
                    if l.strip().startswith('#'):
                        continue
                    line = l.rstrip('\n').split('\t')
                    try:
                        links[line[2]] = genelinks_entry(dbname=line[3] or default.dbname,
                                                         template=line[4])
                    except IndexError:
                        continue
        except (IOError, OSError):
            # IOError is what open() raises in python 2
            continue
    if not links:
        if not files:
            warnings.warn("No gene bank link templates found (no blastdb*.loc files found in {0})".format(dir))
        else:
            warnings.warn("No gene bank link templates found in {0}".format(', '.join(files)))
    return links
10
2fbdf2eb27b4 All data is displayed now, still some formatting to do
Jan Kanis <jan.code@jankanis.nl>
parents: 7
diff changeset
413
27
4e6ac737ba17 improve the galaxy html stripping warning; make sure the tool can find the template from within galaxy
Jan Kanis <jan.code@jankanis.nl>
parents: 24
diff changeset
414
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
415 def main():
27
4e6ac737ba17 improve the galaxy html stripping warning; make sure the tool can find the template from within galaxy
Jan Kanis <jan.code@jankanis.nl>
parents: 24
diff changeset
416 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
73
67b1a319c6dc First go at 2.6 compatibility
Jan Kanis <jan.code@jankanis.nl>
parents: 72
diff changeset
417
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
418 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
419 usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
420 input_group = parser.add_mutually_exclusive_group(required=True)
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
421 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
422 help='The input Blast XML file, same as -i/--input')
53
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
423 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
424 help='The input Blast XML file')
53
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
425 parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
426 help='The output html file')
18
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
427 # We just want the file name here, so jinja can open the file
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
428 # itself. But it is easier to just use a FileType so argparse can
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
429 # handle the errors. This introduces a small race condition when
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
430 # jinja later tries to re-open the template file, but we don't
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
431 # care too much.
53
4217bb9cf1d3 depend on python 3; fix internal links with multiple iterations
Jan Kanis <jan.code@jankanis.nl>
parents: 51
diff changeset
432 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
18
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
433 help='The template file to use. Defaults to blast_html.html.jinja')
115
0c2a03f9740b make external gene bank name configurable
Jan Kanis <jan.code@jankanis.nl>
parents: 110
diff changeset
434
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
435 parser.add_argument('--dbname', type=str, default='Gene Bank',
115
0c2a03f9740b make external gene bank name configurable
Jan Kanis <jan.code@jankanis.nl>
parents: 110
diff changeset
436 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
437 parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
438 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
439 help="""A link template to link hits to a gene bank webpage. The template string is a
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
440 Python format string. It can contain the following replacement elements: {id[N]}, {fullid},
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
441 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
442 replaced by the Nth element of the id or defline, where '|' is the field separator.
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
443
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
444 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
445 which is a link to the NCBI nucleotide database.""")
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
446
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
447 parser.add_argument('--db-config-dir',
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
448 help="""The directory where databases are configured in blastdb*.loc files. These files
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
449 are consulted for creating a gene bank link. The files should conform to the format that
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
450 Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
451 with two extra fields. The third field of a line should be a database path and the fourth
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
452 a genebank link template conforming to the --genelink-template option syntax. Entries in
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
453 these config files override links specified using --genelink-template and --dbname.""")
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
454
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
455 args = parser.parse_args()
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
456 if args.input == None:
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
457 args.input = args.positional_arg
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
458 if args.input == None:
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
459 parser.error('no input specified')
5
1df2bfce5c24 first features are working, partial match table
Jan Kanis <jan.code@jankanis.nl>
parents:
diff changeset
460
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
461 if six.PY2:
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
462 # The argparse.FileType wrapper doesn't support an encoding
105
b3b5ee557170 update test
Jan Kanis <jan.code@jankanis.nl>
parents: 104
diff changeset
463 # argument, so for python 2 we need to wrap or reopen the
b3b5ee557170 update test
Jan Kanis <jan.code@jankanis.nl>
parents: 104
diff changeset
464 # output. The input files are already read as utf-8 by the
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
465 # respective libraries.
107
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
466 #
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
467 # One option is using codecs, but the codecs' writelines()
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
468 # method doesn't support streaming but collects all output and
105
b3b5ee557170 update test
Jan Kanis <jan.code@jankanis.nl>
parents: 104
diff changeset
469 # writes at once (see Python issues #5445 and #21910). On the
b3b5ee557170 update test
Jan Kanis <jan.code@jankanis.nl>
parents: 104
diff changeset
470 # other hand the io module is slower (though not
b3b5ee557170 update test
Jan Kanis <jan.code@jankanis.nl>
parents: 104
diff changeset
471 # significantly).
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
472
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
473 # args.output = codecs.getwriter('utf-8')(args.output)
107
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
474 # def fixed_writelines(iter, self=args.output):
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
475 # for i in iter:
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
476 # self.write(i)
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
477 # args.output.writelines = fixed_writelines
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
478
ee2b105d772a minor fix
Jan Kanis <jan.code@jankanis.nl>
parents: 105
diff changeset
479 args.output.close()
109
ea3bc0b9a3d9 fix (again) output encoding
Jan Kanis <jan.code@jankanis.nl>
parents: 107
diff changeset
480 args.output = io.open(args.output.name, 'w', encoding='utf-8')
115
0c2a03f9740b make external gene bank name configurable
Jan Kanis <jan.code@jankanis.nl>
parents: 110
diff changeset
481
18
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
482 templatedir, templatename = path.split(args.template.name)
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
483 args.template.close()
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
484 if not templatedir:
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
485 templatedir = '.'
4434ffab721a add a parameter for the template
Jan Kanis <jan.code@jankanis.nl>
parents: 16
diff changeset
486
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
487 defaultentry = genelinks_entry(args.dbname, args.genelink_template)
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
488 if args.db_config_dir is None:
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
489 genelinks = defaultdict(lambda: defaultentry)
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
490 elif not path.isdir(args.db_config_dir):
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
491 parser.error('db-config-dir does not exist or is not a directory')
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
492 else:
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
493 genelinks = read_blastdb(args.db_config_dir, default=defaultentry)
95
4378d11f0ed7 implement configurable gene bank links
Jan Kanis <jan.code@jankanis.nl>
parents: 79
diff changeset
494
116
f5066973029a refactor
Jan Kanis <jan.code@jankanis.nl>
parents: 115
diff changeset
495 b = BlastVisualize(args.input, templatedir, templatename, genelinks=genelinks)
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
496 b.render(args.output)
104
a22c909c9b57 stream output
Jan Kanis <jan.code@jankanis.nl>
parents: 103
diff changeset
497 args.output.close()
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
498
11
7660519f2dc9 proper layout for alignments, added some links
Jan Kanis <jan.code@jankanis.nl>
parents: 10
diff changeset
499
12
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
500 if __name__ == '__main__':
a459c754cdb5 add links, refactor, proper commandline arguments
Jan Kanis <jan.code@jankanis.nl>
parents: 11
diff changeset
501 main()
11
7660519f2dc9 proper layout for alignments, added some links
Jan Kanis <jan.code@jankanis.nl>
parents: 10
diff changeset
502