0
|
1 #!/usr/bin/env python
|
|
import argparse
import html
import logging

from Bio import SeqIO
|
|
5
|
|
# Configure logging at import time and expose a logger named "charges".
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("charges")

# Opening and closing markup wrapped around the generated HTML report.
HTML_HEADER = "<html><head><title>Charges Report</title></head><body>"
HTML_FOOTER = "</body></html>"

# Opening and closing markup for the SVG report. SVG_HEADER is a %-template
# that is filled with (width, height).
SVG_HEADER = (
    '<svg width="%i" height="%i" '
    'xmlns="http://www.w3.org/2000/svg">\n'
)
SVG_FOOTER = "</svg>"

# One-letter amino acid codes; charges_svg uses this list to work out which
# residues were not assigned to any user-supplied group.
FULL_AA = list("HSQTNCYAVILMPFWGERDK")
|
|
37
|
|
38
|
|
def charges_html(svg, fasta, aa, fgColor, bgColor, width=120):
    """Render the sequences of a FASTA file as an HTML charge report.

    Every sequence is upper-cased and wrapped to ``width`` residues per row.
    Each row is written as three text lines inside a ``<pre>``: a charge line
    (``+`` for K/R, ``-`` for D/E, a space otherwise), the residues (wrapped
    in a ``<span>`` of their group's CSS class when they belong to a group),
    and a ruler showing the running residue count at every tenth position.

    Args:
        svg: Unused here; accepted because the command-line entry point
            passes every parsed option to both renderers.
        fasta: File handle or path handed to ``SeqIO.parse(fasta, "fasta")``.
        aa: Iterable of residue-group strings (e.g. ``["KR", "DE"]``); each
            upper-cased group doubles as its CSS class name. ``None`` is
            treated as "no groups".
        fgColor: Text colours, paired with ``aa`` by position.
        bgColor: Background colours, paired with ``aa`` by position.
        width: Number of residues per output row.

    Returns:
        The complete HTML document as a string.
    """
    # The options may be None when omitted on the command line; fall back to
    # empty sequences instead of failing on iteration. The zip is stored as a
    # list because it is walked twice (legend/CSS, then the matching groups).
    colour_scheme = list(
        zip([group.upper() for group in aa or []], bgColor or [], fgColor or [])
    )

    # CSS and header styling
    css = [
        """<style type="text/css">
    .list li { list-style: none; margin:10px }
    .info { float:left; width:20px }
    pre { font-size:1.3em }
    """
    ]
    info = ['<h1>Charges</h1><h3>Legend</h3><ul class="list">']
    for group in colour_scheme:
        # group is (class name, background colour, text colour).
        css.append(".%s{ background: %s; color: %s}\n" % group)
        info.append(
            '<li><span class="%s" style="padding:5px">%s</span></li>\n'
            % (group[0], group[0])
        )
    css.append("</style>")
    info.append("</ul>")

    # Upper-cased groups, in the order they were given; first match wins.
    match_list = [group[0] for group in colour_scheme]

    page = []
    for record in SeqIO.parse(fasta, "fasta"):
        # Header text comes from the input file, so escape it for HTML.
        page.append(
            "<pre><h3>>%s %s</h3>\n"
            % (html.escape(record.id), html.escape(record.description))
        )
        seq = str(record.seq).upper()

        for start in range(0, len(seq), width):
            line_charges = []
            line_residues = []
            line_numbers = []

            # position is the 1-based index of the residue in the sequence.
            for position, residue in enumerate(seq[start:start + width], start + 1):
                if residue in "KR":
                    line_charges.append("+")
                elif residue in "DE":
                    line_charges.append("-")
                else:
                    line_charges.append(" ")

                # Each number is right-aligned in 10 columns, so it ends
                # under the residue it counts.
                if position % 10 == 0:
                    line_numbers.append("%10s" % position)

                cell = html.escape(residue)
                for group_name in match_list:
                    if residue in group_name:
                        cell = '<span class="%s">%s</span>' % (group_name, cell)
                        break
                line_residues.append(cell)

            page.append("".join(line_charges) + "\n")
            page.append("".join(line_residues) + "\n")
            page.append("".join(line_numbers) + "\n")
            page.append("\n")
        page.append("</pre>")

    return HTML_HEADER + "".join(css) + "".join(info) + "".join(page) + HTML_FOOTER
|
|
101
|
|
102
|
|
def charges_svg(svg, fasta, aa, fgColor, bgColor, width=120):
    """Render the sequences of a FASTA file as an SVG charge report.

    The drawing holds a title, a legend with one entry per residue group,
    and one ``<g>`` element per record. Every sequence is upper-cased and
    wrapped to ``width`` residues per row; each row is drawn as a charge line
    (``+`` for K/R, ``-`` for D/E), the residues split into runs of the same
    group (one coloured ``<rect>`` plus ``<text>`` per run), and a ruler with
    the running residue count at every tenth position. Records get
    alternating ``rEven``/``rOdd`` background rectangles.

    Args:
        svg: Unused here; accepted because the command-line entry point
            passes every parsed option to both renderers.
        fasta: File handle or path handed to ``SeqIO.parse(fasta, "fasta")``.
        aa: Iterable of residue-group strings (e.g. ``["KR", "DE"]``); each
            upper-cased group doubles as its CSS class suffix. ``None`` is
            treated as "no groups".
        fgColor: Text colours, paired with ``aa`` by position.
        bgColor: Box colours, paired with ``aa`` by position.
        width: Number of residues per output row.

    Returns:
        The complete SVG document as a string.
    """
    # The options may be None when omitted on the command line; fall back to
    # empty sequences instead of failing on iteration.
    colour_scheme = list(
        zip([group.upper() for group in aa or []], bgColor or [], fgColor or [])
    )
    # Match against the upper-cased, coloured groups only: sequences are
    # upper-cased below, and every matched group must own a CSS class.
    match_list = [group[0] for group in colour_scheme]

    # Layout constants (user units of the SVG canvas).
    yInd = 60
    yInc = 15
    seqIndent = 35
    idIndent = 20
    letterLen = 8.4375
    # Keep the historical 1100 canvas, but grow it when a wide row would
    # otherwise run past the right edge.
    svgWidth = max(1100, int(2 * seqIndent + letterLen * width) + 1)

    defBox = "#ffffff"
    defText = "#000000"
    # Standard residues not claimed by any group form the default class and
    # are listed in the legend under their concatenated letters.
    unassigned = "".join(
        residue
        for residue in FULL_AA
        if not any(residue in group for group in match_list)
    )
    # Residues outside every group (including non-standard letters) always
    # need a class of their own, even when every standard residue is assigned.
    defClass = unassigned or "default"

    # CSS and header styling
    classes = ['<style type="text/css">\n<![CDATA[\n']
    for name, box_colour, text_colour in colour_scheme:
        classes.append("text.text_%s{fill: %s;}\n" % (name, text_colour))
        classes.append(
            "rect.rect_%s{fill: %s; stroke: %s;}\n" % (name, box_colour, box_colour)
        )
    classes.append("text.text_%s{fill: %s;}\n" % (defClass, defText))
    classes.append("rect.rect_%s{fill: %s; stroke: %s;}\n" % (defClass, defBox, defBox))
    classes.append("text.info_text{white-space: pre;}\n")
    classes.append("rect.rEven{fill: #fdfdfd; stroke: #fbfbfb;}\n")
    classes.append("rect.rOdd{fill: #f2f2fc; stroke: #fbfbfb;}\n")
    classes.append("]]></style>\n")

    # Title and legend.
    title = [
        '<text x="%s" y="%s" style="font-weight:bold; font-size:40px">Charges</text>\n'
        % (idIndent * 0.5, yInd)
    ]
    yInd += 2 * yInc
    title.append(
        '<text x="%s" y="%s" style="font-size:18px">Legend:</text>\n' % (idIndent, yInd)
    )
    yInd += 2 * yInc

    legend_classes = match_list + ([unassigned] if unassigned else [])
    for name in legend_classes:
        title.append(
            '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n'
            % (seqIndent, yInd - yInc + 2, len(name) * letterLen, yInc, name)
        )
        title.append(
            '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
            % (seqIndent, yInd, name, name)
        )
        yInd += yInc + 3
    yInd += yInc * 1.5  # yInd is a float from here on

    body = []
    groups = []
    # Parse sequences from fasta file
    for recNum, record in enumerate(SeqIO.parse(fasta, "fasta")):
        # Header text comes from the input file; escape it so characters
        # such as & and < cannot break the XML.
        body.append(
            '<g><text x="%s" y="%s" style="font-weight:bold">>%s %s</text>\n'
            % (idIndent, yInd, html.escape(record.id), html.escape(record.description))
        )
        seq = str(record.seq).upper()
        yTop = yInd - yInc - 3
        yInd += yInc

        for start in range(0, len(seq), width):
            line_charges = []
            line_numbers = []
            # Consecutive residues of the same group, as [class name, text].
            runs = []

            # position is the 1-based index of the residue in the sequence.
            for position, residue in enumerate(seq[start:start + width], start + 1):
                class_name = defClass
                for group_name in match_list:
                    if residue in group_name:
                        class_name = group_name
                        break

                if runs and runs[-1][0] == class_name:
                    runs[-1][1] += residue
                else:
                    runs.append([class_name, residue])

                if residue in "KR":
                    line_charges.append("+")
                elif residue in "DE":
                    line_charges.append("-")
                else:
                    line_charges.append(" ")

                if position % 10 == 0:
                    line_numbers.append("%10s" % position)

            # Write line charges
            body.append(
                '<text x="%s" y="%s" class="info_text" font-family="monospace" font-size="14">%s</text>\n'
                % (seqIndent, yInd, "".join(line_charges))
            )
            yInd += yInc

            # Write sequence, one box and one text element per run.
            drawn = 0
            for class_name, residues in runs:
                body.append(
                    '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n'
                    % (
                        0.5 + seqIndent + (letterLen * drawn),
                        yInd - yInc + 2,
                        len(residues) * letterLen,
                        yInc,
                        class_name,
                    )
                )
                body.append(
                    '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
                    % (
                        seqIndent + (letterLen * drawn),
                        yInd,
                        class_name,
                        html.escape(residues),
                    )
                )
                drawn += len(residues)
            yInd += yInc

            # Write numbers
            body.append(
                '<text x="%s" y="%s" class="info_text" font-size="14" font-family="monospace">%s</text>\n'
                % (seqIndent, yInd, "".join(line_numbers) + "\n")
            )
            yInd += yInc

        body.append("</g>\n")
        yInd += yInc
        # Alternate the background shade between consecutive records.
        stripe = "rEven" if recNum % 2 == 0 else "rOdd"
        groups.append(
            '<rect x="0" y="%s" width="%s" height="%s" class="%s"/>\n'
            % (yTop, svgWidth + 1, yInd - yTop, stripe)
        )

    svgHeight = yInd

    return (
        (SVG_HEADER % (svgWidth, svgHeight))
        + "".join(title)
        + "".join(classes)
        + "".join(groups)
        + "".join(body)
        + SVG_FOOTER
    )
|
|
347
|
|
348
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and print the report to
    # standard output as SVG (--svg) or HTML (default).
    parser = argparse.ArgumentParser(
        description="Charge report for protein sequences (HTML or SVG)"
    )
    parser.add_argument(
        "--svg", action="store_true", help="Emit SVG instead of HTML"
    )
    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta protein file")
    parser.add_argument("--width", type=int, help="Plot width", default=120)
    parser.add_argument(
        "--aa", nargs="+", help="Residue groups to highlight, e.g. KR DE"
    )
    parser.add_argument(
        "--fgColor", nargs="+", help="Text colour for each --aa group, in order"
    )
    parser.add_argument(
        "--bgColor", nargs="+", help="Background colour for each --aa group, in order"
    )

    args = parser.parse_args()
    # Both renderers accept every parsed option as a keyword argument,
    # including the svg flag itself.
    if args.svg:
        print(charges_svg(**vars(args)))
    else:
        print(charges_html(**vars(args)))
|