comparison fasta_charges.py @ 1:054f96a0d0fb draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:30 +0000
parents
children
comparison
equal deleted inserted replaced
0:7443c98a4e3c 1:054f96a0d0fb
1 #!/usr/bin/env python
2 import argparse
3 from Bio import SeqIO
4 import logging
5
# Root logging is configured at INFO; this module logs under the name "charges".
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(name="charges")

# Fixed wrappers placed around the generated HTML report.
HTML_HEADER = "<html><head><title>Charges Report</title></head><body>"
HTML_FOOTER = "</body></html>"

# Opening <svg> tag template; fill with ``% (width, height)`` (both rendered via %i).
SVG_HEADER = '<svg width="%i" height="%i" xmlns="http://www.w3.org/2000/svg">\n'
SVG_FOOTER = "</svg>"

# One-letter amino-acid codes, in the order used to build the default
# (uncoloured) legend class in the SVG report.
FULL_AA = list("HSQTNCYAVILMPFWGERDK")
37
38
def charges_html(svg, fasta, aa, fgColor, bgColor, width=120):
    """Build an HTML report showing charges and coloured residues per record.

    For every FASTA record the sequence is upper-cased and wrapped at
    ``width`` residues.  Each wrapped line is emitted as three text rows
    inside a ``<pre>``: a charge row (``+`` for K/R, ``-`` for D/E, space
    otherwise), the residues (wrapped in ``<span class="GROUP">`` when they
    belong to a colour group), and a ruler labelling every 10th position.

    Args:
        svg: Unused here; accepted so the parsed command-line namespace can
            be passed as keyword arguments unchanged.
        fasta: Handle or path handed directly to ``SeqIO.parse(..., "fasta")``.
        aa: Residue groups (e.g. ``["KR", "DE"]``); each group is upper-cased
            and doubles as its CSS class name.  ``None`` means no groups.
        fgColor: Text colours, parallel to ``aa``.
        bgColor: Background colours, parallel to ``aa``.
        width: Number of residues per wrapped line.

    Returns:
        The complete HTML document as a single string.

    Notes:
        ``aa``, ``bgColor`` and ``fgColor`` are zipped, so surplus entries in
        the longer lists are ignored.  A residue found in several groups takes
        the first group that contains it.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    # zip() can only be consumed once, so materialise it; missing options
    # (None) are treated as "no colour groups".
    colour_scheme = list(
        zip([x.upper() for x in (aa or [])], bgColor or [], fgColor or [])
    )

    # CSS and legend
    css = (
        '<style type="text/css">\n'
        ".list li { list-style: none; margin:10px }\n"
        ".info { float:left; width:20px }\n"
        "pre { font-size:1.3em }\n"
    )
    info = '<h1>Charges</h1><h3>Legend</h3><ul class="list">'
    for group in colour_scheme:
        # group is (class name, background, foreground)
        css += ".%s{ background: %s; color: %s}\n" % group
        info += '<li><span class="%s" style="padding:5px">%s</span></li>\n' % (
            group[0],
            group[0],
        )
    css += "</style>"
    info += "</ul>"

    match_list = [group[0] for group in colour_scheme]

    page = []
    for record in SeqIO.parse(fasta, "fasta"):
        # Header text comes from the input file, so escape it before
        # placing it in markup.
        page.append(
            "<pre><h3>&gt;%s %s</h3>\n"
            % (escape(record.id), escape(record.description))
        )
        seq = str(record.seq).upper()

        for start in range(0, len(seq), width):
            line_charges = []
            line_residues = []
            line_numbers = ""

            for col, residue in enumerate(seq[start : start + width]):
                if residue in "KR":
                    line_charges.append("+")
                elif residue in "DE":
                    line_charges.append("-")
                else:
                    line_charges.append(" ")

                # Right-align each label under its own residue column so the
                # ruler stays correct even when width is not a multiple of 10.
                position = start + col + 1
                if position % 10 == 0:
                    label = str(position)
                    line_numbers = line_numbers.ljust(col + 1 - len(label)) + label

                # Wrap the residue in the first colour group containing it.
                markup = escape(residue)
                for group_name in match_list:
                    if residue in group_name:
                        markup = '<span class="%s">%s</span>' % (group_name, markup)
                        break
                line_residues.append(markup)

            page.append("".join(line_charges) + "\n")
            page.append("".join(line_residues) + "\n")
            page.append(line_numbers + "\n")
            page.append("\n")
        page.append("</pre>")
    return HTML_HEADER + css + info + "".join(page) + HTML_FOOTER
101
102
def charges_svg(svg, fasta, aa, fgColor, bgColor, width=120):
    """Build an SVG report showing charges and coloured residue runs per record.

    The drawing consists of a title, a legend with one swatch per colour
    group (plus a default swatch for the ``FULL_AA`` letters not covered by
    any group), and then one ``<g>`` per FASTA record.  Each record's
    sequence is upper-cased and wrapped at ``width`` residues; every wrapped
    line yields a charge row (``+`` for K/R, ``-`` for D/E), a row of
    coloured rectangles with the residues drawn on top (consecutive residues
    of the same group share one rectangle), and a ruler labelling every 10th
    position.  Records get alternating ``rEven``/``rOdd`` background bands.

    Args:
        svg: Unused here; accepted so the parsed command-line namespace can
            be passed as keyword arguments unchanged.
        fasta: Handle or path handed directly to ``SeqIO.parse(..., "fasta")``.
        aa: Residue groups (e.g. ``["KR", "DE"]``); each group is upper-cased
            and doubles as its CSS class suffix.  ``None`` means no groups.
        fgColor: Text fill colours, parallel to ``aa``.
        bgColor: Rectangle fill/stroke colours, parallel to ``aa``.
        width: Number of residues per wrapped line.

    Returns:
        The complete SVG document as a single string.

    Notes:
        ``aa``, ``bgColor`` and ``fgColor`` are zipped, so surplus entries in
        the longer lists are ignored.  A residue found in several groups takes
        the first group that contains it; a residue in no group takes the
        default (white box, black text) class.
    """
    # Local imports keep this function self-contained.
    from itertools import groupby
    from xml.sax.saxutils import escape

    # Upper-case the groups once and use that form everywhere (CSS classes,
    # legend, and residue matching) because sequences are upper-cased too.
    groups_upper = [x.upper() for x in (aa or [])]
    colour_scheme = list(zip(groups_upper, bgColor or [], fgColor or []))

    svgWidth = 1100
    defBox = "#ffffff"
    defText = "#000000"

    # Layout values, in SVG user units.
    yInd = 60  # current baseline; advances as rows are emitted
    yInc = 15  # row height
    seqIndent = 35
    idIndent = 20
    # NOTE(review): presumably the advance width of one 14px monospace glyph
    # -- confirm against the renderer/font in use.
    letterLen = 8.4375

    # Default legend class: the standard letters no group claims.
    defClass = "".join(
        x for x in FULL_AA if not any(x in y for y in groups_upper)
    )
    # When every standard letter is claimed there is no default legend entry,
    # but non-standard symbols still need a class with a real CSS rule.
    fallbackClass = defClass or "other"

    # Only groups that received colours can be matched or shown in the legend.
    match_list = [group for group, _bg, _fg in colour_scheme]
    classList = list(match_list)

    classes = '<style type="text/css">\n<![CDATA[\n'
    for group, box_colour, text_colour in colour_scheme:
        classes += "text.text_%s{fill: %s;}\n" % (group, text_colour)
        classes += "rect.rect_%s{fill: %s; stroke: %s;}\n" % (
            group,
            box_colour,
            box_colour,
        )
    classes += "text.text_%s{fill: %s;}\n" % (fallbackClass, defText)
    classes += "rect.rect_%s{fill: %s; stroke: %s;}\n" % (
        fallbackClass,
        defBox,
        defBox,
    )
    if defClass:
        classList.append(defClass)
    classes += "text.info_text{white-space: pre;}\n"
    classes += "rect.rEven{fill: #fdfdfd; stroke: #fbfbfb;}\n"
    classes += "rect.rOdd{fill: #f2f2fc; stroke: #fbfbfb;}\n"
    classes += "]]></style>\n"

    def class_of(residue):
        """Return the CSS class suffix for one residue (first matching group)."""
        for group in match_list:
            if residue in group:
                return group
        return fallbackClass

    def charge_of(residue):
        """Return the charge marker drawn above one residue."""
        if residue in "KR":
            return "+"
        if residue in "DE":
            return "-"
        return " "

    # Title and legend
    title = (
        '<text x="%s" y="%s" style="font-weight:bold; font-size:40px">Charges</text>\n'
        % (idIndent * 0.5, yInd)
    )
    yInd += 2 * yInc
    title += '<text x="%s" y="%s" style="font-size:18px">Legend:</text>\n' % (
        idIndent,
        yInd,
    )
    yInd += 2 * yInc

    for name in classList:
        title += '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n' % (
            seqIndent,
            yInd - yInc + 2,
            len(name) * letterLen,
            yInc,
            name,
        )
        title += (
            '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
            % (seqIndent, yInd, name, name)
        )
        yInd += yInc + 3
    yInd += yInc * 1.5

    body = []
    groups = []
    for recNum, record in enumerate(SeqIO.parse(fasta, "fasta")):
        # Header text comes from the input file, so escape it before
        # placing it in markup.
        body.append(
            '<g><text x="%s" y="%s" style="font-weight:bold">&gt;%s %s</text>\n'
            % (idIndent, yInd, escape(record.id), escape(record.description))
        )
        seq = str(record.seq).upper()
        yTop = yInd - yInc - 3  # top edge of this record's background band
        yInd += yInc

        for start in range(0, len(seq), width):
            line_residues = seq[start : start + width]

            # Charge row
            body.append(
                '<text x="%s" y="%s" class="info_text" font-family="monospace" font-size="14">%s</text>\n'
                % (seqIndent, yInd, "".join(charge_of(r) for r in line_residues))
            )
            yInd += yInc

            # Residue row: one rect + text per run of same-class residues.
            sumSeq = 0  # residues already drawn on this line
            for class_name, run in groupby(line_residues, key=class_of):
                run_text = "".join(run)
                body.append(
                    '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n'
                    % (
                        0.5 + seqIndent + (letterLen * sumSeq),
                        yInd - yInc + 2,
                        len(run_text) * letterLen,
                        yInc,
                        class_name,
                    )
                )
                body.append(
                    '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
                    % (
                        seqIndent + (letterLen * sumSeq),
                        yInd,
                        class_name,
                        escape(run_text),
                    )
                )
                sumSeq += len(run_text)
            yInd += yInc

            # Ruler row: right-align each label under its own residue column
            # so it stays correct even when width is not a multiple of 10.
            line_numbers = ""
            for col in range(len(line_residues)):
                position = start + col + 1
                if position % 10 == 0:
                    label = str(position)
                    line_numbers = line_numbers.ljust(col + 1 - len(label)) + label
            body.append(
                '<text x="%s" y="%s" class="info_text" font-size="14" font-family="monospace">%s</text>\n'
                % (seqIndent, yInd, line_numbers + "\n")
            )
            yInd += yInc

        body.append("</g>\n")
        yInd += yInc
        band_class = "rEven" if recNum % 2 == 0 else "rOdd"
        groups.append(
            '<rect x="0" y="%s" width="%s" height="%s" class="%s"/>\n'
            % (yTop, svgWidth + 1, yInd - yTop, band_class)
        )

    # Taken after the loop so an input with no records still yields a valid
    # document; %i in SVG_HEADER truncates the (possibly fractional) height.
    svgHeight = yInd

    return (
        (SVG_HEADER % (svgWidth, svgHeight))
        + title
        + classes
        + "".join(groups)
        + "".join(body)
        + SVG_FOOTER
    )
347
348
if __name__ == "__main__":
    # Command-line entry point: parse options, render the report, and print
    # it to standard output.
    parser = argparse.ArgumentParser(
        description="Render charged and colour-grouped residues of a protein "
        "FASTA file as an HTML or SVG report"
    )
    parser.add_argument(
        "--svg", action="store_true", help="Emit SVG instead of HTML"
    )
    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta protein file")
    parser.add_argument("--width", type=int, help="Plot width", default=120)
    # The three lists are parallel: the n-th residue group is drawn with the
    # n-th foreground and background colour.  Defaulting to empty lists lets
    # the report render without any colour groups.
    parser.add_argument(
        "--aa", nargs="+", default=[], help="Residue groups to colour, e.g. KR DE"
    )
    parser.add_argument(
        "--fgColor", nargs="+", default=[], help="Text colour for each residue group"
    )
    parser.add_argument(
        "--bgColor",
        nargs="+",
        default=[],
        help="Background colour for each residue group",
    )

    args = parser.parse_args()
    # The width is used as a range() step when wrapping sequences, so it must
    # be a positive integer.
    if args.width < 1:
        parser.error("--width must be a positive integer")

    # Both renderers accept the full namespace (svg, fasta, width, aa,
    # fgColor, bgColor) as keyword arguments.
    if args.svg:
        print(charges_svg(**vars(args)))
    else:
        print(charges_html(**vars(args)))