Mercurial > repos > cpt > cpt_fasta_charges
comparison fasta_charges.py @ 1:054f96a0d0fb draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:41:30 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7443c98a4e3c | 1:054f96a0d0fb |
---|---|
1 #!/usr/bin/env python | |
import argparse
import html
import logging

from Bio import SeqIO
5 | |
# Module-wide logging: INFO and above, exposed through the "charges" logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(name="charges")

# Opening and closing markup wrapped around the HTML report.
HTML_HEADER = "<html><head><title>Charges Report</title></head><body>"
HTML_FOOTER = "</body></html>"

# Opening <svg> tag; must be %-formatted with (calcWidth, calcHeight) integers.
SVG_HEADER = '<svg width="%i" height="%i" xmlns="http://www.w3.org/2000/svg">\n'
SVG_FOOTER = "</svg>"

# The 20 one-letter amino-acid codes, in the order used when the SVG renderer
# assembles the name of its default (unstyled) residue class.
FULL_AA = list("HSQTNCYAVILMPFWGERDK")
37 | |
38 | |
def charges_html(svg, fasta, aa, fgColor, bgColor, width=120):
    """Render the sequences of a FASTA file as an HTML charge report.

    Each sequence is upper-cased and printed in chunks of ``width`` residues.
    Every chunk produces three text rows inside a ``<pre>``: a charge track
    ("+" for K/R, "-" for D/E, blank otherwise), the residues themselves
    (wrapped in a ``<span>`` carrying the CSS class of the first colour group
    that contains them) and a ruler marking every tenth position.

    Args:
        svg: Unused here; accepted so the whole argparse namespace can be
            passed with ``**vars(args)``.
        fasta: File handle or path handed to ``SeqIO.parse(..., "fasta")``.
        aa: Residue groups, one string of one-letter codes per group (for
            example ``"KR"``). ``None`` is treated as "no groups".
        fgColor: Text colour per group, parallel to ``aa``.
        bgColor: Background colour per group, parallel to ``aa``.
        width: Number of residues per output line.

    Returns:
        The complete HTML document as a single string.
    """
    # argparse passes None for options that were not given; treat those as
    # empty so the report is still produced (just without colouring).
    group_names = [group.upper() for group in (aa or [])]
    # zip() stops at the shortest list, so a group without both colours is
    # silently left unstyled rather than raising.
    colour_scheme = list(zip(group_names, bgColor or [], fgColor or []))

    css = [
        '<style type="text/css">\n',
        ".list li { list-style: none; margin:10px }\n",
        ".info { float:left; width:20px }\n",
        "pre { font-size:1.3em }\n",
    ]
    legend = ['<h1>Charges</h1><h3>Legend</h3><ul class="list">']
    # Residue -> CSS class. setdefault keeps the first group that lists a
    # residue, matching a first-match scan over the groups.
    class_for = {}
    for name, background, foreground in colour_scheme:
        css.append(".%s{ background: %s; color: %s}\n" % (name, background, foreground))
        legend.append(
            '<li><span class="%s" style="padding:5px">%s</span></li>\n' % (name, name)
        )
        for residue in name:
            class_for.setdefault(residue, name)
    css.append("</style>")
    legend.append("</ul>")

    charge_of = {"K": "+", "R": "+", "D": "-", "E": "-"}

    page = []
    for record in SeqIO.parse(fasta, "fasta"):
        # Header text comes from the input file, so escape it before it is
        # embedded in markup.
        # NOTE(review): Biopython's FASTA parser is understood to store the
        # whole title line (id included) in ``description``, which would make
        # the id appear twice here - confirm before changing the output.
        page.append(
            "<pre><h3>>%s %s</h3>\n"
            % (html.escape(record.id), html.escape(record.description))
        )
        sequence = str(record.seq).upper()

        for start in range(0, len(sequence), width):
            chunk = sequence[start : start + width]

            charges = "".join(charge_of.get(residue, " ") for residue in chunk)

            residues = []
            for residue in chunk:
                shown = html.escape(residue)
                css_class = class_for.get(residue)
                if css_class is not None:
                    shown = '<span class="%s">%s</span>' % (css_class, shown)
                residues.append(shown)

            # Right-align each multiple of ten so its last digit sits under
            # that residue, even when ``width`` is not a multiple of ten.
            ruler = ""
            first_mark = (start // 10 + 1) * 10
            for position in range(first_mark, start + len(chunk) + 1, 10):
                ruler += str(position).rjust(position - start - len(ruler))

            page.append(charges + "\n")
            page.append("".join(residues) + "\n")
            page.append(ruler + "\n")
            page.append("\n")
        page.append("</pre>")

    return HTML_HEADER + "".join(css) + "".join(legend) + "".join(page) + HTML_FOOTER
101 | |
102 | |
def charges_svg(svg, fasta, aa, fgColor, bgColor, width=120):
    """Render the sequences of a FASTA file as an SVG charge report.

    The drawing contains a title, a legend with one swatch per colour group
    (plus a default white/black class naming every residue in ``FULL_AA``
    that no styled group covers), and one ``<g>`` per FASTA record. Each
    record is upper-cased and split into chunks of ``width`` residues; every
    chunk is drawn as a charge track ("+" for K/R, "-" for D/E), the residues
    as runs of same-class rectangles with text, and a ruler marking every
    tenth position. Records get alternating ``rEven``/``rOdd`` backgrounds.

    Args:
        svg: Unused here; accepted so the whole argparse namespace can be
            passed with ``**vars(args)``.
        fasta: File handle or path handed to ``SeqIO.parse(..., "fasta")``.
        aa: Residue groups, one string of one-letter codes per group.
            ``None`` is treated as "no groups".
        fgColor: Text fill per group, parallel to ``aa``.
        bgColor: Rectangle fill/stroke per group, parallel to ``aa``.
        width: Number of residues per drawn line.

    Returns:
        The complete SVG document as a single string.
    """
    svg_width = 1100
    y_inc = 15  # vertical step between text rows
    seq_indent = 35  # x position of sequence/legend content
    id_indent = 20  # x position of headings and record titles
    letter_len = 8.4375  # horizontal space allotted to one residue character
    default_box = "#ffffff"
    default_text = "#000000"

    # argparse passes None for options that were not given. Upper-case the
    # groups once and use that single list for CSS classes, the default class
    # and residue matching, so all three always agree (sequences are
    # upper-cased below). zip() stops at the shortest list; groups without
    # both colours are treated as unstyled.
    colour_scheme = list(
        zip([group.upper() for group in (aa or [])], bgColor or [], fgColor or [])
    )
    styled_groups = [entry[0] for entry in colour_scheme]

    # Residues of FULL_AA not claimed by any styled group form the default class.
    default_class = "".join(
        residue
        for residue in FULL_AA
        if not any(residue in group for group in styled_groups)
    )

    css = ['<style type="text/css">\n<![CDATA[\n']
    class_list = []
    for name, background, foreground in colour_scheme:
        class_list.append(name)
        css.append("text.text_%s{fill: %s;}\n" % (name, foreground))
        css.append(
            "rect.rect_%s{fill: %s; stroke: %s;}\n" % (name, background, background)
        )
    if default_class != "":
        css.append("text.text_%s{fill: %s;}\n" % (default_class, default_text))
        css.append(
            "rect.rect_%s{fill: %s; stroke: %s;}\n"
            % (default_class, default_box, default_box)
        )
        class_list.append(default_class)
    css.append("text.info_text{white-space: pre;}\n")
    css.append("rect.rEven{fill: #fdfdfd; stroke: #fbfbfb;}\n")
    css.append("rect.rOdd{fill: #f2f2fc; stroke: #fbfbfb;}\n")
    css.append("]]></style>\n")

    # Residue -> index into class_list; first group listing a residue wins.
    # Unlisted residues map to -1, i.e. the last class. That is the default
    # class whenever one exists.
    # NOTE(review): if the styled groups cover all of FULL_AA there is no
    # default class, so non-standard residues take the last group's colours.
    group_index = {}
    for index, group in enumerate(styled_groups):
        for residue in group:
            group_index.setdefault(residue, index)

    charge_of = {"K": "+", "R": "+", "D": "-", "E": "-"}

    y = 60
    title = [
        '<text x="%s" y="%s" style="font-weight:bold; font-size:40px">Charges</text>\n'
        % (id_indent * 0.5, y)
    ]
    y += 2 * y_inc
    title.append(
        '<text x="%s" y="%s" style="font-size:18px">Legend:</text>\n' % (id_indent, y)
    )
    y += 2 * y_inc

    # One swatch per class, sized to the class name it displays.
    for name in class_list:
        title.append(
            '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n'
            % (seq_indent, y - y_inc + 2, len(name) * letter_len, y_inc, name)
        )
        title.append(
            '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
            % (seq_indent, y, name, name)
        )
        y += y_inc + 3
    y += y_inc * 1.5

    body = []
    stripes = []
    for record_number, record in enumerate(SeqIO.parse(fasta, "fasta")):
        # Header text comes from the input file; escape it so "<" or "&"
        # cannot produce malformed XML.
        body.append(
            '<g><text x="%s" y="%s" style="font-weight:bold">>%s %s</text>\n'
            % (
                id_indent,
                y,
                html.escape(record.id, quote=False),
                html.escape(record.description, quote=False),
            )
        )
        sequence = str(record.seq).upper()
        y_top = y - y_inc - 3
        y += y_inc

        for start in range(0, len(sequence), width):
            chunk = sequence[start : start + width]

            # Split the chunk into maximal runs of residues sharing a class.
            runs = []
            for residue in chunk:
                index = group_index.get(residue, -1)
                if runs and runs[-1][0] == index:
                    runs[-1][1].append(residue)
                else:
                    runs.append((index, [residue]))

            # Charge track.
            charges = "".join(charge_of.get(residue, " ") for residue in chunk)
            body.append(
                '<text x="%s" y="%s" class="info_text" font-family="monospace" font-size="14">%s</text>\n'
                % (seq_indent, y, charges)
            )
            y += y_inc

            # Residues: a background rectangle plus the text for every run.
            drawn = 0
            for index, run in runs:
                css_class = class_list[index]
                body.append(
                    '<rect x="%s" y="%s" width="%s" height="%s" class="rect_%s"/>\n'
                    % (
                        0.5 + seq_indent + (letter_len * drawn),
                        y - y_inc + 2,
                        len(run) * letter_len,
                        y_inc,
                        css_class,
                    )
                )
                body.append(
                    '<text x="%s" y="%s" class="text_%s" font-family="monospace" font-size="14">%s</text>\n'
                    % (
                        seq_indent + (letter_len * drawn),
                        y,
                        css_class,
                        html.escape("".join(run), quote=False),
                    )
                )
                drawn += len(run)
            y += y_inc

            # Ruler: right-align each multiple of ten so its last digit sits
            # under that residue, even when ``width`` is not a multiple of ten.
            ruler = ""
            first_mark = (start // 10 + 1) * 10
            for position in range(first_mark, start + len(chunk) + 1, 10):
                ruler += str(position).rjust(position - start - len(ruler))
            body.append(
                '<text x="%s" y="%s" class="info_text" font-size="14" font-family="monospace">%s</text>\n'
                % (seq_indent, y, ruler + "\n")
            )
            y += y_inc

        body.append("</g>\n")
        y += y_inc
        # Alternate the background band behind consecutive records.
        stripes.append(
            '<rect x="0" y="%s" width="%s" height="%s" class="%s"/>\n'
            % (
                y_top,
                svg_width + 1,
                y - y_top,
                "rEven" if record_number % 2 == 0 else "rOdd",
            )
        )

    # Taken after the loop so an input without records still yields a valid
    # (title and legend only) document.
    svg_height = y

    return (
        (SVG_HEADER % (svg_width, svg_height))
        + "".join(title)
        + "".join(css)
        + "".join(stripes)
        + "".join(body)
        + SVG_FOOTER
    )
347 | |
348 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options and print the report
    # (HTML by default, SVG with --svg) to standard output.
    parser = argparse.ArgumentParser(
        description="Render a residue charge report (HTML or SVG) for a protein FASTA file"
    )
    parser.add_argument(
        "--svg", action="store_true", help="Emit an SVG drawing instead of HTML"
    )
    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta protein file")
    parser.add_argument(
        "--width", type=int, help="Number of residues per output line", default=120
    )
    # The three colour options are parallel lists. They default to empty
    # lists so that omitting them yields an uncoloured report instead of
    # handing None to the renderers.
    parser.add_argument(
        "--aa",
        nargs="+",
        default=[],
        help="Residue groups to colour, one string of one-letter codes per group",
    )
    parser.add_argument(
        "--fgColor", nargs="+", default=[], help="Text colour for each --aa group"
    )
    parser.add_argument(
        "--bgColor", nargs="+", default=[], help="Background colour for each --aa group"
    )

    args = parser.parse_args()
    # Option names match the renderers' parameter names, so the namespace can
    # be passed through as keyword arguments.
    render = charges_svg if args.svg else charges_html
    print(render(**vars(args)))