Mercurial > repos > cpt > cpt_mist3_wrapper
comparison cpt_mist3/mist3.py @ 0:1a9603d09814 draft default tip
Uploaded
author | cpt |
---|---|
date | Fri, 17 Jun 2022 02:58:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1a9603d09814 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import time | |
4 import tempfile | |
5 import pprint | |
6 import shutil | |
7 import os | |
8 import math | |
9 from string import Template | |
10 from Bio import SeqIO | |
11 import subprocess | |
12 import sys | |
13 import logging | |
14 | |
15 FORMAT = "[%(levelname)s:%(filename)s:%(lineno)s:%(funcName)s()] %(message)s" | |
16 logging.basicConfig(level=logging.INFO, format=FORMAT) | |
17 log = logging.getLogger("mist") | |
18 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) | |
19 | |
20 MONTAGE_BORDER = 80 | |
21 IMAGE_BORDER = 2 | |
22 | |
23 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER) | |
24 IMAGE_BORDER_COORD = "%sx%s" % (IMAGE_BORDER, IMAGE_BORDER) | |
25 | |
26 DOUBLE_IMAGE_BORDER_COORD = "%sx%s" % (2 * IMAGE_BORDER, 2 * IMAGE_BORDER) | |
27 | |
28 MONTAGE_BORDER_COLOUR = "grey70" | |
29 IMAGE_BORDER_COLOUR = "purple" | |
30 LABEL_COLOUR = "grey22" | |
31 TICK_LENGTH = 0.2 * MONTAGE_BORDER | |
32 | |
33 TYPEFONT = "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf" | |
34 | |
35 CREDITS = ( | |
36 "CPTs MISTv3\\n" | |
37 "GPLv3 (C) 2015 Helena Rasche <esr\@tamu.edu>\\n" | |
38 "Dot plots by Gepard Dot Plotter by Dr. Jan Krumsiek" | |
39 ) | |
40 | |
41 | |
42 class FancyRecord(object): | |
43 def __init__(self, record, tmpdir): | |
44 self.temp = tempfile.NamedTemporaryFile(mode='w', dir=tmpdir, delete=False, suffix=".fa") | |
45 self.temp_path = self.temp.name | |
46 self.id = self.temp_path.rsplit("/")[-1] | |
47 self.record = record | |
48 self.header = record.id | |
49 | |
50 # Description include record.id | |
51 sd = record.description.strip().split(" ") | |
52 if len(sd) > 1: | |
53 self.description = " ".join(sd[1:]) | |
54 else: | |
55 self.description = "" | |
56 | |
57 self.length = len(record.seq) | |
58 | |
59 # Serialize to disk | |
60 self._write(self.temp) | |
61 self.temp.flush() | |
62 self.temp.close() | |
63 | |
64 def _write(self, handle): | |
65 SeqIO.write([self.record], handle, "fasta") | |
66 | |
67 | |
68 class Subplot(object): | |
69 def __init__(self, i, j, tmpdir, zoom): | |
70 self.i = i | |
71 self.j = j | |
72 # Mist information | |
73 self.tmpdir = tmpdir | |
74 self.zoom = zoom | |
75 | |
76 self.original_path = None | |
77 self.thumb_path = None | |
78 self.annotated_original_path = None | |
79 | |
80 def safe(self, string): | |
81 return "".join( | |
82 [ | |
83 c | |
84 for c in string.strip() | |
85 if c.isalpha() or c.isdigit() or c == " " or c in ("[", "]", "-", "_") | |
86 ] | |
87 ).rstrip() | |
88 | |
89 def move_to_dir(self, files_path): | |
90 """Move a specific image that's part of a subplot to an output | |
91 directory and return the name | |
92 """ | |
93 destination_fn = ( | |
94 self.safe("%s_vs_%s_[annotated]" % (self.i.header, self.j.header)) + ".png" | |
95 ) | |
96 destination = os.path.join(files_path, destination_fn) | |
97 log.debug("cp %s %s", self.annotated_original_path, destination) | |
98 if ( | |
99 self.annotated_original_path is not None | |
100 and os.path.exists(self.annotated_original_path) | |
101 and not os.path.exists(destination) | |
102 ): | |
103 shutil.move(self.annotated_original_path, destination) | |
104 return destination_fn | |
105 | |
106 def get_description(self): | |
107 return "%s v %s" % (self.i.header, self.j.header) | |
108 | |
109 def __repr__(self): | |
110 return "Subplot [%s]" % self.get_description() | |
111 | |
112 def run_gepard(self, matrix, window, global_rescale="35%"): | |
113 """Run gepard on two sequences, with a specified output file | |
114 """ | |
115 log.info("Running Gepard on %s", self.get_description()) | |
116 | |
117 destination_fn = ( | |
118 self.safe("%s_vs_%s_orig" % (self.i.header, self.j.header)) + ".png" | |
119 ) | |
120 self.original_path = os.path.join(self.tmpdir, destination_fn) | |
121 | |
122 cmd = [ | |
123 "java", | |
124 "-jar", | |
125 os.path.join(SCRIPT_DIR, "gepard.jar"), | |
126 "--seq1", | |
127 self.j.temp_path, | |
128 "--seq2", | |
129 self.i.temp_path, | |
130 "--matrix", | |
131 matrix + ".mat", | |
132 "--outfile", | |
133 self.original_path, | |
134 "--word", | |
135 str(window), | |
136 "--zoom", | |
137 str(self.zoom), | |
138 "--silent", | |
139 ] | |
140 log.debug(subprocess.list2cmdline(cmd)) | |
141 #log.info(subprocess.check_output("convert -list type")) | |
142 #exit(2) | |
143 failure_count = 0 | |
144 while True: | |
145 try: | |
146 subprocess.check_call(cmd) | |
147 break | |
148 except subprocess.CalledProcessError: | |
149 failure_count += 1 | |
150 log.warn("sp.CPE FC %s", failure_count) | |
151 if failure_count > 3: | |
152 break | |
153 time.sleep(1) | |
154 | |
155 # Generate border/individualised images | |
156 log.debug(" Annotating") | |
157 destination_fn = ( | |
158 self.safe("%s_vs_%s_annotated" % (self.i.header, self.j.header)) + ".png" | |
159 ) | |
160 self.annotated_original_path = os.path.join(self.tmpdir, destination_fn) | |
161 self.annotate_image(self.original_path, self.annotated_original_path) | |
162 | |
163 # Generate thumbnail version of base image | |
164 log.debug(" Resizing") | |
165 destination_fn = ( | |
166 self.safe("%s_vs_%s_thumb" % (self.i.header, self.j.header)) + ".png" | |
167 ) | |
168 self.thumb_path = os.path.join(self.tmpdir, destination_fn) | |
169 Misty.resize_image(global_rescale, self.original_path, self.thumb_path) | |
170 | |
171 def get_thumb_dims(self): | |
172 """ | |
173 For NxN sized images, this avoids running `identify` when it won't be | |
174 used (e.g i=3,j=4) | |
175 """ | |
176 if not hasattr(self, "thumb_dims") or self.thumb_dims is None: | |
177 self.thumb_dims = Misty.obtain_image_dimensions(self.thumb_path) | |
178 return self.thumb_dims | |
179 | |
180 def label_formatter(self, bases): | |
181 if bases > 1000000: | |
182 label = "%s Mbp" % int(bases / 1000000) | |
183 elif bases > 1000: | |
184 label = "%s kbp" % int(bases / 1000) | |
185 else: | |
186 label = "%s b" % bases | |
187 return label | |
188 | |
189 def annotate_image(self, infile, outfile): | |
190 original_dims = Misty.obtain_image_dimensions(infile) | |
191 | |
192 half_height = (original_dims[1] / 2) + MONTAGE_BORDER + IMAGE_BORDER | |
193 half_width = (original_dims[0] / 2) + MONTAGE_BORDER + IMAGE_BORDER | |
194 | |
195 restoreBorder = MONTAGE_BORDER | |
196 | |
197 def char_width(font_size): | |
198 """approximate pixel width of a single character at Xpt font | |
199 | |
200 Assumes that 40pt font results in 20px characters | |
201 """ | |
202 return int(float(font_size) / 2) | |
203 | |
204 def char_height(font_size): | |
205 """approximate pixel height of a single character at Xpt font | |
206 | |
207 Assumes that 40pt font results in 30px characters | |
208 """ | |
209 return int(float(font_size) * 30 / 40) | |
210 | |
211 def est_pixels(string, font_size): | |
212 """guess pixel width of a string at a given font size | |
213 """ | |
214 return char_width(font_size) * len(string) | |
215 | |
216 j_ticks = int(Misty.BestTick(self.j.length, 5)) | |
217 i_ticks = int(Misty.BestTick(self.i.length, 5)) | |
218 | |
219 # Convert definitions | |
220 GREY_FILL = ["-fill", LABEL_COLOUR] | |
221 NO_FILL = ["-fill", "none"] | |
222 NO_STROKE = ["-stroke", "none"] | |
223 GREY_STROKE = ["-stroke", LABEL_COLOUR, "-strokewidth", "2"] | |
224 GFNS = GREY_FILL + NO_STROKE | |
225 NFGS = NO_FILL + GREY_STROKE | |
226 # Font for labels | |
227 FONT_SPEC = GFNS + ["-font", TYPEFONT] | |
228 FONT_10pt = FONT_SPEC + ["-pointsize", "10"] | |
229 FONT_20pt = FONT_SPEC + ["-pointsize", "20"] | |
230 FONT_30pt = FONT_SPEC + ["-pointsize", "30"] | |
231 | |
232 cmd = [ | |
233 "convert", | |
234 infile, | |
235 "-bordercolor", | |
236 IMAGE_BORDER_COLOUR, | |
237 "-border", | |
238 IMAGE_BORDER_COORD, | |
239 "-bordercolor", | |
240 MONTAGE_BORDER_COLOUR, | |
241 "-border", | |
242 MONTAGE_BORDER_COORD, | |
243 ] | |
244 | |
245 # Rotate -90, apply row header at bottom. | |
246 primary_header = self.i.header | |
247 secondary_head = self.i.description | |
248 cmd += ( | |
249 ["-rotate", "-90",] | |
250 + FONT_30pt | |
251 + [ | |
252 # Side label (i/row) | |
253 "-annotate", | |
254 "+%s+%s" | |
255 % ( | |
256 half_height - 0.5 * est_pixels(self.i.header, 30), | |
257 original_dims[0] | |
258 + MONTAGE_BORDER | |
259 + 2 * IMAGE_BORDER | |
260 + char_height(30 + 20) | |
261 + 10 | |
262 # 30 = primary header | |
263 # 20 = tick labels | |
264 ), | |
265 primary_header, | |
266 ] | |
267 ) | |
268 | |
269 if est_pixels(self.i.description, 10) < original_dims[1] and secondary_head != "": | |
270 cmd += FONT_10pt + [ | |
271 # Side label (i/row) | |
272 "-annotate", | |
273 "+%s+%s" | |
274 % ( | |
275 half_height - 0.5 * est_pixels(self.i.description, 10), | |
276 original_dims[0] | |
277 + MONTAGE_BORDER | |
278 + 2 * IMAGE_BORDER | |
279 + char_height(30 + 20 + 10 + 4) | |
280 # 30 = primary header | |
281 # 20 = tick labels | |
282 # 10 = secondary header height | |
283 # 4 = line spacing | |
284 ), | |
285 secondary_head, | |
286 ] | |
287 | |
288 # Apply row ticks labels at bottom | |
289 for z in range(0, self.i.length, i_ticks): | |
290 | |
291 image_side_percentage = float(z) / self.i.length | |
292 x = ( | |
293 (image_side_percentage * original_dims[1]) | |
294 + MONTAGE_BORDER | |
295 + IMAGE_BORDER | |
296 ) | |
297 y = MONTAGE_BORDER + original_dims[0] + (2 * IMAGE_BORDER) | |
298 | |
299 # Apply ticks | |
300 cmd += NFGS | |
301 cmd += [ | |
302 "-draw", | |
303 "line %s,%s %s,%s" % (x, y, x, y + TICK_LENGTH), | |
304 ] | |
305 | |
306 # Keep text from running off the edge. | |
307 space_to_end_of_image = (1 - image_side_percentage) * original_dims[1] | |
308 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0: | |
309 continue | |
310 | |
311 # Text label | |
312 cmd += FONT_20pt | |
313 cmd += ["-annotate", "+%s+%s" % (x + 5, y + 15), self.label_formatter(z)] | |
314 | |
315 # Rotate back to final rotation | |
316 primary_header = self.j.header | |
317 secondary_head = self.j.description | |
318 cmd += ( | |
319 [ | |
320 "-rotate", | |
321 "90", | |
322 # Top label (j/column) | |
323 ] | |
324 + FONT_30pt | |
325 + [ | |
326 "-annotate", | |
327 "+%s+%s" | |
328 % ( | |
329 half_width - 0.5 * est_pixels(self.j.header, 30), | |
330 MONTAGE_BORDER - char_height(20 + 10 + 8), | |
331 ), | |
332 primary_header, | |
333 ] | |
334 + FONT_10pt | |
335 + [ | |
336 # Credits | |
337 "-annotate", | |
338 "+%s+%s" % (2, MONTAGE_BORDER + original_dims[1] + 2 * IMAGE_BORDER), | |
339 "\\n" + CREDITS, | |
340 ] | |
341 ) | |
342 | |
343 if est_pixels(self.j.description, 10) < original_dims[0] and secondary_head != "": | |
344 cmd += FONT_10pt + [ | |
345 "-annotate", | |
346 "+%s+%s" | |
347 % ( | |
348 half_width - 0.5 * est_pixels(self.j.description, 10), | |
349 MONTAGE_BORDER - char_height(20 + 4) | |
350 # 4 = line spacing | |
351 ), | |
352 secondary_head, | |
353 ] | |
354 | |
355 # Apply col ticks along top | |
356 for z in range(0, self.j.length, j_ticks): | |
357 image_side_percentage = float(z) / self.j.length | |
358 x = ( | |
359 (image_side_percentage * original_dims[0]) | |
360 + MONTAGE_BORDER | |
361 + IMAGE_BORDER | |
362 ) | |
363 y = MONTAGE_BORDER - 1 | |
364 | |
365 # Ticks | |
366 cmd += NFGS | |
367 cmd += [ | |
368 "-draw", | |
369 "line %s,%s %s,%s" % (x, y, x, y - TICK_LENGTH), | |
370 ] | |
371 | |
372 # Keep text from running off the edge. | |
373 space_to_end_of_image = (1 - image_side_percentage) * original_dims[0] | |
374 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0: | |
375 continue | |
376 | |
377 # Text labels | |
378 cmd += FONT_20pt | |
379 cmd += ["-annotate", "+%s+%s" % (x + 5, y), self.label_formatter(z)] | |
380 | |
381 cmd.append(outfile) | |
382 #tmpFile = open(outfile, "w") | |
383 #tmpFile.close() | |
384 log.info(subprocess.check_output( ["cp", infile, outfile] )) | |
385 log.info(subprocess.list2cmdline(cmd)) | |
386 log.info(subprocess.check_output( "ls" )) | |
387 log.info(self.tmpdir) | |
388 log.info(subprocess.check_output( ["ls", self.tmpdir])) | |
389 log.info(outfile[2:]) | |
390 log.info("Above was ls\n") | |
391 try: | |
392 subprocess.check_output(cmd)# + [" 2>&1"]) | |
393 except: | |
394 log.info("Excepted") | |
395 | |
396 | |
397 | |
398 class Misty(object): | |
399 """MIST Class for building MIST Plots | |
400 """ | |
401 | |
402 def __init__(self, window=10, zoom=50, matrix="edna", files_path="mist_images"): | |
403 self.tmpdir = tempfile.mkdtemp(prefix="cpt.mist3.", dir=".") | |
404 self.window = str(window) | |
405 self.zoom = zoom | |
406 self.matrix = matrix | |
407 self.records = [] | |
408 self.matrix_data = [] | |
409 | |
410 # Image storage | |
411 self.files_path = files_path | |
412 if not os.path.exists(self.files_path): | |
413 os.makedirs(self.files_path) | |
414 | |
415 def _get_record(self, record_id): | |
416 for i, record in enumerate(self.records): | |
417 if record.id == record_id: | |
418 return record | |
419 | |
420 def _get_record_idx(self, record_id): | |
421 for i, record in enumerate(self.records): | |
422 if record.id == record_id: | |
423 return i | |
424 | |
425 raise RuntimeError("Could not find record ID=%s" % record_id) | |
426 | |
427 def register_all_files(self, file_list): | |
428 for fasta_file in file_list: | |
429 for record in SeqIO.parse(fasta_file, "fasta"): | |
430 self.register_record(record) | |
431 | |
432 def register_record(self, record): | |
433 self.records.append(FancyRecord(record, self.tmpdir)) | |
434 | |
435 def set_matrix(self, matrix): | |
436 self.matrix_data = matrix | |
437 for i in range(len(self.matrix_data)): | |
438 record_i = self._get_record(self.matrix_data[i][0]["i"]) | |
439 for j in range(len(self.matrix_data[i])): | |
440 record_j = self._get_record(self.matrix_data[i][j]["j"]) | |
441 self.matrix_data[i][j]["subplot"] = Subplot( | |
442 record_i, record_j, self.tmpdir, self.zoom | |
443 ) | |
444 | |
445 # More processing? | |
446 logging.debug("\n" + pprint.pformat(matrix)) | |
447 | |
448 def generate_matrix(self, mtype="complete"): | |
449 matrix = [] | |
450 if mtype == "complete": | |
451 for i in self.records: | |
452 row = [] | |
453 for j in self.records: | |
454 row.append({"i": i.id, "j": j.id}) | |
455 matrix.append(row) | |
456 elif mtype == "1vn": | |
457 if len(self.records) < 2: | |
458 raise RuntimeError("1vN not available for fewer than two sequences") | |
459 else: | |
460 row = [] | |
461 for j in self.records[1:]: | |
462 row.append({"i": self.records[0].id, "j": j.id}) | |
463 matrix.append(row) | |
464 return matrix | |
465 | |
466 @classmethod | |
467 def obtain_image_dimensions(cls, path): | |
468 cmd = ["identify", path] | |
469 output = subprocess.check_output(cmd, universal_newlines=True) | |
470 size = output.split(" ")[3] | |
471 (w, h) = size[0 : size.index("+")].split("x") | |
472 return (int(w), int(h)) | |
473 | |
474 @classmethod | |
475 def BestTick(cls, largest, mostticks): | |
476 # http://stackoverflow.com/a/361687 | |
477 minimum = largest / mostticks | |
478 magnitude = 10 ** math.floor(math.log(minimum) / math.log(10)) | |
479 residual = minimum / magnitude | |
480 if residual > 5: | |
481 tick = 10 * magnitude | |
482 elif residual > 2: | |
483 tick = 5 * magnitude | |
484 elif residual > 1: | |
485 tick = 2 * magnitude | |
486 else: | |
487 tick = magnitude | |
488 return tick | |
489 | |
490 @classmethod | |
491 def resize_image(cls, scale, from_file, to_file): | |
492 cmd = ["convert", "-resize", scale, from_file, to_file] | |
493 log.debug(" ".join(cmd)) | |
494 subprocess.check_call(cmd) | |
495 | |
496 def get_image_map(self): | |
497 image_template = Template( | |
498 '<area shape="rect" coords="${x1},${y1},${x2},${y2}" alt="${alt}" href="${href}" />' | |
499 ) | |
500 imagemap = [] | |
501 | |
502 j_widths = [] | |
503 i_height = [] | |
504 | |
505 for j in range(len(self.matrix_data[0])): | |
506 j_widths.append(self.matrix_data[0][j]["subplot"].get_thumb_dims()[0]) | |
507 | |
508 for i in range(len(self.matrix_data)): | |
509 i_height.append(self.matrix_data[i][0]["subplot"].get_thumb_dims()[1]) | |
510 | |
511 log.debug(pprint.pformat(j_widths)) | |
512 log.debug(pprint.pformat(i_height)) | |
513 | |
514 def cur_y(i_idx): | |
515 return ( | |
516 MONTAGE_BORDER | |
517 + sum(i_height[0:i_idx]) | |
518 + (2 * IMAGE_BORDER * (1 + i_idx)) | |
519 ) | |
520 | |
521 def cur_x(j_idx): | |
522 return ( | |
523 MONTAGE_BORDER | |
524 + sum(j_widths[0:j_idx]) | |
525 + (2 * IMAGE_BORDER * (1 + j_idx)) | |
526 ) | |
527 | |
528 for j in range(len(self.matrix_data[0])): | |
529 for i in range(len(self.matrix_data)): | |
530 current = self.matrix_data[i][j]["subplot"] | |
531 # Move to final resting place | |
532 new_image_location = current.move_to_dir(self.files_path) | |
533 | |
534 # Build imagemagp string | |
535 imagemap.append( | |
536 image_template.substitute( | |
537 { | |
538 # Start at +image border so the border isn't included in | |
539 # start of box | |
540 "x1": cur_x(j), | |
541 "y1": cur_y(i), | |
542 "x2": cur_x(j) + j_widths[j], | |
543 "y2": cur_y(i) + i_height[i], | |
544 "alt": current.get_description(), | |
545 "href": new_image_location, | |
546 } | |
547 ) | |
548 ) | |
549 return "\n".join(imagemap) | |
550 | |
551 def _generate_montage(self): | |
552 image_list = [] | |
553 for i in range(len(self.matrix_data)): | |
554 for j in range(len(self.matrix_data[i])): | |
555 subplot = self.matrix_data[i][j]["subplot"] | |
556 image_list.append(subplot.thumb_path) | |
557 | |
558 # Montage step | |
559 global MONTAGE_BORDER | |
560 global MONTAGE_BORDER_COORD | |
561 for rec in self.records: | |
562 MONTAGE_BORDER = max(MONTAGE_BORDER, (len(rec.id) * 12) + 4) | |
563 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER) | |
564 | |
565 m0 = os.path.join(self.tmpdir, "m0.png") | |
566 # log.info(subprocess.check_output( ["cp", image_list[0], m0] )) | |
567 cmd = ["montage"] + image_list | |
568 cmd += [ | |
569 "-tile", | |
570 "%sx%s" % (len(self.matrix_data[0]), len(self.matrix_data)), | |
571 "-geometry", | |
572 "+0+0", | |
573 "-border", | |
574 str(IMAGE_BORDER), | |
575 "-bordercolor", | |
576 IMAGE_BORDER_COLOUR, | |
577 "-font", | |
578 TYPEFONT, | |
579 m0, | |
580 ] | |
581 | |
582 log.debug(" ".join(cmd)) | |
583 try: | |
584 subprocess.check_call(cmd) | |
585 except: | |
586 log.debug("Excepted, 2") | |
587 # Add grey borders | |
588 montage_path = os.path.join(self.tmpdir, "montage.png") | |
589 cmd = [ | |
590 "convert", | |
591 m0, | |
592 "-bordercolor", | |
593 IMAGE_BORDER_COLOUR, | |
594 "-border", | |
595 IMAGE_BORDER_COORD, | |
596 "-bordercolor", | |
597 MONTAGE_BORDER_COLOUR, | |
598 "-border", | |
599 MONTAGE_BORDER_COORD, | |
600 montage_path, | |
601 ] | |
602 | |
603 log.debug(" ".join(cmd)) | |
604 try: | |
605 subprocess.check_call(cmd) | |
606 except: | |
607 log.debug("Excepted, 2") | |
608 os.unlink(m0) | |
609 return montage_path | |
610 | |
611 def _annotate_montage(self, base_path): | |
612 # Calculate some expected dimension | |
613 cumulative_width = 0 | |
614 cumulative_height = 0 | |
615 for i in range(len(self.matrix_data)): | |
616 for j in range(len(self.matrix_data[i])): | |
617 subplot = self.matrix_data[i][j]["subplot"] | |
618 | |
619 if i == 0: | |
620 cumulative_width += subplot.get_thumb_dims()[0] + IMAGE_BORDER * 2 | |
621 | |
622 if j == 0: | |
623 cumulative_height += subplot.get_thumb_dims()[1] + IMAGE_BORDER * 2 | |
624 | |
625 convert_arguments_top = [] | |
626 convert_arguments_left = [] | |
627 left_offset = cumulative_width + MONTAGE_BORDER | |
628 | |
629 current_sum_width = MONTAGE_BORDER | |
630 current_sum_height = MONTAGE_BORDER | |
631 | |
632 convert_arguments_top+= [ | |
633 "-rotate", | |
634 "-90" | |
635 ] | |
636 # Top side | |
637 for j in range(len(self.matrix_data[0])): | |
638 subplot = self.matrix_data[0][j]["subplot"] | |
639 convert_arguments_top += [ | |
640 "-fill", | |
641 LABEL_COLOUR, | |
642 "-annotate", | |
643 "-%s+%s" % (0, str(cumulative_width - current_sum_width -(subplot.get_thumb_dims()[0]/2) + (2 * MONTAGE_BORDER) + IMAGE_BORDER)), | |
644 subplot.j.header, | |
645 ] | |
646 current_sum_width += subplot.get_thumb_dims()[0] + (2 * IMAGE_BORDER) | |
647 log.debug("CSW %s", current_sum_width) | |
648 convert_arguments_top+= [ | |
649 "-rotate", | |
650 "90" | |
651 ] | |
652 # Left side | |
653 #convert_arguments_left += [ | |
654 # "-rotate", | |
655 # "90" | |
656 #] | |
657 for i in range(len(self.matrix_data)): | |
658 subplot = self.matrix_data[i][0]["subplot"] | |
659 convert_arguments_left += [ | |
660 "-fill", | |
661 LABEL_COLOUR, | |
662 "-annotate", | |
663 "+2+%s" % str(current_sum_height + (subplot.get_thumb_dims()[1]/2.0) + IMAGE_BORDER), | |
664 "\n" + subplot.i.header, | |
665 ] | |
666 current_sum_height += subplot.get_thumb_dims()[1] + (2 * IMAGE_BORDER) | |
667 log.debug("CSH %s", current_sum_height) | |
668 | |
669 cmd = [ | |
670 "convert", | |
671 base_path, | |
672 # "-rotate", | |
673 # "-90", | |
674 "-pointsize", | |
675 "20", | |
676 "-font", | |
677 TYPEFONT, | |
678 ] | |
679 cmd += convert_arguments_left | |
680 # cmd += ["-rotate", "90"] | |
681 cmd += convert_arguments_top | |
682 | |
683 output_path = os.path.join(self.tmpdir, "large.png") | |
684 cmd += [ | |
685 "-pointsize", | |
686 "14", | |
687 "-annotate", | |
688 "+2+%s" % str(current_sum_height + 40), | |
689 CREDITS, | |
690 output_path, | |
691 ] | |
692 log.debug(" ".join(cmd)) | |
693 try: | |
694 subprocess.check_call(cmd) | |
695 except: | |
696 log.debug("Excepted, 3") | |
697 #subprocess.check_output(cmd) | |
698 return output_path | |
699 | |
700 def run(self): | |
701 # We want the final thumbnail for the overall image to have a max width/height | |
702 # of 700px for nice display | |
703 total_seqlen_j = 0 | |
704 total_seqlen_i = 0 | |
705 for j in range(len(self.matrix_data[0])): | |
706 total_seqlen_j += self.matrix_data[0][j]["subplot"].j.length | |
707 | |
708 for i in range(len(self.matrix_data)): | |
709 total_seqlen_i += self.matrix_data[i][0]["subplot"].i.length | |
710 total_seqlen = max(total_seqlen_i, total_seqlen_j) | |
711 rescale = 200 * (700.0 / (float(total_seqlen) / self.zoom)) | |
712 rescale_p = "%s%%" % rescale | |
713 | |
714 # Generate gepard plots for each of the sub-images | |
715 for i in range(len(self.matrix_data)): | |
716 for j in range(len(self.matrix_data[i])): | |
717 subplot = self.matrix_data[i][j]["subplot"] | |
718 | |
719 # generates _orig and _thumb versions | |
720 subplot.run_gepard(self.matrix, self.window, global_rescale=rescale_p) | |
721 | |
722 base_montage = self._generate_montage() | |
723 annotated_montage = self._annotate_montage(base_montage) | |
724 | |
725 final_montage_path = os.path.join(self.files_path, "large.png") | |
726 shutil.move(annotated_montage, final_montage_path) | |
727 return final_montage_path | |
728 | |
729 | |
730 def mist_wrapper( | |
731 files, | |
732 window=10, | |
733 zoom=50, | |
734 matrix="edna", | |
735 plot_type="complete", | |
736 files_path="mist_images", | |
737 ): | |
738 html_page = """ | |
739 <!DOCTYPE html> | |
740 <html> | |
741 <body> | |
742 <h1>Mist Results</h1> | |
743 <p>Each section of mist output is now clickable to view a higher resolution version of that subsection</p> | |
744 <img src="large.png" usemap="#mistmap"> | |
745 <map name="mistmap"> | |
746 %s | |
747 </map> | |
748 </body> | |
749 </html> | |
750 """ | |
751 | |
752 m = Misty(window=window, zoom=zoom, matrix=matrix, files_path=files_path) | |
753 | |
754 # There is only one special case, so we handle that separately. Every other | |
755 # plot type wants ALL of the sequences available. | |
756 if ( | |
757 plot_type == "2up" | |
758 and len(files) != 2 | |
759 and matrix not in ("protidentity", "blosum62") | |
760 ): | |
761 idx = 0 | |
762 # Pull top two sequences. | |
763 for fasta_file in files: | |
764 for record in SeqIO.parse(fasta_file, "fasta"): | |
765 m.register_record(record) | |
766 | |
767 # Exit after we've seen two sequences | |
768 idx += 1 | |
769 if idx == 2: | |
770 break | |
771 else: | |
772 m.register_all_files(files) | |
773 | |
774 # Generate the matrix appropriate to this plot type. There are two matrix | |
775 # types: 1vN and complete. 1vN is just a line, complete is a complete | |
776 # square. | |
777 if plot_type == "complete": | |
778 # ALL sequences are used. | |
779 m.set_matrix(m.generate_matrix(mtype="complete")) | |
780 elif plot_type == "2up": | |
781 # Just two sequences. | |
782 if len(files) == 2 and matrix in ("protidentity", "blosum62"): | |
783 m.set_matrix(m.generate_matrix(mtype="complete")) | |
784 else: | |
785 # Co-op the 1vn to do a single plot, rather than a "complete" plot | |
786 m.set_matrix(m.generate_matrix(mtype="1vn")) | |
787 elif plot_type == "1vn": | |
788 m.set_matrix(m.generate_matrix(mtype="1vn")) | |
789 else: | |
790 raise ValueError("Unknown plot type %s" % plot_type) | |
791 | |
792 m.run() | |
793 # image_map will be returned from MIST | |
794 image_map = m.get_image_map() | |
795 | |
796 return html_page % image_map | |
797 | |
798 | |
799 if __name__ == "__main__": | |
800 parser = argparse.ArgumentParser(description="MIST v3", epilog="") | |
801 parser.add_argument( | |
802 "files", nargs="+", type=argparse.FileType("r"), help="Fasta sequences" | |
803 ) | |
804 | |
805 parser.add_argument("--zoom", type=int, help="# bases / pixel", default=50) | |
806 parser.add_argument("--window", type=int, help="Window size", default=10) | |
807 parser.add_argument( | |
808 "--matrix", | |
809 type=str, | |
810 choices=["ednaorig", "pam250", "edna", "protidentity", "blosum62"], | |
811 help="# bases / pixel", | |
812 default="edna", | |
813 ) | |
814 | |
815 parser.add_argument( | |
816 "--plot_type", | |
817 choices=["2up", "1vn", "complete"], | |
818 help="Plot type", | |
819 default="complete", | |
820 ) | |
821 | |
822 parser.add_argument( | |
823 "--files_path", type=str, help="Image directory", default="mist_images" | |
824 ) | |
825 | |
826 args = parser.parse_args() | |
827 print(mist_wrapper(**vars(args))) |