comparison cpt_mist3/mist3.py @ 0:1a9603d09814 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 02:58:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1a9603d09814
1 #!/usr/bin/env python
2 import argparse
3 import time
4 import tempfile
5 import pprint
6 import shutil
7 import os
8 import math
9 from string import Template
10 from Bio import SeqIO
11 import subprocess
12 import sys
13 import logging
14
15 FORMAT = "[%(levelname)s:%(filename)s:%(lineno)s:%(funcName)s()] %(message)s"
16 logging.basicConfig(level=logging.INFO, format=FORMAT)
17 log = logging.getLogger("mist")
18 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
19
20 MONTAGE_BORDER = 80
21 IMAGE_BORDER = 2
22
23 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER)
24 IMAGE_BORDER_COORD = "%sx%s" % (IMAGE_BORDER, IMAGE_BORDER)
25
26 DOUBLE_IMAGE_BORDER_COORD = "%sx%s" % (2 * IMAGE_BORDER, 2 * IMAGE_BORDER)
27
28 MONTAGE_BORDER_COLOUR = "grey70"
29 IMAGE_BORDER_COLOUR = "purple"
30 LABEL_COLOUR = "grey22"
31 TICK_LENGTH = 0.2 * MONTAGE_BORDER
32
33 TYPEFONT = "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf"
34
35 CREDITS = (
36 "CPTs MISTv3\\n"
37 "GPLv3 (C) 2015 Helena Rasche <esr\@tamu.edu>\\n"
38 "Dot plots by Gepard Dot Plotter by Dr. Jan Krumsiek"
39 )
40
41
42 class FancyRecord(object):
43 def __init__(self, record, tmpdir):
44 self.temp = tempfile.NamedTemporaryFile(mode='w', dir=tmpdir, delete=False, suffix=".fa")
45 self.temp_path = self.temp.name
46 self.id = self.temp_path.rsplit("/")[-1]
47 self.record = record
48 self.header = record.id
49
50 # Description include record.id
51 sd = record.description.strip().split(" ")
52 if len(sd) > 1:
53 self.description = " ".join(sd[1:])
54 else:
55 self.description = ""
56
57 self.length = len(record.seq)
58
59 # Serialize to disk
60 self._write(self.temp)
61 self.temp.flush()
62 self.temp.close()
63
64 def _write(self, handle):
65 SeqIO.write([self.record], handle, "fasta")
66
67
68 class Subplot(object):
69 def __init__(self, i, j, tmpdir, zoom):
70 self.i = i
71 self.j = j
72 # Mist information
73 self.tmpdir = tmpdir
74 self.zoom = zoom
75
76 self.original_path = None
77 self.thumb_path = None
78 self.annotated_original_path = None
79
80 def safe(self, string):
81 return "".join(
82 [
83 c
84 for c in string.strip()
85 if c.isalpha() or c.isdigit() or c == " " or c in ("[", "]", "-", "_")
86 ]
87 ).rstrip()
88
89 def move_to_dir(self, files_path):
90 """Move a specific image that's part of a subplot to an output
91 directory and return the name
92 """
93 destination_fn = (
94 self.safe("%s_vs_%s_[annotated]" % (self.i.header, self.j.header)) + ".png"
95 )
96 destination = os.path.join(files_path, destination_fn)
97 log.debug("cp %s %s", self.annotated_original_path, destination)
98 if (
99 self.annotated_original_path is not None
100 and os.path.exists(self.annotated_original_path)
101 and not os.path.exists(destination)
102 ):
103 shutil.move(self.annotated_original_path, destination)
104 return destination_fn
105
106 def get_description(self):
107 return "%s v %s" % (self.i.header, self.j.header)
108
109 def __repr__(self):
110 return "Subplot [%s]" % self.get_description()
111
112 def run_gepard(self, matrix, window, global_rescale="35%"):
113 """Run gepard on two sequences, with a specified output file
114 """
115 log.info("Running Gepard on %s", self.get_description())
116
117 destination_fn = (
118 self.safe("%s_vs_%s_orig" % (self.i.header, self.j.header)) + ".png"
119 )
120 self.original_path = os.path.join(self.tmpdir, destination_fn)
121
122 cmd = [
123 "java",
124 "-jar",
125 os.path.join(SCRIPT_DIR, "gepard.jar"),
126 "--seq1",
127 self.j.temp_path,
128 "--seq2",
129 self.i.temp_path,
130 "--matrix",
131 matrix + ".mat",
132 "--outfile",
133 self.original_path,
134 "--word",
135 str(window),
136 "--zoom",
137 str(self.zoom),
138 "--silent",
139 ]
140 log.debug(subprocess.list2cmdline(cmd))
141 #log.info(subprocess.check_output("convert -list type"))
142 #exit(2)
143 failure_count = 0
144 while True:
145 try:
146 subprocess.check_call(cmd)
147 break
148 except subprocess.CalledProcessError:
149 failure_count += 1
150 log.warn("sp.CPE FC %s", failure_count)
151 if failure_count > 3:
152 break
153 time.sleep(1)
154
155 # Generate border/individualised images
156 log.debug(" Annotating")
157 destination_fn = (
158 self.safe("%s_vs_%s_annotated" % (self.i.header, self.j.header)) + ".png"
159 )
160 self.annotated_original_path = os.path.join(self.tmpdir, destination_fn)
161 self.annotate_image(self.original_path, self.annotated_original_path)
162
163 # Generate thumbnail version of base image
164 log.debug(" Resizing")
165 destination_fn = (
166 self.safe("%s_vs_%s_thumb" % (self.i.header, self.j.header)) + ".png"
167 )
168 self.thumb_path = os.path.join(self.tmpdir, destination_fn)
169 Misty.resize_image(global_rescale, self.original_path, self.thumb_path)
170
171 def get_thumb_dims(self):
172 """
173 For NxN sized images, this avoids running `identify` when it won't be
174 used (e.g i=3,j=4)
175 """
176 if not hasattr(self, "thumb_dims") or self.thumb_dims is None:
177 self.thumb_dims = Misty.obtain_image_dimensions(self.thumb_path)
178 return self.thumb_dims
179
180 def label_formatter(self, bases):
181 if bases > 1000000:
182 label = "%s Mbp" % int(bases / 1000000)
183 elif bases > 1000:
184 label = "%s kbp" % int(bases / 1000)
185 else:
186 label = "%s b" % bases
187 return label
188
189 def annotate_image(self, infile, outfile):
190 original_dims = Misty.obtain_image_dimensions(infile)
191
192 half_height = (original_dims[1] / 2) + MONTAGE_BORDER + IMAGE_BORDER
193 half_width = (original_dims[0] / 2) + MONTAGE_BORDER + IMAGE_BORDER
194
195 restoreBorder = MONTAGE_BORDER
196
197 def char_width(font_size):
198 """approximate pixel width of a single character at Xpt font
199
200 Assumes that 40pt font results in 20px characters
201 """
202 return int(float(font_size) / 2)
203
204 def char_height(font_size):
205 """approximate pixel height of a single character at Xpt font
206
207 Assumes that 40pt font results in 30px characters
208 """
209 return int(float(font_size) * 30 / 40)
210
211 def est_pixels(string, font_size):
212 """guess pixel width of a string at a given font size
213 """
214 return char_width(font_size) * len(string)
215
216 j_ticks = int(Misty.BestTick(self.j.length, 5))
217 i_ticks = int(Misty.BestTick(self.i.length, 5))
218
219 # Convert definitions
220 GREY_FILL = ["-fill", LABEL_COLOUR]
221 NO_FILL = ["-fill", "none"]
222 NO_STROKE = ["-stroke", "none"]
223 GREY_STROKE = ["-stroke", LABEL_COLOUR, "-strokewidth", "2"]
224 GFNS = GREY_FILL + NO_STROKE
225 NFGS = NO_FILL + GREY_STROKE
226 # Font for labels
227 FONT_SPEC = GFNS + ["-font", TYPEFONT]
228 FONT_10pt = FONT_SPEC + ["-pointsize", "10"]
229 FONT_20pt = FONT_SPEC + ["-pointsize", "20"]
230 FONT_30pt = FONT_SPEC + ["-pointsize", "30"]
231
232 cmd = [
233 "convert",
234 infile,
235 "-bordercolor",
236 IMAGE_BORDER_COLOUR,
237 "-border",
238 IMAGE_BORDER_COORD,
239 "-bordercolor",
240 MONTAGE_BORDER_COLOUR,
241 "-border",
242 MONTAGE_BORDER_COORD,
243 ]
244
245 # Rotate -90, apply row header at bottom.
246 primary_header = self.i.header
247 secondary_head = self.i.description
248 cmd += (
249 ["-rotate", "-90",]
250 + FONT_30pt
251 + [
252 # Side label (i/row)
253 "-annotate",
254 "+%s+%s"
255 % (
256 half_height - 0.5 * est_pixels(self.i.header, 30),
257 original_dims[0]
258 + MONTAGE_BORDER
259 + 2 * IMAGE_BORDER
260 + char_height(30 + 20)
261 + 10
262 # 30 = primary header
263 # 20 = tick labels
264 ),
265 primary_header,
266 ]
267 )
268
269 if est_pixels(self.i.description, 10) < original_dims[1] and secondary_head != "":
270 cmd += FONT_10pt + [
271 # Side label (i/row)
272 "-annotate",
273 "+%s+%s"
274 % (
275 half_height - 0.5 * est_pixels(self.i.description, 10),
276 original_dims[0]
277 + MONTAGE_BORDER
278 + 2 * IMAGE_BORDER
279 + char_height(30 + 20 + 10 + 4)
280 # 30 = primary header
281 # 20 = tick labels
282 # 10 = secondary header height
283 # 4 = line spacing
284 ),
285 secondary_head,
286 ]
287
288 # Apply row ticks labels at bottom
289 for z in range(0, self.i.length, i_ticks):
290
291 image_side_percentage = float(z) / self.i.length
292 x = (
293 (image_side_percentage * original_dims[1])
294 + MONTAGE_BORDER
295 + IMAGE_BORDER
296 )
297 y = MONTAGE_BORDER + original_dims[0] + (2 * IMAGE_BORDER)
298
299 # Apply ticks
300 cmd += NFGS
301 cmd += [
302 "-draw",
303 "line %s,%s %s,%s" % (x, y, x, y + TICK_LENGTH),
304 ]
305
306 # Keep text from running off the edge.
307 space_to_end_of_image = (1 - image_side_percentage) * original_dims[1]
308 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0:
309 continue
310
311 # Text label
312 cmd += FONT_20pt
313 cmd += ["-annotate", "+%s+%s" % (x + 5, y + 15), self.label_formatter(z)]
314
315 # Rotate back to final rotation
316 primary_header = self.j.header
317 secondary_head = self.j.description
318 cmd += (
319 [
320 "-rotate",
321 "90",
322 # Top label (j/column)
323 ]
324 + FONT_30pt
325 + [
326 "-annotate",
327 "+%s+%s"
328 % (
329 half_width - 0.5 * est_pixels(self.j.header, 30),
330 MONTAGE_BORDER - char_height(20 + 10 + 8),
331 ),
332 primary_header,
333 ]
334 + FONT_10pt
335 + [
336 # Credits
337 "-annotate",
338 "+%s+%s" % (2, MONTAGE_BORDER + original_dims[1] + 2 * IMAGE_BORDER),
339 "\\n" + CREDITS,
340 ]
341 )
342
343 if est_pixels(self.j.description, 10) < original_dims[0] and secondary_head != "":
344 cmd += FONT_10pt + [
345 "-annotate",
346 "+%s+%s"
347 % (
348 half_width - 0.5 * est_pixels(self.j.description, 10),
349 MONTAGE_BORDER - char_height(20 + 4)
350 # 4 = line spacing
351 ),
352 secondary_head,
353 ]
354
355 # Apply col ticks along top
356 for z in range(0, self.j.length, j_ticks):
357 image_side_percentage = float(z) / self.j.length
358 x = (
359 (image_side_percentage * original_dims[0])
360 + MONTAGE_BORDER
361 + IMAGE_BORDER
362 )
363 y = MONTAGE_BORDER - 1
364
365 # Ticks
366 cmd += NFGS
367 cmd += [
368 "-draw",
369 "line %s,%s %s,%s" % (x, y, x, y - TICK_LENGTH),
370 ]
371
372 # Keep text from running off the edge.
373 space_to_end_of_image = (1 - image_side_percentage) * original_dims[0]
374 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0:
375 continue
376
377 # Text labels
378 cmd += FONT_20pt
379 cmd += ["-annotate", "+%s+%s" % (x + 5, y), self.label_formatter(z)]
380
381 cmd.append(outfile)
382 #tmpFile = open(outfile, "w")
383 #tmpFile.close()
384 log.info(subprocess.check_output( ["cp", infile, outfile] ))
385 log.info(subprocess.list2cmdline(cmd))
386 log.info(subprocess.check_output( "ls" ))
387 log.info(self.tmpdir)
388 log.info(subprocess.check_output( ["ls", self.tmpdir]))
389 log.info(outfile[2:])
390 log.info("Above was ls\n")
391 try:
392 subprocess.check_output(cmd)# + [" 2>&1"])
393 except:
394 log.info("Excepted")
395
396
397
398 class Misty(object):
399 """MIST Class for building MIST Plots
400 """
401
402 def __init__(self, window=10, zoom=50, matrix="edna", files_path="mist_images"):
403 self.tmpdir = tempfile.mkdtemp(prefix="cpt.mist3.", dir=".")
404 self.window = str(window)
405 self.zoom = zoom
406 self.matrix = matrix
407 self.records = []
408 self.matrix_data = []
409
410 # Image storage
411 self.files_path = files_path
412 if not os.path.exists(self.files_path):
413 os.makedirs(self.files_path)
414
415 def _get_record(self, record_id):
416 for i, record in enumerate(self.records):
417 if record.id == record_id:
418 return record
419
420 def _get_record_idx(self, record_id):
421 for i, record in enumerate(self.records):
422 if record.id == record_id:
423 return i
424
425 raise RuntimeError("Could not find record ID=%s" % record_id)
426
427 def register_all_files(self, file_list):
428 for fasta_file in file_list:
429 for record in SeqIO.parse(fasta_file, "fasta"):
430 self.register_record(record)
431
432 def register_record(self, record):
433 self.records.append(FancyRecord(record, self.tmpdir))
434
435 def set_matrix(self, matrix):
436 self.matrix_data = matrix
437 for i in range(len(self.matrix_data)):
438 record_i = self._get_record(self.matrix_data[i][0]["i"])
439 for j in range(len(self.matrix_data[i])):
440 record_j = self._get_record(self.matrix_data[i][j]["j"])
441 self.matrix_data[i][j]["subplot"] = Subplot(
442 record_i, record_j, self.tmpdir, self.zoom
443 )
444
445 # More processing?
446 logging.debug("\n" + pprint.pformat(matrix))
447
448 def generate_matrix(self, mtype="complete"):
449 matrix = []
450 if mtype == "complete":
451 for i in self.records:
452 row = []
453 for j in self.records:
454 row.append({"i": i.id, "j": j.id})
455 matrix.append(row)
456 elif mtype == "1vn":
457 if len(self.records) < 2:
458 raise RuntimeError("1vN not available for fewer than two sequences")
459 else:
460 row = []
461 for j in self.records[1:]:
462 row.append({"i": self.records[0].id, "j": j.id})
463 matrix.append(row)
464 return matrix
465
466 @classmethod
467 def obtain_image_dimensions(cls, path):
468 cmd = ["identify", path]
469 output = subprocess.check_output(cmd, universal_newlines=True)
470 size = output.split(" ")[3]
471 (w, h) = size[0 : size.index("+")].split("x")
472 return (int(w), int(h))
473
474 @classmethod
475 def BestTick(cls, largest, mostticks):
476 # http://stackoverflow.com/a/361687
477 minimum = largest / mostticks
478 magnitude = 10 ** math.floor(math.log(minimum) / math.log(10))
479 residual = minimum / magnitude
480 if residual > 5:
481 tick = 10 * magnitude
482 elif residual > 2:
483 tick = 5 * magnitude
484 elif residual > 1:
485 tick = 2 * magnitude
486 else:
487 tick = magnitude
488 return tick
489
490 @classmethod
491 def resize_image(cls, scale, from_file, to_file):
492 cmd = ["convert", "-resize", scale, from_file, to_file]
493 log.debug(" ".join(cmd))
494 subprocess.check_call(cmd)
495
496 def get_image_map(self):
497 image_template = Template(
498 '<area shape="rect" coords="${x1},${y1},${x2},${y2}" alt="${alt}" href="${href}" />'
499 )
500 imagemap = []
501
502 j_widths = []
503 i_height = []
504
505 for j in range(len(self.matrix_data[0])):
506 j_widths.append(self.matrix_data[0][j]["subplot"].get_thumb_dims()[0])
507
508 for i in range(len(self.matrix_data)):
509 i_height.append(self.matrix_data[i][0]["subplot"].get_thumb_dims()[1])
510
511 log.debug(pprint.pformat(j_widths))
512 log.debug(pprint.pformat(i_height))
513
514 def cur_y(i_idx):
515 return (
516 MONTAGE_BORDER
517 + sum(i_height[0:i_idx])
518 + (2 * IMAGE_BORDER * (1 + i_idx))
519 )
520
521 def cur_x(j_idx):
522 return (
523 MONTAGE_BORDER
524 + sum(j_widths[0:j_idx])
525 + (2 * IMAGE_BORDER * (1 + j_idx))
526 )
527
528 for j in range(len(self.matrix_data[0])):
529 for i in range(len(self.matrix_data)):
530 current = self.matrix_data[i][j]["subplot"]
531 # Move to final resting place
532 new_image_location = current.move_to_dir(self.files_path)
533
534 # Build imagemagp string
535 imagemap.append(
536 image_template.substitute(
537 {
538 # Start at +image border so the border isn't included in
539 # start of box
540 "x1": cur_x(j),
541 "y1": cur_y(i),
542 "x2": cur_x(j) + j_widths[j],
543 "y2": cur_y(i) + i_height[i],
544 "alt": current.get_description(),
545 "href": new_image_location,
546 }
547 )
548 )
549 return "\n".join(imagemap)
550
551 def _generate_montage(self):
552 image_list = []
553 for i in range(len(self.matrix_data)):
554 for j in range(len(self.matrix_data[i])):
555 subplot = self.matrix_data[i][j]["subplot"]
556 image_list.append(subplot.thumb_path)
557
558 # Montage step
559 global MONTAGE_BORDER
560 global MONTAGE_BORDER_COORD
561 for rec in self.records:
562 MONTAGE_BORDER = max(MONTAGE_BORDER, (len(rec.id) * 12) + 4)
563 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER)
564
565 m0 = os.path.join(self.tmpdir, "m0.png")
566 # log.info(subprocess.check_output( ["cp", image_list[0], m0] ))
567 cmd = ["montage"] + image_list
568 cmd += [
569 "-tile",
570 "%sx%s" % (len(self.matrix_data[0]), len(self.matrix_data)),
571 "-geometry",
572 "+0+0",
573 "-border",
574 str(IMAGE_BORDER),
575 "-bordercolor",
576 IMAGE_BORDER_COLOUR,
577 "-font",
578 TYPEFONT,
579 m0,
580 ]
581
582 log.debug(" ".join(cmd))
583 try:
584 subprocess.check_call(cmd)
585 except:
586 log.debug("Excepted, 2")
587 # Add grey borders
588 montage_path = os.path.join(self.tmpdir, "montage.png")
589 cmd = [
590 "convert",
591 m0,
592 "-bordercolor",
593 IMAGE_BORDER_COLOUR,
594 "-border",
595 IMAGE_BORDER_COORD,
596 "-bordercolor",
597 MONTAGE_BORDER_COLOUR,
598 "-border",
599 MONTAGE_BORDER_COORD,
600 montage_path,
601 ]
602
603 log.debug(" ".join(cmd))
604 try:
605 subprocess.check_call(cmd)
606 except:
607 log.debug("Excepted, 2")
608 os.unlink(m0)
609 return montage_path
610
611 def _annotate_montage(self, base_path):
612 # Calculate some expected dimension
613 cumulative_width = 0
614 cumulative_height = 0
615 for i in range(len(self.matrix_data)):
616 for j in range(len(self.matrix_data[i])):
617 subplot = self.matrix_data[i][j]["subplot"]
618
619 if i == 0:
620 cumulative_width += subplot.get_thumb_dims()[0] + IMAGE_BORDER * 2
621
622 if j == 0:
623 cumulative_height += subplot.get_thumb_dims()[1] + IMAGE_BORDER * 2
624
625 convert_arguments_top = []
626 convert_arguments_left = []
627 left_offset = cumulative_width + MONTAGE_BORDER
628
629 current_sum_width = MONTAGE_BORDER
630 current_sum_height = MONTAGE_BORDER
631
632 convert_arguments_top+= [
633 "-rotate",
634 "-90"
635 ]
636 # Top side
637 for j in range(len(self.matrix_data[0])):
638 subplot = self.matrix_data[0][j]["subplot"]
639 convert_arguments_top += [
640 "-fill",
641 LABEL_COLOUR,
642 "-annotate",
643 "-%s+%s" % (0, str(cumulative_width - current_sum_width -(subplot.get_thumb_dims()[0]/2) + (2 * MONTAGE_BORDER) + IMAGE_BORDER)),
644 subplot.j.header,
645 ]
646 current_sum_width += subplot.get_thumb_dims()[0] + (2 * IMAGE_BORDER)
647 log.debug("CSW %s", current_sum_width)
648 convert_arguments_top+= [
649 "-rotate",
650 "90"
651 ]
652 # Left side
653 #convert_arguments_left += [
654 # "-rotate",
655 # "90"
656 #]
657 for i in range(len(self.matrix_data)):
658 subplot = self.matrix_data[i][0]["subplot"]
659 convert_arguments_left += [
660 "-fill",
661 LABEL_COLOUR,
662 "-annotate",
663 "+2+%s" % str(current_sum_height + (subplot.get_thumb_dims()[1]/2.0) + IMAGE_BORDER),
664 "\n" + subplot.i.header,
665 ]
666 current_sum_height += subplot.get_thumb_dims()[1] + (2 * IMAGE_BORDER)
667 log.debug("CSH %s", current_sum_height)
668
669 cmd = [
670 "convert",
671 base_path,
672 # "-rotate",
673 # "-90",
674 "-pointsize",
675 "20",
676 "-font",
677 TYPEFONT,
678 ]
679 cmd += convert_arguments_left
680 # cmd += ["-rotate", "90"]
681 cmd += convert_arguments_top
682
683 output_path = os.path.join(self.tmpdir, "large.png")
684 cmd += [
685 "-pointsize",
686 "14",
687 "-annotate",
688 "+2+%s" % str(current_sum_height + 40),
689 CREDITS,
690 output_path,
691 ]
692 log.debug(" ".join(cmd))
693 try:
694 subprocess.check_call(cmd)
695 except:
696 log.debug("Excepted, 3")
697 #subprocess.check_output(cmd)
698 return output_path
699
700 def run(self):
701 # We want the final thumbnail for the overall image to have a max width/height
702 # of 700px for nice display
703 total_seqlen_j = 0
704 total_seqlen_i = 0
705 for j in range(len(self.matrix_data[0])):
706 total_seqlen_j += self.matrix_data[0][j]["subplot"].j.length
707
708 for i in range(len(self.matrix_data)):
709 total_seqlen_i += self.matrix_data[i][0]["subplot"].i.length
710 total_seqlen = max(total_seqlen_i, total_seqlen_j)
711 rescale = 200 * (700.0 / (float(total_seqlen) / self.zoom))
712 rescale_p = "%s%%" % rescale
713
714 # Generate gepard plots for each of the sub-images
715 for i in range(len(self.matrix_data)):
716 for j in range(len(self.matrix_data[i])):
717 subplot = self.matrix_data[i][j]["subplot"]
718
719 # generates _orig and _thumb versions
720 subplot.run_gepard(self.matrix, self.window, global_rescale=rescale_p)
721
722 base_montage = self._generate_montage()
723 annotated_montage = self._annotate_montage(base_montage)
724
725 final_montage_path = os.path.join(self.files_path, "large.png")
726 shutil.move(annotated_montage, final_montage_path)
727 return final_montage_path
728
729
730 def mist_wrapper(
731 files,
732 window=10,
733 zoom=50,
734 matrix="edna",
735 plot_type="complete",
736 files_path="mist_images",
737 ):
738 html_page = """
739 <!DOCTYPE html>
740 <html>
741 <body>
742 <h1>Mist Results</h1>
743 <p>Each section of mist output is now clickable to view a higher resolution version of that subsection</p>
744 <img src="large.png" usemap="#mistmap">
745 <map name="mistmap">
746 %s
747 </map>
748 </body>
749 </html>
750 """
751
752 m = Misty(window=window, zoom=zoom, matrix=matrix, files_path=files_path)
753
754 # There is only one special case, so we handle that separately. Every other
755 # plot type wants ALL of the sequences available.
756 if (
757 plot_type == "2up"
758 and len(files) != 2
759 and matrix not in ("protidentity", "blosum62")
760 ):
761 idx = 0
762 # Pull top two sequences.
763 for fasta_file in files:
764 for record in SeqIO.parse(fasta_file, "fasta"):
765 m.register_record(record)
766
767 # Exit after we've seen two sequences
768 idx += 1
769 if idx == 2:
770 break
771 else:
772 m.register_all_files(files)
773
774 # Generate the matrix appropriate to this plot type. There are two matrix
775 # types: 1vN and complete. 1vN is just a line, complete is a complete
776 # square.
777 if plot_type == "complete":
778 # ALL sequences are used.
779 m.set_matrix(m.generate_matrix(mtype="complete"))
780 elif plot_type == "2up":
781 # Just two sequences.
782 if len(files) == 2 and matrix in ("protidentity", "blosum62"):
783 m.set_matrix(m.generate_matrix(mtype="complete"))
784 else:
785 # Co-op the 1vn to do a single plot, rather than a "complete" plot
786 m.set_matrix(m.generate_matrix(mtype="1vn"))
787 elif plot_type == "1vn":
788 m.set_matrix(m.generate_matrix(mtype="1vn"))
789 else:
790 raise ValueError("Unknown plot type %s" % plot_type)
791
792 m.run()
793 # image_map will be returned from MIST
794 image_map = m.get_image_map()
795
796 return html_page % image_map
797
798
799 if __name__ == "__main__":
800 parser = argparse.ArgumentParser(description="MIST v3", epilog="")
801 parser.add_argument(
802 "files", nargs="+", type=argparse.FileType("r"), help="Fasta sequences"
803 )
804
805 parser.add_argument("--zoom", type=int, help="# bases / pixel", default=50)
806 parser.add_argument("--window", type=int, help="Window size", default=10)
807 parser.add_argument(
808 "--matrix",
809 type=str,
810 choices=["ednaorig", "pam250", "edna", "protidentity", "blosum62"],
811 help="# bases / pixel",
812 default="edna",
813 )
814
815 parser.add_argument(
816 "--plot_type",
817 choices=["2up", "1vn", "complete"],
818 help="Plot type",
819 default="complete",
820 )
821
822 parser.add_argument(
823 "--files_path", type=str, help="Image directory", default="mist_images"
824 )
825
826 args = parser.parse_args()
827 print(mist_wrapper(**vars(args)))