0
|
1 #!/usr/bin/env python
|
|
2 import argparse
|
|
3 import time
|
|
4 import tempfile
|
|
5 import pprint
|
|
6 import shutil
|
|
7 import os
|
|
8 import math
|
|
9 from string import Template
|
|
10 from Bio import SeqIO
|
|
11 import subprocess
|
|
12 import sys
|
|
13 import logging
|
|
14
|
|
15 FORMAT = "[%(levelname)s:%(filename)s:%(lineno)s:%(funcName)s()] %(message)s"
|
|
16 logging.basicConfig(level=logging.INFO, format=FORMAT)
|
|
17 log = logging.getLogger("mist")
|
|
18 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
19
|
|
20 MONTAGE_BORDER = 80
|
|
21 IMAGE_BORDER = 2
|
|
22
|
|
23 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER)
|
|
24 IMAGE_BORDER_COORD = "%sx%s" % (IMAGE_BORDER, IMAGE_BORDER)
|
|
25
|
|
26 DOUBLE_IMAGE_BORDER_COORD = "%sx%s" % (2 * IMAGE_BORDER, 2 * IMAGE_BORDER)
|
|
27
|
|
28 MONTAGE_BORDER_COLOUR = "grey70"
|
|
29 IMAGE_BORDER_COLOUR = "purple"
|
|
30 LABEL_COLOUR = "grey22"
|
|
31 TICK_LENGTH = 0.2 * MONTAGE_BORDER
|
|
32
|
|
33 TYPEFONT = "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf"
|
|
34
|
|
35 CREDITS = (
|
|
36 "CPTs MISTv3\\n"
|
|
37 "GPLv3 (C) 2015 Helena Rasche <esr\@tamu.edu>\\n"
|
|
38 "Dot plots by Gepard Dot Plotter by Dr. Jan Krumsiek"
|
|
39 )
|
|
40
|
|
41
|
|
42 class FancyRecord(object):
|
|
43 def __init__(self, record, tmpdir):
|
|
44 self.temp = tempfile.NamedTemporaryFile(mode='w', dir=tmpdir, delete=False, suffix=".fa")
|
|
45 self.temp_path = self.temp.name
|
|
46 self.id = self.temp_path.rsplit("/")[-1]
|
|
47 self.record = record
|
|
48 self.header = record.id
|
|
49
|
|
50 # Description include record.id
|
|
51 sd = record.description.strip().split(" ")
|
|
52 if len(sd) > 1:
|
|
53 self.description = " ".join(sd[1:])
|
|
54 else:
|
|
55 self.description = ""
|
|
56
|
|
57 self.length = len(record.seq)
|
|
58
|
|
59 # Serialize to disk
|
|
60 self._write(self.temp)
|
|
61 self.temp.flush()
|
|
62 self.temp.close()
|
|
63
|
|
64 def _write(self, handle):
|
|
65 SeqIO.write([self.record], handle, "fasta")
|
|
66
|
|
67
|
|
68 class Subplot(object):
|
|
69 def __init__(self, i, j, tmpdir, zoom):
|
|
70 self.i = i
|
|
71 self.j = j
|
|
72 # Mist information
|
|
73 self.tmpdir = tmpdir
|
|
74 self.zoom = zoom
|
|
75
|
|
76 self.original_path = None
|
|
77 self.thumb_path = None
|
|
78 self.annotated_original_path = None
|
|
79
|
|
80 def safe(self, string):
|
|
81 return "".join(
|
|
82 [
|
|
83 c
|
|
84 for c in string.strip()
|
|
85 if c.isalpha() or c.isdigit() or c == " " or c in ("[", "]", "-", "_")
|
|
86 ]
|
|
87 ).rstrip()
|
|
88
|
|
89 def move_to_dir(self, files_path):
|
|
90 """Move a specific image that's part of a subplot to an output
|
|
91 directory and return the name
|
|
92 """
|
|
93 destination_fn = (
|
|
94 self.safe("%s_vs_%s_[annotated]" % (self.i.header, self.j.header)) + ".png"
|
|
95 )
|
|
96 destination = os.path.join(files_path, destination_fn)
|
|
97 log.debug("cp %s %s", self.annotated_original_path, destination)
|
|
98 if (
|
|
99 self.annotated_original_path is not None
|
|
100 and os.path.exists(self.annotated_original_path)
|
|
101 and not os.path.exists(destination)
|
|
102 ):
|
|
103 shutil.move(self.annotated_original_path, destination)
|
|
104 return destination_fn
|
|
105
|
|
106 def get_description(self):
|
|
107 return "%s v %s" % (self.i.header, self.j.header)
|
|
108
|
|
109 def __repr__(self):
|
|
110 return "Subplot [%s]" % self.get_description()
|
|
111
|
|
112 def run_gepard(self, matrix, window, global_rescale="35%"):
|
|
113 """Run gepard on two sequences, with a specified output file
|
|
114 """
|
|
115 log.info("Running Gepard on %s", self.get_description())
|
|
116
|
|
117 destination_fn = (
|
|
118 self.safe("%s_vs_%s_orig" % (self.i.header, self.j.header)) + ".png"
|
|
119 )
|
|
120 self.original_path = os.path.join(self.tmpdir, destination_fn)
|
|
121
|
|
122 cmd = [
|
|
123 "java",
|
|
124 "-jar",
|
|
125 os.path.join(SCRIPT_DIR, "gepard.jar"),
|
|
126 "--seq1",
|
|
127 self.j.temp_path,
|
|
128 "--seq2",
|
|
129 self.i.temp_path,
|
|
130 "--matrix",
|
|
131 matrix + ".mat",
|
|
132 "--outfile",
|
|
133 self.original_path,
|
|
134 "--word",
|
|
135 str(window),
|
|
136 "--zoom",
|
|
137 str(self.zoom),
|
|
138 "--silent",
|
|
139 ]
|
|
140 log.debug(subprocess.list2cmdline(cmd))
|
|
141 #log.info(subprocess.check_output("convert -list type"))
|
|
142 #exit(2)
|
|
143 failure_count = 0
|
|
144 while True:
|
|
145 try:
|
|
146 subprocess.check_call(cmd)
|
|
147 break
|
|
148 except subprocess.CalledProcessError:
|
|
149 failure_count += 1
|
|
150 log.warn("sp.CPE FC %s", failure_count)
|
|
151 if failure_count > 3:
|
|
152 break
|
|
153 time.sleep(1)
|
|
154
|
|
155 # Generate border/individualised images
|
|
156 log.debug(" Annotating")
|
|
157 destination_fn = (
|
|
158 self.safe("%s_vs_%s_annotated" % (self.i.header, self.j.header)) + ".png"
|
|
159 )
|
|
160 self.annotated_original_path = os.path.join(self.tmpdir, destination_fn)
|
|
161 self.annotate_image(self.original_path, self.annotated_original_path)
|
|
162
|
|
163 # Generate thumbnail version of base image
|
|
164 log.debug(" Resizing")
|
|
165 destination_fn = (
|
|
166 self.safe("%s_vs_%s_thumb" % (self.i.header, self.j.header)) + ".png"
|
|
167 )
|
|
168 self.thumb_path = os.path.join(self.tmpdir, destination_fn)
|
|
169 Misty.resize_image(global_rescale, self.original_path, self.thumb_path)
|
|
170
|
|
171 def get_thumb_dims(self):
|
|
172 """
|
|
173 For NxN sized images, this avoids running `identify` when it won't be
|
|
174 used (e.g i=3,j=4)
|
|
175 """
|
|
176 if not hasattr(self, "thumb_dims") or self.thumb_dims is None:
|
|
177 self.thumb_dims = Misty.obtain_image_dimensions(self.thumb_path)
|
|
178 return self.thumb_dims
|
|
179
|
|
180 def label_formatter(self, bases):
|
|
181 if bases > 1000000:
|
|
182 label = "%s Mbp" % int(bases / 1000000)
|
|
183 elif bases > 1000:
|
|
184 label = "%s kbp" % int(bases / 1000)
|
|
185 else:
|
|
186 label = "%s b" % bases
|
|
187 return label
|
|
188
|
|
189 def annotate_image(self, infile, outfile):
|
|
190 original_dims = Misty.obtain_image_dimensions(infile)
|
|
191
|
|
192 half_height = (original_dims[1] / 2) + MONTAGE_BORDER + IMAGE_BORDER
|
|
193 half_width = (original_dims[0] / 2) + MONTAGE_BORDER + IMAGE_BORDER
|
|
194
|
|
195 restoreBorder = MONTAGE_BORDER
|
|
196
|
|
197 def char_width(font_size):
|
|
198 """approximate pixel width of a single character at Xpt font
|
|
199
|
|
200 Assumes that 40pt font results in 20px characters
|
|
201 """
|
|
202 return int(float(font_size) / 2)
|
|
203
|
|
204 def char_height(font_size):
|
|
205 """approximate pixel height of a single character at Xpt font
|
|
206
|
|
207 Assumes that 40pt font results in 30px characters
|
|
208 """
|
|
209 return int(float(font_size) * 30 / 40)
|
|
210
|
|
211 def est_pixels(string, font_size):
|
|
212 """guess pixel width of a string at a given font size
|
|
213 """
|
|
214 return char_width(font_size) * len(string)
|
|
215
|
|
216 j_ticks = int(Misty.BestTick(self.j.length, 5))
|
|
217 i_ticks = int(Misty.BestTick(self.i.length, 5))
|
|
218
|
|
219 # Convert definitions
|
|
220 GREY_FILL = ["-fill", LABEL_COLOUR]
|
|
221 NO_FILL = ["-fill", "none"]
|
|
222 NO_STROKE = ["-stroke", "none"]
|
|
223 GREY_STROKE = ["-stroke", LABEL_COLOUR, "-strokewidth", "2"]
|
|
224 GFNS = GREY_FILL + NO_STROKE
|
|
225 NFGS = NO_FILL + GREY_STROKE
|
|
226 # Font for labels
|
|
227 FONT_SPEC = GFNS + ["-font", TYPEFONT]
|
|
228 FONT_10pt = FONT_SPEC + ["-pointsize", "10"]
|
|
229 FONT_20pt = FONT_SPEC + ["-pointsize", "20"]
|
|
230 FONT_30pt = FONT_SPEC + ["-pointsize", "30"]
|
|
231
|
|
232 cmd = [
|
|
233 "convert",
|
|
234 infile,
|
|
235 "-bordercolor",
|
|
236 IMAGE_BORDER_COLOUR,
|
|
237 "-border",
|
|
238 IMAGE_BORDER_COORD,
|
|
239 "-bordercolor",
|
|
240 MONTAGE_BORDER_COLOUR,
|
|
241 "-border",
|
|
242 MONTAGE_BORDER_COORD,
|
|
243 ]
|
|
244
|
|
245 # Rotate -90, apply row header at bottom.
|
|
246 primary_header = self.i.header
|
|
247 secondary_head = self.i.description
|
|
248 cmd += (
|
|
249 ["-rotate", "-90",]
|
|
250 + FONT_30pt
|
|
251 + [
|
|
252 # Side label (i/row)
|
|
253 "-annotate",
|
|
254 "+%s+%s"
|
|
255 % (
|
|
256 half_height - 0.5 * est_pixels(self.i.header, 30),
|
|
257 original_dims[0]
|
|
258 + MONTAGE_BORDER
|
|
259 + 2 * IMAGE_BORDER
|
|
260 + char_height(30 + 20)
|
|
261 + 10
|
|
262 # 30 = primary header
|
|
263 # 20 = tick labels
|
|
264 ),
|
|
265 primary_header,
|
|
266 ]
|
|
267 )
|
|
268
|
|
269 if est_pixels(self.i.description, 10) < original_dims[1] and secondary_head != "":
|
|
270 cmd += FONT_10pt + [
|
|
271 # Side label (i/row)
|
|
272 "-annotate",
|
|
273 "+%s+%s"
|
|
274 % (
|
|
275 half_height - 0.5 * est_pixels(self.i.description, 10),
|
|
276 original_dims[0]
|
|
277 + MONTAGE_BORDER
|
|
278 + 2 * IMAGE_BORDER
|
|
279 + char_height(30 + 20 + 10 + 4)
|
|
280 # 30 = primary header
|
|
281 # 20 = tick labels
|
|
282 # 10 = secondary header height
|
|
283 # 4 = line spacing
|
|
284 ),
|
|
285 secondary_head,
|
|
286 ]
|
|
287
|
|
288 # Apply row ticks labels at bottom
|
|
289 for z in range(0, self.i.length, i_ticks):
|
|
290
|
|
291 image_side_percentage = float(z) / self.i.length
|
|
292 x = (
|
|
293 (image_side_percentage * original_dims[1])
|
|
294 + MONTAGE_BORDER
|
|
295 + IMAGE_BORDER
|
|
296 )
|
|
297 y = MONTAGE_BORDER + original_dims[0] + (2 * IMAGE_BORDER)
|
|
298
|
|
299 # Apply ticks
|
|
300 cmd += NFGS
|
|
301 cmd += [
|
|
302 "-draw",
|
|
303 "line %s,%s %s,%s" % (x, y, x, y + TICK_LENGTH),
|
|
304 ]
|
|
305
|
|
306 # Keep text from running off the edge.
|
|
307 space_to_end_of_image = (1 - image_side_percentage) * original_dims[1]
|
|
308 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0:
|
|
309 continue
|
|
310
|
|
311 # Text label
|
|
312 cmd += FONT_20pt
|
|
313 cmd += ["-annotate", "+%s+%s" % (x + 5, y + 15), self.label_formatter(z)]
|
|
314
|
|
315 # Rotate back to final rotation
|
|
316 primary_header = self.j.header
|
|
317 secondary_head = self.j.description
|
|
318 cmd += (
|
|
319 [
|
|
320 "-rotate",
|
|
321 "90",
|
|
322 # Top label (j/column)
|
|
323 ]
|
|
324 + FONT_30pt
|
|
325 + [
|
|
326 "-annotate",
|
|
327 "+%s+%s"
|
|
328 % (
|
|
329 half_width - 0.5 * est_pixels(self.j.header, 30),
|
|
330 MONTAGE_BORDER - char_height(20 + 10 + 8),
|
|
331 ),
|
|
332 primary_header,
|
|
333 ]
|
|
334 + FONT_10pt
|
|
335 + [
|
|
336 # Credits
|
|
337 "-annotate",
|
|
338 "+%s+%s" % (2, MONTAGE_BORDER + original_dims[1] + 2 * IMAGE_BORDER),
|
|
339 "\\n" + CREDITS,
|
|
340 ]
|
|
341 )
|
|
342
|
|
343 if est_pixels(self.j.description, 10) < original_dims[0] and secondary_head != "":
|
|
344 cmd += FONT_10pt + [
|
|
345 "-annotate",
|
|
346 "+%s+%s"
|
|
347 % (
|
|
348 half_width - 0.5 * est_pixels(self.j.description, 10),
|
|
349 MONTAGE_BORDER - char_height(20 + 4)
|
|
350 # 4 = line spacing
|
|
351 ),
|
|
352 secondary_head,
|
|
353 ]
|
|
354
|
|
355 # Apply col ticks along top
|
|
356 for z in range(0, self.j.length, j_ticks):
|
|
357 image_side_percentage = float(z) / self.j.length
|
|
358 x = (
|
|
359 (image_side_percentage * original_dims[0])
|
|
360 + MONTAGE_BORDER
|
|
361 + IMAGE_BORDER
|
|
362 )
|
|
363 y = MONTAGE_BORDER - 1
|
|
364
|
|
365 # Ticks
|
|
366 cmd += NFGS
|
|
367 cmd += [
|
|
368 "-draw",
|
|
369 "line %s,%s %s,%s" % (x, y, x, y - TICK_LENGTH),
|
|
370 ]
|
|
371
|
|
372 # Keep text from running off the edge.
|
|
373 space_to_end_of_image = (1 - image_side_percentage) * original_dims[0]
|
|
374 if space_to_end_of_image - est_pixels(self.label_formatter(z), 20) < 0:
|
|
375 continue
|
|
376
|
|
377 # Text labels
|
|
378 cmd += FONT_20pt
|
|
379 cmd += ["-annotate", "+%s+%s" % (x + 5, y), self.label_formatter(z)]
|
|
380
|
|
381 cmd.append(outfile)
|
|
382 #tmpFile = open(outfile, "w")
|
|
383 #tmpFile.close()
|
|
384 log.info(subprocess.check_output( ["cp", infile, outfile] ))
|
|
385 log.info(subprocess.list2cmdline(cmd))
|
|
386 log.info(subprocess.check_output( "ls" ))
|
|
387 log.info(self.tmpdir)
|
|
388 log.info(subprocess.check_output( ["ls", self.tmpdir]))
|
|
389 log.info(outfile[2:])
|
|
390 log.info("Above was ls\n")
|
|
391 try:
|
|
392 subprocess.check_output(cmd)# + [" 2>&1"])
|
|
393 except:
|
|
394 log.info("Excepted")
|
|
395
|
|
396
|
|
397
|
|
398 class Misty(object):
|
|
399 """MIST Class for building MIST Plots
|
|
400 """
|
|
401
|
|
402 def __init__(self, window=10, zoom=50, matrix="edna", files_path="mist_images"):
|
|
403 self.tmpdir = tempfile.mkdtemp(prefix="cpt.mist3.", dir=".")
|
|
404 self.window = str(window)
|
|
405 self.zoom = zoom
|
|
406 self.matrix = matrix
|
|
407 self.records = []
|
|
408 self.matrix_data = []
|
|
409
|
|
410 # Image storage
|
|
411 self.files_path = files_path
|
|
412 if not os.path.exists(self.files_path):
|
|
413 os.makedirs(self.files_path)
|
|
414
|
|
415 def _get_record(self, record_id):
|
|
416 for i, record in enumerate(self.records):
|
|
417 if record.id == record_id:
|
|
418 return record
|
|
419
|
|
420 def _get_record_idx(self, record_id):
|
|
421 for i, record in enumerate(self.records):
|
|
422 if record.id == record_id:
|
|
423 return i
|
|
424
|
|
425 raise RuntimeError("Could not find record ID=%s" % record_id)
|
|
426
|
|
427 def register_all_files(self, file_list):
|
|
428 for fasta_file in file_list:
|
|
429 for record in SeqIO.parse(fasta_file, "fasta"):
|
|
430 self.register_record(record)
|
|
431
|
|
432 def register_record(self, record):
|
|
433 self.records.append(FancyRecord(record, self.tmpdir))
|
|
434
|
|
435 def set_matrix(self, matrix):
|
|
436 self.matrix_data = matrix
|
|
437 for i in range(len(self.matrix_data)):
|
|
438 record_i = self._get_record(self.matrix_data[i][0]["i"])
|
|
439 for j in range(len(self.matrix_data[i])):
|
|
440 record_j = self._get_record(self.matrix_data[i][j]["j"])
|
|
441 self.matrix_data[i][j]["subplot"] = Subplot(
|
|
442 record_i, record_j, self.tmpdir, self.zoom
|
|
443 )
|
|
444
|
|
445 # More processing?
|
|
446 logging.debug("\n" + pprint.pformat(matrix))
|
|
447
|
|
448 def generate_matrix(self, mtype="complete"):
|
|
449 matrix = []
|
|
450 if mtype == "complete":
|
|
451 for i in self.records:
|
|
452 row = []
|
|
453 for j in self.records:
|
|
454 row.append({"i": i.id, "j": j.id})
|
|
455 matrix.append(row)
|
|
456 elif mtype == "1vn":
|
|
457 if len(self.records) < 2:
|
|
458 raise RuntimeError("1vN not available for fewer than two sequences")
|
|
459 else:
|
|
460 row = []
|
|
461 for j in self.records[1:]:
|
|
462 row.append({"i": self.records[0].id, "j": j.id})
|
|
463 matrix.append(row)
|
|
464 return matrix
|
|
465
|
|
466 @classmethod
|
|
467 def obtain_image_dimensions(cls, path):
|
|
468 cmd = ["identify", path]
|
|
469 output = subprocess.check_output(cmd, universal_newlines=True)
|
|
470 size = output.split(" ")[3]
|
|
471 (w, h) = size[0 : size.index("+")].split("x")
|
|
472 return (int(w), int(h))
|
|
473
|
|
474 @classmethod
|
|
475 def BestTick(cls, largest, mostticks):
|
|
476 # http://stackoverflow.com/a/361687
|
|
477 minimum = largest / mostticks
|
|
478 magnitude = 10 ** math.floor(math.log(minimum) / math.log(10))
|
|
479 residual = minimum / magnitude
|
|
480 if residual > 5:
|
|
481 tick = 10 * magnitude
|
|
482 elif residual > 2:
|
|
483 tick = 5 * magnitude
|
|
484 elif residual > 1:
|
|
485 tick = 2 * magnitude
|
|
486 else:
|
|
487 tick = magnitude
|
|
488 return tick
|
|
489
|
|
490 @classmethod
|
|
491 def resize_image(cls, scale, from_file, to_file):
|
|
492 cmd = ["convert", "-resize", scale, from_file, to_file]
|
|
493 log.debug(" ".join(cmd))
|
|
494 subprocess.check_call(cmd)
|
|
495
|
|
496 def get_image_map(self):
|
|
497 image_template = Template(
|
|
498 '<area shape="rect" coords="${x1},${y1},${x2},${y2}" alt="${alt}" href="${href}" />'
|
|
499 )
|
|
500 imagemap = []
|
|
501
|
|
502 j_widths = []
|
|
503 i_height = []
|
|
504
|
|
505 for j in range(len(self.matrix_data[0])):
|
|
506 j_widths.append(self.matrix_data[0][j]["subplot"].get_thumb_dims()[0])
|
|
507
|
|
508 for i in range(len(self.matrix_data)):
|
|
509 i_height.append(self.matrix_data[i][0]["subplot"].get_thumb_dims()[1])
|
|
510
|
|
511 log.debug(pprint.pformat(j_widths))
|
|
512 log.debug(pprint.pformat(i_height))
|
|
513
|
|
514 def cur_y(i_idx):
|
|
515 return (
|
|
516 MONTAGE_BORDER
|
|
517 + sum(i_height[0:i_idx])
|
|
518 + (2 * IMAGE_BORDER * (1 + i_idx))
|
|
519 )
|
|
520
|
|
521 def cur_x(j_idx):
|
|
522 return (
|
|
523 MONTAGE_BORDER
|
|
524 + sum(j_widths[0:j_idx])
|
|
525 + (2 * IMAGE_BORDER * (1 + j_idx))
|
|
526 )
|
|
527
|
|
528 for j in range(len(self.matrix_data[0])):
|
|
529 for i in range(len(self.matrix_data)):
|
|
530 current = self.matrix_data[i][j]["subplot"]
|
|
531 # Move to final resting place
|
|
532 new_image_location = current.move_to_dir(self.files_path)
|
|
533
|
|
534 # Build imagemagp string
|
|
535 imagemap.append(
|
|
536 image_template.substitute(
|
|
537 {
|
|
538 # Start at +image border so the border isn't included in
|
|
539 # start of box
|
|
540 "x1": cur_x(j),
|
|
541 "y1": cur_y(i),
|
|
542 "x2": cur_x(j) + j_widths[j],
|
|
543 "y2": cur_y(i) + i_height[i],
|
|
544 "alt": current.get_description(),
|
|
545 "href": new_image_location,
|
|
546 }
|
|
547 )
|
|
548 )
|
|
549 return "\n".join(imagemap)
|
|
550
|
|
551 def _generate_montage(self):
|
|
552 image_list = []
|
|
553 for i in range(len(self.matrix_data)):
|
|
554 for j in range(len(self.matrix_data[i])):
|
|
555 subplot = self.matrix_data[i][j]["subplot"]
|
|
556 image_list.append(subplot.thumb_path)
|
|
557
|
|
558 # Montage step
|
|
559 global MONTAGE_BORDER
|
|
560 global MONTAGE_BORDER_COORD
|
|
561 for rec in self.records:
|
|
562 MONTAGE_BORDER = max(MONTAGE_BORDER, (len(rec.id) * 12) + 4)
|
|
563 MONTAGE_BORDER_COORD = "%sx%s" % (MONTAGE_BORDER, MONTAGE_BORDER)
|
|
564
|
|
565 m0 = os.path.join(self.tmpdir, "m0.png")
|
|
566 # log.info(subprocess.check_output( ["cp", image_list[0], m0] ))
|
|
567 cmd = ["montage"] + image_list
|
|
568 cmd += [
|
|
569 "-tile",
|
|
570 "%sx%s" % (len(self.matrix_data[0]), len(self.matrix_data)),
|
|
571 "-geometry",
|
|
572 "+0+0",
|
|
573 "-border",
|
|
574 str(IMAGE_BORDER),
|
|
575 "-bordercolor",
|
|
576 IMAGE_BORDER_COLOUR,
|
|
577 "-font",
|
|
578 TYPEFONT,
|
|
579 m0,
|
|
580 ]
|
|
581
|
|
582 log.debug(" ".join(cmd))
|
|
583 try:
|
|
584 subprocess.check_call(cmd)
|
|
585 except:
|
|
586 log.debug("Excepted, 2")
|
|
587 # Add grey borders
|
|
588 montage_path = os.path.join(self.tmpdir, "montage.png")
|
|
589 cmd = [
|
|
590 "convert",
|
|
591 m0,
|
|
592 "-bordercolor",
|
|
593 IMAGE_BORDER_COLOUR,
|
|
594 "-border",
|
|
595 IMAGE_BORDER_COORD,
|
|
596 "-bordercolor",
|
|
597 MONTAGE_BORDER_COLOUR,
|
|
598 "-border",
|
|
599 MONTAGE_BORDER_COORD,
|
|
600 montage_path,
|
|
601 ]
|
|
602
|
|
603 log.debug(" ".join(cmd))
|
|
604 try:
|
|
605 subprocess.check_call(cmd)
|
|
606 except:
|
|
607 log.debug("Excepted, 2")
|
|
608 os.unlink(m0)
|
|
609 return montage_path
|
|
610
|
|
611 def _annotate_montage(self, base_path):
|
|
612 # Calculate some expected dimension
|
|
613 cumulative_width = 0
|
|
614 cumulative_height = 0
|
|
615 for i in range(len(self.matrix_data)):
|
|
616 for j in range(len(self.matrix_data[i])):
|
|
617 subplot = self.matrix_data[i][j]["subplot"]
|
|
618
|
|
619 if i == 0:
|
|
620 cumulative_width += subplot.get_thumb_dims()[0] + IMAGE_BORDER * 2
|
|
621
|
|
622 if j == 0:
|
|
623 cumulative_height += subplot.get_thumb_dims()[1] + IMAGE_BORDER * 2
|
|
624
|
|
625 convert_arguments_top = []
|
|
626 convert_arguments_left = []
|
|
627 left_offset = cumulative_width + MONTAGE_BORDER
|
|
628
|
|
629 current_sum_width = MONTAGE_BORDER
|
|
630 current_sum_height = MONTAGE_BORDER
|
|
631
|
|
632 convert_arguments_top+= [
|
|
633 "-rotate",
|
|
634 "-90"
|
|
635 ]
|
|
636 # Top side
|
|
637 for j in range(len(self.matrix_data[0])):
|
|
638 subplot = self.matrix_data[0][j]["subplot"]
|
|
639 convert_arguments_top += [
|
|
640 "-fill",
|
|
641 LABEL_COLOUR,
|
|
642 "-annotate",
|
|
643 "-%s+%s" % (0, str(cumulative_width - current_sum_width -(subplot.get_thumb_dims()[0]/2) + (2 * MONTAGE_BORDER) + IMAGE_BORDER)),
|
|
644 subplot.j.header,
|
|
645 ]
|
|
646 current_sum_width += subplot.get_thumb_dims()[0] + (2 * IMAGE_BORDER)
|
|
647 log.debug("CSW %s", current_sum_width)
|
|
648 convert_arguments_top+= [
|
|
649 "-rotate",
|
|
650 "90"
|
|
651 ]
|
|
652 # Left side
|
|
653 #convert_arguments_left += [
|
|
654 # "-rotate",
|
|
655 # "90"
|
|
656 #]
|
|
657 for i in range(len(self.matrix_data)):
|
|
658 subplot = self.matrix_data[i][0]["subplot"]
|
|
659 convert_arguments_left += [
|
|
660 "-fill",
|
|
661 LABEL_COLOUR,
|
|
662 "-annotate",
|
|
663 "+2+%s" % str(current_sum_height + (subplot.get_thumb_dims()[1]/2.0) + IMAGE_BORDER),
|
|
664 "\n" + subplot.i.header,
|
|
665 ]
|
|
666 current_sum_height += subplot.get_thumb_dims()[1] + (2 * IMAGE_BORDER)
|
|
667 log.debug("CSH %s", current_sum_height)
|
|
668
|
|
669 cmd = [
|
|
670 "convert",
|
|
671 base_path,
|
|
672 # "-rotate",
|
|
673 # "-90",
|
|
674 "-pointsize",
|
|
675 "20",
|
|
676 "-font",
|
|
677 TYPEFONT,
|
|
678 ]
|
|
679 cmd += convert_arguments_left
|
|
680 # cmd += ["-rotate", "90"]
|
|
681 cmd += convert_arguments_top
|
|
682
|
|
683 output_path = os.path.join(self.tmpdir, "large.png")
|
|
684 cmd += [
|
|
685 "-pointsize",
|
|
686 "14",
|
|
687 "-annotate",
|
|
688 "+2+%s" % str(current_sum_height + 40),
|
|
689 CREDITS,
|
|
690 output_path,
|
|
691 ]
|
|
692 log.debug(" ".join(cmd))
|
|
693 try:
|
|
694 subprocess.check_call(cmd)
|
|
695 except:
|
|
696 log.debug("Excepted, 3")
|
|
697 #subprocess.check_output(cmd)
|
|
698 return output_path
|
|
699
|
|
700 def run(self):
|
|
701 # We want the final thumbnail for the overall image to have a max width/height
|
|
702 # of 700px for nice display
|
|
703 total_seqlen_j = 0
|
|
704 total_seqlen_i = 0
|
|
705 for j in range(len(self.matrix_data[0])):
|
|
706 total_seqlen_j += self.matrix_data[0][j]["subplot"].j.length
|
|
707
|
|
708 for i in range(len(self.matrix_data)):
|
|
709 total_seqlen_i += self.matrix_data[i][0]["subplot"].i.length
|
|
710 total_seqlen = max(total_seqlen_i, total_seqlen_j)
|
|
711 rescale = 200 * (700.0 / (float(total_seqlen) / self.zoom))
|
|
712 rescale_p = "%s%%" % rescale
|
|
713
|
|
714 # Generate gepard plots for each of the sub-images
|
|
715 for i in range(len(self.matrix_data)):
|
|
716 for j in range(len(self.matrix_data[i])):
|
|
717 subplot = self.matrix_data[i][j]["subplot"]
|
|
718
|
|
719 # generates _orig and _thumb versions
|
|
720 subplot.run_gepard(self.matrix, self.window, global_rescale=rescale_p)
|
|
721
|
|
722 base_montage = self._generate_montage()
|
|
723 annotated_montage = self._annotate_montage(base_montage)
|
|
724
|
|
725 final_montage_path = os.path.join(self.files_path, "large.png")
|
|
726 shutil.move(annotated_montage, final_montage_path)
|
|
727 return final_montage_path
|
|
728
|
|
729
|
|
730 def mist_wrapper(
|
|
731 files,
|
|
732 window=10,
|
|
733 zoom=50,
|
|
734 matrix="edna",
|
|
735 plot_type="complete",
|
|
736 files_path="mist_images",
|
|
737 ):
|
|
738 html_page = """
|
|
739 <!DOCTYPE html>
|
|
740 <html>
|
|
741 <body>
|
|
742 <h1>Mist Results</h1>
|
|
743 <p>Each section of mist output is now clickable to view a higher resolution version of that subsection</p>
|
|
744 <img src="large.png" usemap="#mistmap">
|
|
745 <map name="mistmap">
|
|
746 %s
|
|
747 </map>
|
|
748 </body>
|
|
749 </html>
|
|
750 """
|
|
751
|
|
752 m = Misty(window=window, zoom=zoom, matrix=matrix, files_path=files_path)
|
|
753
|
|
754 # There is only one special case, so we handle that separately. Every other
|
|
755 # plot type wants ALL of the sequences available.
|
|
756 if (
|
|
757 plot_type == "2up"
|
|
758 and len(files) != 2
|
|
759 and matrix not in ("protidentity", "blosum62")
|
|
760 ):
|
|
761 idx = 0
|
|
762 # Pull top two sequences.
|
|
763 for fasta_file in files:
|
|
764 for record in SeqIO.parse(fasta_file, "fasta"):
|
|
765 m.register_record(record)
|
|
766
|
|
767 # Exit after we've seen two sequences
|
|
768 idx += 1
|
|
769 if idx == 2:
|
|
770 break
|
|
771 else:
|
|
772 m.register_all_files(files)
|
|
773
|
|
774 # Generate the matrix appropriate to this plot type. There are two matrix
|
|
775 # types: 1vN and complete. 1vN is just a line, complete is a complete
|
|
776 # square.
|
|
777 if plot_type == "complete":
|
|
778 # ALL sequences are used.
|
|
779 m.set_matrix(m.generate_matrix(mtype="complete"))
|
|
780 elif plot_type == "2up":
|
|
781 # Just two sequences.
|
|
782 if len(files) == 2 and matrix in ("protidentity", "blosum62"):
|
|
783 m.set_matrix(m.generate_matrix(mtype="complete"))
|
|
784 else:
|
|
785 # Co-op the 1vn to do a single plot, rather than a "complete" plot
|
|
786 m.set_matrix(m.generate_matrix(mtype="1vn"))
|
|
787 elif plot_type == "1vn":
|
|
788 m.set_matrix(m.generate_matrix(mtype="1vn"))
|
|
789 else:
|
|
790 raise ValueError("Unknown plot type %s" % plot_type)
|
|
791
|
|
792 m.run()
|
|
793 # image_map will be returned from MIST
|
|
794 image_map = m.get_image_map()
|
|
795
|
|
796 return html_page % image_map
|
|
797
|
|
798
|
|
799 if __name__ == "__main__":
|
|
800 parser = argparse.ArgumentParser(description="MIST v3", epilog="")
|
|
801 parser.add_argument(
|
|
802 "files", nargs="+", type=argparse.FileType("r"), help="Fasta sequences"
|
|
803 )
|
|
804
|
|
805 parser.add_argument("--zoom", type=int, help="# bases / pixel", default=50)
|
|
806 parser.add_argument("--window", type=int, help="Window size", default=10)
|
|
807 parser.add_argument(
|
|
808 "--matrix",
|
|
809 type=str,
|
|
810 choices=["ednaorig", "pam250", "edna", "protidentity", "blosum62"],
|
|
811 help="# bases / pixel",
|
|
812 default="edna",
|
|
813 )
|
|
814
|
|
815 parser.add_argument(
|
|
816 "--plot_type",
|
|
817 choices=["2up", "1vn", "complete"],
|
|
818 help="Plot type",
|
|
819 default="complete",
|
|
820 )
|
|
821
|
|
822 parser.add_argument(
|
|
823 "--files_path", type=str, help="Image directory", default="mist_images"
|
|
824 )
|
|
825
|
|
826 args = parser.parse_args()
|
|
827 print(mist_wrapper(**vars(args)))
|