hexagram-6ae12361157c/hexagram/hexagram.py @ 0:1407e3634bcf (draft, default, tip)

author:  adam-novak
date:    Tue, 22 Oct 2013 14:17:59 -0400
summary: Uploaded r11 from test tool shed.
#!/usr/bin/env python2.7
"""
hexagram.py: Given a matrix of similarities, produce a hexagram visualization.

This script takes in the filename of a tab-separated value file containing a
sparse similarity matrix (with string labels) and several matrices of
layer/score data. It produces an HTML file (and several support files) that
provide an interactive visualization of the items clustered on a hexagonal grid.

This script depends on the DrL graph layout package, binaries for which must be
present in your PATH.

Re-uses sample code and documentation from
<http://users.soe.ucsc.edu/~karplus/bme205/f12/Scaffold.html>
"""

import argparse, sys, os, itertools, math, numpy, subprocess, shutil, tempfile
import collections, multiprocessing, traceback
import scipy.stats, scipy.linalg
import os.path
import tsv

# Global variable to hold opened matrices files
matrices = []


def parse_args(args):
    """
    Takes in the command-line arguments list (args), and returns a nice argparse
    result with fields for all the options.
    Borrows heavily from the argparse documentation examples:
    <http://docs.python.org/library/argparse.html>
    """
    
    # The command line arguments start with the program name, which we don't
    # want to treat as an argument for argparse. So we remove it.
    args = args[1:]
    
    # Construct the parser (which is stored in parser)
    # Module docstring lives in __doc__
    # See http://python-forum.com/pythonforum/viewtopic.php?f=3&t=36847
    # And a formatter class so our examples in the docstring look good. Isn't it
    # convenient how we already wrapped it to 80 characters?
    # See http://docs.python.org/library/argparse.html#formatter-class
    parser = argparse.ArgumentParser(description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    
    # Now add all the options to it
    # Options match the ctdHeatmap tool options as much as possible.
    parser.add_argument("similarity", type=str, nargs='+',
        help="the unopened files of similarity matrices")
52 parser.add_argument("--names", type=str, action="append", default=[], | |
53 help="the unopened files of similarity matrices") | |
54 parser.add_argument("--scores", type=str, | |
55 action="append", default=[], | |
56 help="a TSV to read scores for each signature from") | |
57 parser.add_argument("--colormaps", type=argparse.FileType("r"), | |
58 default=None, | |
59 help="a TSV defining coloring and value names for discrete scores") | |
60 parser.add_argument("--html", "-H", type=str, | |
61 default="index.html", | |
62 help="where to write HTML report") | |
63 parser.add_argument("--directory", "-d", type=str, default=".", | |
64 help="directory in which to create other output files") | |
65 parser.add_argument("--query", type=str, default=None, | |
66 help="Galaxy-escaped name of the query signature") | |
67 parser.add_argument("--window_size", type=int, default=20, | |
68 help="size of the window to use when looking for clusters") | |
69 parser.add_argument("--truncation_edges", type=int, default=10, | |
70 help="number of edges for DrL truncate to pass per node") | |
71 parser.add_argument("--no-stats", dest="stats", action="store_false", | |
72 default=True, | |
73 help="disable cluster-finding statistics") | |
74 parser.add_argument("--include-singletons", dest="singletons", | |
75 action="store_true", default=False, | |
76 help="add self-edges to retain unconnected points") | |
77 | |
78 return parser.parse_args(args) | |
79 | |
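# A hypothetical invocation, for orientation only (the file names here are
# made up, and in practice the tool is normally driven by Galaxy):
#
#   python2.7 hexagram.py similarity.tab --names "mRNA expression" \
#       --scores scores.tab --colormaps colormaps.tab \
#       --directory out --html out/index.html
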
def hexagon_center(x, y, scale=1.0):
    """
    Given a coordinate on a grid of hexagons (using wiggly rows in x), what is
    the 2D Euclidean coordinate of its center?
    
    x and y are integer column and row coordinates of the hexagon in the grid.
    
    scale is a float specifying hexagon side length.
    
    The origin in coordinate space is defined as the upper left corner of the
    bounding box of the hexagon with indices x=0 and y=0.
    
    Returns a tuple of floats.
    """
    # The grid looks like this:
    #
    #    /-\ /-\ /-\ /-\ 
    # /-\-/-\-/-\-/-\-/-\
    # \-/-\-/-\-/-\-/-\-/
    # /-\-/-\-/-\-/-\-/-\
    # \-/-\-/-\-/-\-/-\-/
    # /-\-/-\-/-\-/-\-/-\
    # \-/ \-/ \-/ \-/ \-/
    #
    # Say a hexagon side has length 1
    # It's 2 across corner to corner (x), and sqrt(3) across side to side (y)
    # X coordinates are 1.5 per column
    # Y coordinates (down from top) are sqrt(3) per row, -1/2 sqrt(3) if you're
    # in an odd column.
    
    center_y = math.sqrt(3) * y
    if x % 2 == 1:
        # Odd column: shift up
        center_y -= 0.5 * math.sqrt(3)
        
    return (1.5 * x * scale + scale, center_y * scale + math.sqrt(3.0) / 2.0 *
        scale)

def hexagon_pick(x, y, scale=1.0):
    """
    Given floats x and y specifying coordinates in the plane, determine which
    hexagon grid cell that point is in.
    
    scale is a float specifying hexagon side length.
    
    See http://blog.ruslans.com/2011/02/hexagonal-grid-math.html
    But we flip the direction of the wiggle. Odd rows are up (-y)
    """
    
    # How high is a hex?
    hex_height = math.sqrt(3) * scale
    
    # First we pick a rectangular tile, from the point of one side-triangle to
    # the base of the other in width, and the whole hexagon height in height.
    
    # How wide are these tiles? Corner to line-between-far-corners distance
    tile_width = (3.0 / 2.0 * scale)
    
    # Tile X index is floor(x / tile_width)
    tile_x = int(math.floor(x / tile_width))
    
    # We need this intermediate value for the Y index and for tile-internal
    # picking
    corrected_y = y + (tile_x % 2) * hex_height / 2.0
    
    # Tile Y index is floor((y + (x index mod 2) * hex height/2) / hex height)
    tile_y = int(math.floor(corrected_y / hex_height))
    
    # Find coordinates within the tile
    internal_x = x - tile_x * tile_width
    internal_y = corrected_y - tile_y * hex_height
    
    # Do tile-scale picking
    # Are we in the one corner, the other corner, or the bulk of the tile?
    if internal_x > scale * abs(0.5 - internal_y / hex_height):
        # We're in the bulk of the tile
        # This is the column (x) of the picked hexagon
        hexagon_x = tile_x
        
        # This is the row (y) of the picked hexagon
        hexagon_y = tile_y
    else:
        # We're in a corner.
        # In an even column, the lower left is part of the next row, and the
        # upper left is part of the same row. In an odd column, the lower left
        # is part of the same row, and the upper left is part of the previous
        # row.
        if internal_y > hex_height / 2.0:
            # It's the lower left corner
            # This is the offset in row (y) that being in this corner gives us
            # The lower left corner is always 1 row below the upper left corner.
            corner_y_offset = 1
        else:
            corner_y_offset = 0
            
        # TODO: verify this for correctness. It seems to be right, but I want a
        # unit test to be sure.
        # This is the row (y) of the picked hexagon
        hexagon_y = tile_y - tile_x % 2 + corner_y_offset
        
        # This is the column (x) of the picked hexagon
        hexagon_x = tile_x - 1
        
    # Now we've picked the hexagon
    return (hexagon_x, hexagon_y)

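# A minimal round-trip check for the TODO above, sketched as a plain function
# rather than a formal unit test. The function name and the choice of probe
# points and scales are ours, not part of the original tool; it is defined
# here but not invoked anywhere.
def _check_hexagon_pick():
    for scale in (1.0, 2.5):
        for x in xrange(0, 4):
            for y in xrange(0, 4):
                # Picking the center of a hexagon should recover its indices.
                center = hexagon_center(x, y, scale=scale)
                picked = hexagon_pick(center[0], center[1], scale=scale)
                assert picked == (x, y), "Mis-picked {} as {}".format((x, y),
                    picked)
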
def radial_search(center_x, center_y):
    """
    An iterator that yields coordinate tuples (x, y) in order of increasing
    hex-grid distance from the specified center position.
    """
    
    # A hexagon has neighbors at the following relative coordinates:
    # (-1, 0), (1, 0), (0, -1), (0, 1)
    # and ((-1, 1) and (1, 1) if in an even column)
    # or ((-1, -1) and (1, -1) if in an odd column)
    
    # We're going to go outwards using breadth-first search, so we need a queue
    # of hexes to visit and a set of already visited hexes.
    
    # This holds a queue (really a deque) of hexes waiting to be visited.
    # A list has O(n) pop/insert at left.
    queue = collections.deque()
    # This holds a set of the (x, y) coordinate tuples of already-seen hexes,
    # so we don't enqueue them again.
    seen = set()
    
    # First place to visit is the center. Mark it seen right away, so it can't
    # be re-enqueued later as a neighbor of its own neighbors.
    queue.append((center_x, center_y))
    seen.add((center_x, center_y))
    
    while len(queue) > 0:
        # We should in theory never run out of items in the queue.
        # Get the current x and y to visit.
        x, y = queue.popleft()
        
        # Yield the location we're visiting
        yield (x, y)
        
        # This holds a list of all relative neighbor positions as (x, y) tuples.
        neighbor_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]
        if x % 2 == 0:
            # An even-column hex also has these neighbors
            neighbor_offsets += [(-1, 1), (1, 1)]
        else:
            # An odd-column hex also has these neighbors
            neighbor_offsets += [(-1, -1), (1, -1)]
        
        for x_offset, y_offset in neighbor_offsets:
            # First calculate the absolute position of the neighbor in x
            neighbor_x = x + x_offset
            # And in y
            neighbor_y = y + y_offset
            
            if (neighbor_x, neighbor_y) not in seen:
                # This is a hex that has never been in the queue. Add it.
                queue.append((neighbor_x, neighbor_y))
                
                # Record that it has ever been enqueued
                seen.add((neighbor_x, neighbor_y))

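# For intuition, the first few positions radial_search yields around the
# origin can be inspected like this (itertools is already imported above; the
# exact ordering within each distance ring is an implementation detail):
#
#   ring = list(itertools.islice(radial_search(0, 0), 7))
#   # e.g. [(0, 0), (-1, 0), (1, 0), (0, -1), (0, 1), (-1, 1), (1, 1)]
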

def assign_hexagon(hexagons, node_x, node_y, node, scale=1.0):
    """
    This function assigns the given node to a hexagon in hexagons. hexagons is a
    defaultdict from tuples of hexagon (x, y) integer indices to assigned nodes,
    or None if a hexagon is free. node_x and node_y are the x and y coordinates
    of the node, adapted so that the seed node lands in the 0, 0 hexagon, and
    re-scaled to reduce hexagon conflicts. node is the node to be assigned.
    scale, if specified, is the hexagon side length in node space units.
    
    This function assigns nodes to their closest hexagon, reprobing outwards if
    already occupied.
    
    When the function completes, node is stored in hexagons under some (x, y)
    tuple.
    
    Returns the distance this hexagon is from its ideal location.
    """
    
    # These hold the hexagon that the point falls in, which may be taken.
    best_x, best_y = hexagon_pick(node_x, node_y, scale=scale)
    
    for x, y in radial_search(best_x, best_y):
        # These hexes are enumerated in order of increasing distance from the
        # best one, starting with the best hex itself.
        
        if hexagons[(x, y)] is None:
            # This is the closest free hex. Break out of the loop, leaving x and
            # y pointing here.
            break
    
    # Assign the node to the hexagon
    hexagons[(x, y)] = node
    
    return math.sqrt((x - best_x) ** 2 + (y - best_y) ** 2)


def assign_hexagon_local_radial(hexagons, node_x, node_y, node, scale=1.0):
    """
    This function assigns the given node to a hexagon in hexagons. hexagons is a
    defaultdict from tuples of hexagon (x, y) integer indices to assigned nodes,
    or None if a hexagon is free. node_x and node_y are the x and y coordinates
    of the node, adapted so that the seed node lands in the 0, 0 hexagon, and
    re-scaled to reduce hexagon conflicts. node is the node to be assigned.
    scale, if specified, is the hexagon side length in node space units.
    
    This function assigns nodes to their closest hexagon. If that hexagon is
    full, it re-probes outward along the direction from the closest hexagon's
    center to the node.
    
    When the function completes, node is stored in hexagons under some (x, y)
    tuple.
    
    Returns the distance this hexagon is from its ideal location.
    """
    
    # These hold the hexagon that the point falls in, which may be taken.
    best_x, best_y = hexagon_pick(node_x, node_y, scale=scale)
    
    # These hold the center of that hexagon in float space
    center_x, center_y = hexagon_center(best_x, best_y, scale=scale)
    
    # This holds the distance from this point to the center of that hexagon
    node_distance = math.sqrt((node_x - center_x) ** 2 + (node_y - center_y) **
        2)
    
    # These hold the normalized direction of this point, relative to the center
    # of its best hexagon
    direction_x = (node_x - center_x) / node_distance
    direction_y = (node_y - center_y) / node_distance
    
    # Do a search in that direction, starting at the best hex.
    
    # These are the hexagon indices we're considering
    x, y = best_x, best_y
    
    # These are the Cartesian coordinates we're probing. Must be in the x, y hex
    # as a loop invariant.
    test_x, test_y = center_x, center_y
    
    while hexagons[(x, y)] is not None:
        # Re-probe outwards from the best hex in scale-sized steps
        # TODO: is that the right step size?
        test_x += direction_x * scale
        test_y += direction_y * scale
        
        # Re-pick x and y for the hex containing our test point
        x, y = hexagon_pick(test_x, test_y, scale=scale)
        
    # We've finally reached the edge of the cluster.
    # Drop our hexagon
    hexagons[(x, y)] = node
    
    return math.sqrt((x - best_x) ** 2 + (y - best_y) ** 2)

def assign_hexagon_radial(hexagons, node_x, node_y, node, scale=1.0):
    """
    This function assigns the given node to a hexagon in hexagons. hexagons is a
    defaultdict from tuples of hexagon (x, y) integer indices to assigned nodes,
    or None if a hexagon is free. node_x and node_y are the x and y coordinates
    of the node, adapted so that the seed node lands in the 0, 0 hexagon, and
    re-scaled to reduce hexagon conflicts. node is the node to be assigned.
    scale, if specified, is the hexagon side length in node space units.
    
    This function assigns nodes to hexagons based on radial distance from 0, 0.
    This makes hexagon assignment much more dense, but can lose spatial
    structure.
    
    When the function completes, node is stored in hexagons under some (x, y)
    tuple.
    
    Returns the distance this hexagon is from its ideal location. Unfortunately,
    this doesn't really make sense for this assignment scheme, so it is always
    0.
    """
    
    # Compute node's distance from the origin
    node_distance = math.sqrt(node_x ** 2 + node_y ** 2)
    
    # Compute normalized direction from the origin for this node
    direction_x = node_x / node_distance
    direction_y = node_y / node_distance
    
    # These are the coordinates we are testing
    test_x = 0
    test_y = 0
    
    # These are the hexagon indices that correspond to that point
    x, y = hexagon_pick(test_x, test_y, scale=scale)
    
    while hexagons[(x, y)] is not None:
        # Re-probe outwards from the origin in scale-sized steps
        # TODO: is that the right step size?
        test_x += direction_x * scale
        test_y += direction_y * scale
        
        # Re-pick
        x, y = hexagon_pick(test_x, test_y, scale=scale)
        
    # We've finally reached the edge of the cluster.
    # Drop our hexagon
    # TODO: this has to be N^2 if we line them all up in a line
    hexagons[(x, y)] = node
    
    return 0

def hexagons_in_window(hexagons, x, y, width, height):
    """
    Given a dict from (x, y) position to signature names, return the list of all
    signatures in the window starting at hexagon x, y and extending width in the
    x direction and height in the y direction on the hexagon grid.
    """
    
    # This holds the list of hexagons we've found
    found = []
    
    for i in xrange(x, x + width):
        for j in xrange(y, y + height):
            if hexagons.has_key((i, j)):
                # This position in the window has a hex.
                found.append(hexagons[(i, j)])
                
    return found

class ClusterFinder(object):
    """
    A class that can be invoked to find the p value of the best cluster in its
    layer. Instances are pickleable.
    """
    
    def __init__(self, hexagons, layer, window_size=5):
        """
        Keep the given hexagons dict (from (x, y) to signature name) and the
        given layer (a dict from signature name to a value), and the given
        window size, in a ClusterFinder object.
        """
        
        # TODO: This should probably all operate on numpy arrays that we can
        # slice efficiently.
        
        # Store the hexagon assignments
        self.hexagons = hexagons
        # Store the layer
        self.layer = layer
        
        # Store the window size
        self.window_size = window_size
    
    @staticmethod
    def continuous_p(in_values, out_values):
        """
        Get the p value for in_values and out_values being distinct continuous
        distributions.
        
        in_values and out_values are both Numpy arrays. Returns the p value, or
        raises a ValueError if the statistical test cannot be run for some
        reason.
        
        Uses the Mann-Whitney U test.
        """
        
        # Do a Mann-Whitney U test to see how different the data
        # sets are.
        u_statistic, p_value = scipy.stats.mannwhitneyu(in_values,
            out_values)
        
        return p_value
    
    @staticmethod
    def dichotomous_p(in_values, out_values):
        """
        Given two one-dimensional Numpy arrays of 0s and 1s, compute a p value
        for the in_values having a different probability of being 1 than the
        frequency of 1s in the out_values.
        
        This test uses the scipy.stats.binom_test function, which does not claim
        to use the normal approximation. Therefore, this test should be valid
        for arbitrarily small frequencies of either 0s or 1s in in_values.
        
        TODO: What if out_values is shorter than in_values?
        """
        
        if len(out_values) == 0:
            raise ValueError("Background group is empty!")
        
        # This holds the observed frequency of 1s in out_values
        frequency = numpy.sum(out_values) / len(out_values)
        
        # This holds the number of 1s in in_values
        successes = numpy.sum(in_values)
        
        # This holds the number of "trials" we got that many successes in
        trials = len(in_values)
        
        # Return how significantly the frequency inside differs from that
        # outside.
        return scipy.stats.binom_test(successes, trials, frequency)
    
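    # A small worked example of dichotomous_p, with invented values: the
    # background frequency from out_values below is 0.5, and in_values has 9
    # ones out of 10, so this amounts to binom_test(9, 10, 0.5), roughly
    # p = 0.021:
    #
    #   inside = numpy.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 0.])
    #   outside = numpy.array([0., 1., 0., 1.])
    #   p = ClusterFinder.dichotomous_p(inside, outside)
    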
    @staticmethod
    def categorical_p(in_values, out_values):
        """
        Given two one-dimensional Numpy arrays of integers (which may be stored
        as floats), which represent items being assigned to different
        categories, return a p value for the distribution of categories observed
        in in_values differing from that observed in out_values.
        
        The normal way to do this is with a chi-squared goodness of fit test.
        However, that test has invalid assumptions when there are fewer than 5
        expected and 5 observed observations in every category. See
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html
        
        However, we will use it anyway, because the tests that don't break down
        are prohibitively slow.
        """
        
        # Convert our inputs to integer arrays
        in_values = in_values.astype(int)
        out_values = out_values.astype(int)
        
        # How many categories are there (count 0 to the maximum value)
        num_categories = max(numpy.max(in_values), numpy.max(out_values)) + 1
        
        # Count the number of in_values and out_values in each category
        in_counts = numpy.array([len(in_values[in_values == i]) for i in
            xrange(num_categories)])
        out_counts = numpy.array([len(out_values[out_values == i]) for i in
            xrange(num_categories)])
        
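        # Caveat: scipy.stats.chisquare treats its second argument as expected
        # frequencies and assumes they sum to the same total as the observed
        # counts. out_counts will generally sum to a different total, so a
        # strictly valid comparison would presumably rescale it first, along
        # the lines of:
        #
        #   expected = out_counts * (float(in_counts.sum()) / out_counts.sum())
        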
        # Get the p value for the window being from the estimated distribution
        # None of the distribution parameters count as "estimated from data"
        # because they aren't estimated from the data under test.
        _, p_value = scipy.stats.chisquare(in_counts, out_counts)
        
        return p_value
    
    def __call__(self):
        """
        Find the best p value for any window of size window_size. Return it.
        """
        
        # Calculate the bounding box where we want to look for windows.
        # TODO: This would just be all of a numpy array
        min_x = min(coords[0] for coords in self.hexagons.iterkeys())
        min_y = min(coords[1] for coords in self.hexagons.iterkeys())
        max_x = max(coords[0] for coords in self.hexagons.iterkeys())
        max_y = max(coords[1] for coords in self.hexagons.iterkeys())
        
        # This holds a Numpy array of all the data by x, y
        layer_data = numpy.empty((max_x - min_x + 1, max_y - min_y + 1))
        
        # Fill it with NaN so we can mask those out later
        layer_data[:] = numpy.NAN
        
        for (hex_x, hex_y), name in self.hexagons.iteritems():
            # Copy the layer values into the Numpy array
            if self.layer.has_key(name):
                layer_data[hex_x - min_x, hex_y - min_y] = self.layer[name]
        
        # This holds a masked version of the layer data
        layer_data_masked = numpy.ma.masked_invalid(layer_data, copy=False)
        
        # This holds the smallest p value we have found for this layer
        best_p = float("+inf")
        
        # This holds the statistical test to use (a function from two Numpy
        # arrays to a p value)
        # The most specific test is the dichotomous test (0 or 1)
        statistical_test = self.dichotomous_p
        
        if numpy.sum(~layer_data_masked.mask) == 0:
            # There is actually no data in this layer at all.
            # nditer complains if we try to iterate over an empty thing.
            # So quit early and say we couldn't find anything.
            return best_p
        
        for value in numpy.nditer(layer_data_masked[~layer_data_masked.mask]):
            # Check all the values in the layer.
            # If this value is out of the domain of the current statistical
            # test, upgrade to a more general test.
            
            if statistical_test == self.dichotomous_p and (value > 1 or
                value < 0):
                
                # We can't use a dichotomous test on things outside 0 to 1
                # But we haven't yet detected any non-integers
                # Use categorical
                statistical_test = self.categorical_p
            
            if value % 1 != 0:
                # This is not an integer value
                # So, we must use a continuous statistical test
                statistical_test = self.continuous_p
                
                # This is the least specific test, so we can stop now
                break
        
        # Slide the window over the 0-based index space of layer_data, which
        # is offset from hexagon coordinates by min_x, min_y (the same shift
        # applied when layer_data was filled in above).
        for i in xrange(0, max_x - min_x + 1 - self.window_size):
            for j in xrange(0, max_y - min_y + 1 - self.window_size):
                
                # Get the layer values for hexes in the window, as a Numpy
                # masked array.
                in_region = layer_data_masked[i:i + self.window_size,
                    j:j + self.window_size]
                
                # And as a 1d Numpy array
                in_values = numpy.reshape(in_region[~in_region.mask], -1).data
                
                # And out of the window (all the other hexes) as a masked array
                out_region = numpy.ma.copy(layer_data_masked)
                # We get this by masking out everything in the region
                out_region.mask[i:i + self.window_size,
                    j:j + self.window_size] = True
                
                # And as a 1d Numpy array
                out_values = numpy.reshape(out_region[~out_region.mask],
                    -1).data
                
                if len(in_values) == 0 or len(out_values) == 0:
                    # Can't do any stats on this window
                    continue
                
                if len(in_values) < 0.5 * self.window_size ** 2:
                    # The window is less than half full. Skip it.
                    # TODO: Make this threshold configurable.
                    continue
                
                try:
                    # Get the p value for this window under the selected
                    # statistical test
                    p_value = statistical_test(in_values, out_values)
                    
                    # If this is the best p value so far, record it
                    best_p = min(best_p, p_value)
                except ValueError:
                    # Probably an all-zero layer, or something else the test
                    # can't handle.
                    # But let's try all the other windows to be safe.
                    # Maybe one will work.
                    pass
        
        # We have now found the best p for any window for this layer.
        print "Best p found: {}".format(best_p)
        sys.stdout.flush()
        
        return best_p

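# A serial usage sketch, with made-up names (main() below runs the same job
# through a multiprocessing.Pool instead of calling the finder directly):
#
#   finder = ClusterFinder(hexagons, layers["some layer"], window_size=20)
#   best_p = finder()
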
def run_functor(functor):
    """
    Given a no-argument functor (like a ClusterFinder), run it and return its
    result. We can use this with multiprocessing.map and map it over a list of
    job functors to do them.
    
    Handles getting more than multiprocessing's pitiful exception output by
    embedding the full traceback text in the re-raised exception.
    """
    
    try:
        return functor()
    except:
        # Put all exception text into an exception and raise that
        raise Exception(traceback.format_exc())

def open_matrices(names):
    """
    The argument parser now takes multiple similarity matrices as input and
    saves their file names as strings. We want to store these names for
    display later in hexagram.js, in order to allow the user to navigate and
    know what type of visualization map they are looking at - gene expression,
    copy number, etc.
    
    Since the parser no longer opens the files automatically, we must do it
    in this function.
    """
    
    # For each file name, open the file and add it to the matrices list
    # 'r' is the argument stating that the file will be read-only
    for similarity_filename in names:
        print "Opening Matrices..."
        matrix_file = tsv.TsvReader(open(similarity_filename, "r"))
        matrices.append(matrix_file)

def compute_beta(coords, matrix, axis, index, options):
    """
    Compute and return a beta matrix from coords * matrix.
    Then print the matrix to a file to be read on clientside.
    
    TODO: the writing-to-file step still needs to be added.
    """
    beta = coords * matrix
    return beta
def drl_similarity_functions(matrix, index, options):
    """
    Formats a similarity matrix into the TSV form that DrL expects, and then
    runs all of the DrL functions (truncate, layout, recoord) on it.
    
    Options is passed to access options.singletons and other required aspects
    of the parsed args.
    """
    
    # Work in a temporary directory
    # If not available, create the directory.
    drl_directory = tempfile.mkdtemp()
    
    # This is the base name for all the files that DrL uses to do the layout
    # We're going to put it in a temporary directory.
    # index added to extension in order to keep track of
    # respective layouts
    drl_basename = os.path.join(drl_directory, "layout" + str(index))
    
    # We can just pass our similarity matrix to DrL's truncate
    # But we want to run it through our tsv parser to strip comments and ensure
    # it's valid
    
    # This holds a reader for the similarity matrix
    sim_reader = matrix
    
    # This holds a writer for the sim file
    sim_writer = tsv.TsvWriter(open(drl_basename + ".sim", "w"))
    
    print "Regularizing similarity matrix..."
    sys.stdout.flush()
    
    # This holds a list of all unique signature names in the similarity matrix.
    # We can use it to add edges to keep singletons.
    signatures = set()
    
716 print "Reach for parts in sim_reader" | |
    for parts in sim_reader:
        # Keep the signature names used
        signatures.add(parts[0])
        signatures.add(parts[1])
        
        # Save the line to the regularized file
        sim_writer.list_line(parts)
    
    if options.singletons:
        # Now add a self-edge on every node, so we don't drop nodes with no
        # other strictly positive edges
        for signature in signatures:
            sim_writer.line(signature, signature, 1)
        
    sim_reader.close()
    sim_writer.close()
    
    # Now our input for DrL is prepared!
    
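    # For reference, the regularized .sim file is a tab-separated edge list,
    # one "<node>\t<node>\t<weight>" edge per line; with singletons enabled it
    # also carries self-edges, e.g. (made-up identifiers and weights):
    #
    #   sample-A    sample-B    0.83
    #   sample-B    sample-B    1
    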
    # Do DrL truncate.
    # TODO: pass a truncation level
    print "DrL: Truncating..."
    sys.stdout.flush()
    subprocess.check_call(["truncate", "-t", str(options.truncation_edges),
        drl_basename])
        
    # Run the DrL layout engine.
    print "DrL: Doing layout..."
    sys.stdout.flush()
    subprocess.check_call(["layout", drl_basename])
    
    # Put the string names back
    print "DrL: Restoring names..."
    sys.stdout.flush()
    subprocess.check_call(["recoord", drl_basename])
        
    # Now DrL has saved its coordinates as <signature name>\t<x>\t<y> rows in
    # <basename>.coord
    
    # We want to read that.
    # This holds a reader for the DrL output
    coord_reader = tsv.TsvReader(open(drl_basename + ".coord", "r"))
    
    # This holds a dict from signature name string to (x, y) float tuple. It is
    # also our official collection of node names that made it through DrL, and
    # therefore need their score data sent to the client.
    nodes = {}
    
    print "Reading DrL output..."
    sys.stdout.flush()
    for parts in coord_reader:
        nodes[parts[0]] = (float(parts[1]), float(parts[2]))
    
    coord_reader.close()
    
    # Save the DrL coordinates in our bundle, to be displayed client-side for
    # debugging.
    
    # index added to drl.tab extension in order to keep track of
    # respective drl.tabs
    coord_writer = tsv.TsvWriter(open(
        os.path.join(options.directory, "drl" + str(index) + ".tab"), "w"))
    
    for signature_name, (x, y) in nodes.iteritems():
        # Write a tsv with names instead of numbers, like what DrL recoord would
        # have written. This is what the Javascript on the client side wants.
        coord_writer.line(signature_name, x, y)
    
    coord_writer.close()
    
    # Delete our temporary directory.
    shutil.rmtree(drl_directory)
    
    # Return nodes dict back to main method for further processes
    return nodes

def compute_hexagram_assignments(nodes, index, options):
    """
    Now that we are taking multiple similarity matrices as inputs, we must
    compute hexagram assignments for each similarity matrix. These assignments
    are based upon the nodes output provided by the DrL function.
    
    Index relates each matrix name with its drl output, nodes, assignments, etc.
    Options contains the parsed arguments that are present in the main method.
    """
    # Do the hexagon layout
    # We do the squiggly rows setup, so express everything as integer x, y
    
    # This is a defaultdict from (x, y) integer tuple to id that goes there, or
    # None if it's free.
    global hexagons
    hexagons = collections.defaultdict(lambda: None)
    
    # This holds the side length that we use
    side_length = 1.0
    
    # This holds what will be a layer of how badly placed each hexagon is
    # A dict from node name to layer value
    placement_badnesses = {}
    
    for node, (node_x, node_y) in nodes.iteritems():
        # Assign each node to a hexagon
        # This holds the resulting placement badness for that hexagon (i.e.
        # distance from ideal location)
        badness = assign_hexagon(hexagons, node_x, node_y, node,
            scale=side_length)
            
        # Put the badness in the layer
        placement_badnesses[node] = float(badness)
    
    # Normalize the placement badness layer
    # This holds the max placement badness
    max_placement_badness = max(placement_badnesses.itervalues())
    print "Max placement badness: {}".format(max_placement_badness)
    
    if max_placement_badness != 0:
        # Normalize by the max if possible.
        placement_badnesses = {node: value / max_placement_badness for node,
            value in placement_badnesses.iteritems()}
    
    # The hexagons have been assigned. Make hexagons be a dict instead of a
    # defaultdict, so it pickles.
    # TODO: I should change it so I don't need to do this.
    hexagons = dict(hexagons)
    
    # Now dump the hexagon assignments as an id, x, y tsv. This will be read by
    # the JavaScript on the static page and be used to produce the
    # visualization.
    hexagon_writer = tsv.TsvWriter(open(os.path.join(options.directory,
        "assignments" + str(index) + ".tab"), "w"))
    
    # First find the x and y offsets needed to make all hexagon positions
    # positive
    min_x = min(coords[0] for coords in hexagons.iterkeys())
    min_y = min(coords[1] for coords in hexagons.iterkeys())
    
    for coords, name in hexagons.iteritems():
        # Write this hexagon assignment, converted to all-positive coordinates.
        hexagon_writer.line(name, coords[0] - min_x, coords[1] - min_y)
    hexagon_writer.close()
    
    # Hand the placement_badnesses dict back to the main method so that it can
    # be used elsewhere.
    return placement_badnesses

def write_matrix_names(options):
    """
    Write the names of the similarity matrices so that hexagram.js can
    process the names and create the toggle layout GUI.
    We pass options to access the parsed args and thus the matrix names.
    """
    name_writer = tsv.TsvWriter(open(os.path.join(options.directory,
        "matrixnames.tab"), "w"))
    for i in options.names:
        name_writer.line(i)
    
    name_writer.close()

def main(args):
    """
    Parses command line arguments, and makes visualization.
    "args" specifies the program arguments, with args[0] being the executable
    name. The return value should be used as the program's exit code.
    """
    
    options = parse_args(args) # This holds the nicely-parsed options object
    
    print "Created Options"
    
    # Test our picking
    x, y = hexagon_center(0, 0)
    if hexagon_pick(x, y) != (0, 0):
        raise Exception("Picking is broken!")
    
    # First bit of stdout becomes annotation in Galaxy
    # Make sure our output directory exists.
    if not os.path.exists(options.directory):
        # makedirs is the right thing to use here: recursive
        os.makedirs(options.directory)
    
    print "Writing matrix names..."
    # We must write the file names for hexagram.js to access.
    write_matrix_names(options)
    
    print "About to open matrices..."
    
    # We have file names stored in options.similarity
    # We must open the files and store them in the matrices list for access
    open_matrices(options.similarity)
    
    print "Opened matrices..."
    
    # The nodes list stores the list of nodes for each matrix
    # We must keep track of each set of nodes
    nodes_multiple = []
    
    print "Created nodes_multiple list..."
    
    # Index for drl.tab and drl.layout file naming. With indexes we can match
    # file names, to matrices, to drl output files.
    for index, i in enumerate(matrices):
        nodes_multiple.append(drl_similarity_functions(i, index, options))
    
    # Compute hexagram assignments for each similarity matrix's drl output,
    # which is found in nodes_multiple.
    
    # The placement_badnesses_multiple list is required to store the placement
    # badness dicts that are returned by the compute_hexagram_assignments
    # function.
    placement_badnesses_multiple = []
    for index, i in enumerate(nodes_multiple):
        placement_badnesses_multiple.append(compute_hexagram_assignments(i,
            index, options))
    
    # Now that we have hex assignments, compute layers.
    
    # In addition to making per-layer files, we're going to copy all the score
    # matrices to our output directory. That way, the client can download layers
    # in big chunks when it wants all layer data for statistics. We need to
    # write a list of matrices that the client can read, which is written by
    # this TSV writer.
    matrix_index_writer = tsv.TsvWriter(open(os.path.join(options.directory,
        "matrices.tab"), "w"))
    
    # Read in all the layer data at once
    # TODO: Don't read in all the layer data at once
    
    # This holds a dict from layer name to a dict from signature name to
    # score.
    layers = {}
    
    # This holds the names of all layers
    layer_names = []
    
    for matrix_number, score_filename in enumerate(options.scores):
        # First, copy the whole matrix into our output. This holds its filename.
        output_filename = "matrix_{}.tab".format(matrix_number)
        shutil.copy2(score_filename, os.path.join(options.directory,
            output_filename))
        
        # Record where we put it
        matrix_index_writer.line(output_filename)
        
        # This holds a reader for the scores TSV
        scores_reader = tsv.TsvReader(open(score_filename, "r"))
        
        # This holds an iterator over lines in that file
        # TODO: Write a proper header/data API
        scores_iterator = scores_reader.__iter__()
        
        try:
            # This holds the names of the columns (except the first, which is
            # labels). They also happen to be layer names
            file_layer_names = scores_iterator.next()[1:]
            
            # Add all the layers in this file to the complete list of layers.
            layer_names += file_layer_names
            
            # Ensure that we have a dict for every layer mentioned in the file
            # (even the ones that have no data below). Doing it this way means
            # all score matrices need disjoint columns, or the last one takes
            # precedence.
            for name in file_layer_names:
                layers[name] = {}
            
            for parts in scores_iterator:
                # This is the signature that this line is about
                signature_name = parts[0]
                
                if signature_name not in nodes_multiple[0]:
                    # This signature wasn't in our DrL output. Don't bother
                    # putting its layer data in our visualization. This saves
                    # space and makes the client-side layer counts accurate for
                    # the data actually displayable.
                    continue
                
                # These are the scores for all the layers for this signature
                layer_scores = parts[1:]
                
                for (layer_name, score) in itertools.izip(file_layer_names,
                    layer_scores):
                    
                    # Store all the layer scores in the appropriate
                    # dictionaries.
                    try:
                        layers[layer_name][signature_name] = float(score)
                    except ValueError:
                        # This is not a float.
                        # Don't set that entry for this layer.
                        # TODO: possibly ought to complain to the user? But then
                        # things like "N/A" won't be handled properly.
                        continue
                
        except StopIteration:
            # We don't have any real data here. Couldn't read the header line.
            # Skip to the next file
            pass
        
        # We're done with this score file now
        scores_reader.close()
    
    # We're done with all the input score matrices, so our index is done too.
    matrix_index_writer.close()
    
    # We have now loaded all layer data into memory as Python objects. What
    # could possibly go wrong?
    
    # Stick our placement badness layer on the end
    layer_names.append("Placement Badness")
    layers["Placement Badness"] = placement_badnesses_multiple[0]
    
    # Now we need to write layer files.
    
    # Generate some filenames for layers that we can look up by layer name.
    # We do this because layer names may not be valid filenames.
    layer_files = {name: os.path.join(options.directory,
        "layer_{}.tab".format(number)) for (name, number) in itertools.izip(
        layer_names, itertools.count())}
        
    for layer_name, layer in layers.iteritems():
        # Write out all the individual layer files
        # This holds the writer for this layer file
        scores_writer = tsv.TsvWriter(open(layer_files[layer_name], "w"))
        for signature_name, score in layer.iteritems():
            # Write the score for this signature in this layer
            scores_writer.line(signature_name, score)
        scores_writer.close()
    
    # We need something to sort layers by. We have "priority" (lower is
    # better)
    
    if len(layer_names) > 0 and options.stats:
        # We want to do this fancy parallel stats thing.
        # We skip it when there are no layers, so we don't try to join a
        # never-used pool, which seems to hang.
        
        print "Running statistics..."
        
        # This holds a list of ClusterFinders, one for each of our layers
        cluster_finders = [ClusterFinder(hexagons, layers[layer_name],
            window_size=options.window_size) for layer_name in layer_names]
        
        print "{} jobs to do.".format(len(cluster_finders))
        
        # This holds a multiprocessing pool for parallelization
        pool = multiprocessing.Pool()
        
        # This holds all the best p values in the same order
        best_p_values = pool.map(run_functor, cluster_finders)
        
        # Close down the pool so multiprocessing won't die sillily at the end
        pool.close()
        pool.join()
        
        # This holds a dict from layer name to priority (best p value)
        # We hope the order of the dict items has not changed
        layer_priorities = {layer_name: best_p_value for layer_name,
            best_p_value in itertools.izip(layer_names, best_p_values)}
    else:
        # We aren't doing any stats.
        
        print "Skipping statistics."
        
        # Make up priorities.
        layer_priorities = {name: float("+inf") for name in layer_names}
    
    # Count how many layer entries are greater than 0 for each binary layer, and
    # store that number in this dict by layer name. Things with the default
    # empty string instead of a number aren't binary layers, but they can use
    # the empty string as their TSV field value, so we can safely pull any layer
    # out of this by name.
    layer_positives = collections.defaultdict(str)
    
    for layer_name in layer_names:
        # Assume it's a binary layer until proven otherwise
        layer_positives[layer_name] = 0
        for value in layers[layer_name].itervalues():
            if value == 1:
                # Count up all the 1s in the layer
                layer_positives[layer_name] += 1
            elif value != 0:
                # It has something that isn't 1 or 0, so it can't be a binary
                # layer. Throw it out and try the next layer.
                layer_positives[layer_name] = ""
                break
    
    # Write an index of all the layers we have, in the form:
    # <layer>\t<file>\t<priority>\t<number of signatures with data>\t<number of
    # signatures that are 1 for binary layers, or empty>
    # This is the writer to use.
    index_writer = tsv.TsvWriter(open(os.path.join(options.directory,
        "layers.tab"), "w"))
        
    for layer_name, layer_file in layer_files.iteritems():
        # Write the index entry for this layer
        index_writer.line(layer_name, os.path.basename(layer_file),
            layer_priorities[layer_name], len(layers[layer_name]),
            layer_positives[layer_name])
        
    index_writer.close()
    
    # Sahil will implement linear regression code here
    
    # We must create an m * n matrix of samples * genes.
    # In order to create this matrix we first must know the number of hexes
    # and maintain them in a certain order. The order is important so that
    # we populate the matrix with the data values in the proper row (sample).
    
    # Copy over the user-specified colormaps file, or make an empty TSV if it's
    # not specified.
    
    # This holds a writer for the colormaps file. Creating it creates the file.
    colormaps_writer = tsv.TsvWriter(open(os.path.join(options.directory,
        "colormaps.tab"), "w"))
    
    if options.colormaps is not None:
        # The user specified colormap data, so copy it over
        # This holds a reader for the colormaps file
        colormaps_reader = tsv.TsvReader(options.colormaps)
        
        print "Regularizing colormaps file..."
        sys.stdout.flush()
        
        for parts in colormaps_reader:
            colormaps_writer.list_line(parts)
        
        colormaps_reader.close()
    
    # Close the colormaps file we wrote. It may have gotten data, or it may
    # still be empty.
    colormaps_writer.close()
    
    # Now copy any static files from where they live next to this Python file
    # into the web page bundle.
    # This holds the directory where this script lives, which also contains
    # static files.
    tool_root = os.path.dirname(os.path.realpath(__file__))
    
    # Copy over all the static files we need for the web page
    # This holds a list of them
    static_files = [
        # Static images
        "drag.svg",
        "filter.svg",
        "statistics.svg",
        "right.svg",
        "set.svg",
        "save.svg",
        "inflate.svg",
        "throbber.svg",
        
        # jQuery itself is pulled from a CDN.
        # We can't take everything offline since Google Maps needs to be sourced
        # from Google, so we might as well use CDN jQuery.
        
        # Select2 scripts and resources:
        "select2.css",
        "select2.js",
        "select2.png",
        "select2-spinner.gif",
        "select2x2.png",
        
        # The jQuery.tsv plugin
        "jquery.tsv.js",
        # The color library
        "color-0.4.1.js",
        # The jStat statistics library
        "jstat-1.0.0.js",
        # The Google Maps MapLabel library
        "maplabel-compiled.js",
        # The main CSS file
        "hexagram.css",
        # The main JavaScript file that runs the page
        "hexagram.js",
        # Web Worker for statistics
        "statistics.js",
        # File with all the tool code
        "tools.js"
    ]
    
    # We'd just use a directory of static files, but Galaxy needs single-level
    # output.
    for filename in static_files:
        shutil.copy2(os.path.join(tool_root, filename), options.directory)
    
    # Copy the HTML file to our output file. It automatically knows to read
    # assignments.tab, and does its own TSV parsing
    shutil.copy2(os.path.join(tool_root, "hexagram.html"), options.html)
    
    print "Visualization generation complete!"
    
    return 0
    
if __name__ == "__main__":
    try:
        # Get the return code to return
        # Don't just exit with it because sys.exit works by exceptions.
        return_code = main(sys.argv)
    except:
        traceback.print_exc()
        # Return a definite number and not some unspecified error code.
        return_code = 1
        
    sys.exit(return_code)