annotate venner.py @ 0:8ea9b4e5a389

Uploaded
author g2cmnty@test-web1.g2.bx.psu.edu
date Wed, 22 Jun 2011 03:28:25 -0400
parents
children cc6707a1e044
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
1 #!/usr/bin/env python
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
2 '''
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
3 Created on Jul 13, 2010
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
4
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
5 Generates a Venn diagram from two or three input files
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
6
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
7 One could also generate an output file that, for each row, contains the segment
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
8 i.e. A, B, AB, AmB, BmA
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
9
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
10 @author: Ido M. Tamir
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
11 '''
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
12 from mako.template import Template
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
13 from optparse import OptionParser
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
14 import urllib,re,sys
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
15
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
class Bunch:
    '''Attribute-style view of a dictionary.

    Each key of ``d`` becomes an instance attribute; values that are
    themselves dictionaries are wrapped in a nested Bunch, so
    ``Bunch({"a": {"b": 1}}).a.b == 1``.
    '''

    def __init__(self, d):
        attributes = vars(self)
        for name, value in d.items():
            # Recurse so nested dictionaries are reachable with dots too.
            attributes[name] = Bunch(value) if isinstance(value, dict) else value
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
22
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
class VennFile():
    '''One tab-separated input file whose chosen column supplies the set members.

    Attributes:
        filePath: path of the tab-delimited file to read.
        column: index into the tab-split fields of each line (0 based).
        name: display name with every "/" removed and the remainder
            percent-encoded so it can be embedded in the chart URL.
        dict: maps each stripped column value to the list of raw lines that
            carry it; empty until read() has been called.
    '''

    def __init__(self, filePath, column, name):
        # urllib.quote lives in urllib.parse on Python 3; support both.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        self.filePath = filePath
        self.column = column
        cleanname = name.replace("/", "")
        self.name = quote(cleanname, safe="%/:=&?~#+!$,;'@()*[]")
        self.dict = {}

    def read(self):
        '''Load the file and group its lines by the value in ``self.column``.

        Returns:
            self, so the call can be chained.

        Raises:
            IndexError: if a line has no field at index ``self.column``.
        '''
        groups = {}
        # "with" guarantees the handle is closed even if a line is malformed.
        with open(self.filePath, 'rb') as handle:
            for lineNr, raw in enumerate(handle):
                # Binary mode yields str on Python 2 but bytes on Python 3;
                # decode there so the str-based split/concat below works.
                # surrogateescape keeps undecodable bytes distinct.
                if isinstance(raw, str):
                    line = raw
                else:
                    line = raw.decode("utf-8", "surrogateescape")
                fields = line.split("\t")
                try:
                    key = fields[self.column].strip()
                except IndexError:
                    raise IndexError(
                        "line %d of %s has no column %d"
                        % (lineNr, self.filePath, self.column))
                # NOTE: the raw line keeps its line terminator, so the line
                # number is appended after it (unchanged from the original).
                groups.setdefault(key, []).append(line + "\t" + str(lineNr))
        self.dict = groups
        return self
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
41
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
class Venn2:
    '''Two-set Venn diagram rendered through a Google chart URL plus an HTML table.

    Attributes:
        one, two: the input files, already read (objects exposing ``dict``
            and ``name``).
        title: chart/page title.
        size: edge length used for both dimensions of the chart.
    '''

    def __init__(self, title, size, one, two):
        '''Read both inputs immediately and remember title and size.'''
        self.one = one.read()
        self.two = two.read()
        self.title = title
        self.size = size

    def toUrl(self):
        '''Return the chart URL for the two key sets.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        total = len(one_keys) + len(two_keys)
        # Data slots are A, B, C, A&B; the third circle is unused, hence 0.
        sizes = [len(one_keys), len(two_keys), 0, len(one_keys & two_keys)]
        names = [self.one.name, self.two.name]
        return self.url(total, self.relSizes(sizes, total), names)

    def relSizes(self, sizes, total):
        '''Express each size as a rounded percentage of ``total``.

        Returns:
            A list of integer strings, one per entry of ``sizes``. When
            ``total`` is 0 (all inputs empty) every entry is "0" instead of
            raising ZeroDivisionError.
        '''
        if not total:
            return ["0" for _ in sizes]
        return [str(int(round(s / float(total) * 100))) for s in sizes]

    def url(self, total, sizes, names):
        '''Assemble the chart URL.

        Args:
            total: unused; kept for interface compatibility.
            sizes: iterable of strings for the ``chd`` data series.
            names: legend labels, expected to be URL-safe already.
        '''
        # urllib.quote lives in urllib.parse on Python 3; support both.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        # NOTE(review): Google has deprecated the Image Charts API - confirm
        # this endpoint still serves images.
        base = "http://chart.apis.google.com/chart?cht=v&chd=t:"
        counts = ",".join(sizes)
        # Percent-encode the title so "&", "#", quotes or spaces cannot
        # corrupt the query string or the surrounding HTML attribute.
        titlep = "&chtt=" + quote(self.title, safe="")
        size = "&chs=" + str(self.size) + "x" + str(self.size)
        legend = "&chdl=" + "|".join(names)
        return base + counts + titlep + size + legend

    def toHtml(self):
        '''Render the HTML page: chart image plus a table of segment counts.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        numbers = Bunch({
            "one_keys": len(one_keys),
            "two_keys": len(two_keys),
            "one_only": len(one_keys - two_keys),
            "two_only": len(two_keys - one_keys),
            "one_i_two": len(one_keys & two_keys),
        })

        # The title is free text, so it is HTML-escaped with Mako's "h"
        # filter. "\\ " renders as the set-difference backslash.
        template = """
<html>
<head>
<title>Venn diagram ${ title | h }</title>
</head>
<body>
<h3>${ title | h }</h3>
<div>
<img src="${ url }"/>
</div>
<div>
<table>
<tr><th>Segment</th><th>Count</th></tr>
<tr><td>${ one }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ one } \\ ${ two }</td><td>${ n.one_only }</td></tr>
<tr><td>${ two } \\ ${ one }</td><td>${ n.two_only }</td></tr>
<tr><td>${ one } &cap; ${ two }</td><td>${ n.one_i_two }</td></tr>
</table>
</div>
</body>
</html>"""
        result = Template(template).render(
            one=self.one.name, two=self.two.name, n=numbers,
            title=self.title, url=self.toUrl())
        return result
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
109
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
110
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
111
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
class Venn3(Venn2):
    '''Three-set Venn diagram; reuses relSizes() and url() inherited from Venn2.

    Attributes (in addition to those set by Venn2.__init__):
        three: the third input file, already read.
    '''

    def __init__(self, title, size, one, two, three):
        '''Read all three inputs immediately and remember title and size.'''
        Venn2.__init__(self, title, size, one, two)
        self.three = three.read()

    def toUrl(self):
        '''Return the chart URL for the three key sets.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        total = len(one_keys) + len(two_keys) + len(three_keys)
        # Data slots: A, B, C, A&B, A&C, B&C, A&B&C.
        sizes = [
            len(one_keys),
            len(two_keys),
            len(three_keys),
            len(one_keys & two_keys),
            len(one_keys & three_keys),
            len(two_keys & three_keys),
            len(one_keys & two_keys & three_keys),
        ]
        names = [self.one.name, self.two.name, self.three.name]
        return self.url(total, self.relSizes(sizes, total), names)

    def toHtml(self):
        '''Render the HTML page: chart image plus a table of segment counts.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        numbers = Bunch({
            "one_keys": len(one_keys),
            "two_keys": len(two_keys),
            "three_keys": len(three_keys),
            "one_only": len(one_keys - (two_keys | three_keys)),
            "two_only": len(two_keys - (one_keys | three_keys)),
            "three_only": len(three_keys - (one_keys | two_keys)),
            "one_two": len((one_keys & two_keys) - three_keys),
            "one_three": len((one_keys & three_keys) - two_keys),
            "two_three": len((two_keys & three_keys) - one_keys),
            "one_i_two_i_three": len(one_keys & two_keys & three_keys),
        })

        # The title is free text, so it is HTML-escaped with Mako's "h"
        # filter. "\\ " renders as the set-difference backslash.
        template = """
<html>
<head>
<title>Venn diagram ${ title | h }</title>
</head>
<body>
<h3>${ title | h }</h3>
<div>
<img src="${ url }"/>
</div>
<div>
<table>
<tr><th>Segment</th><th>Count</th></tr>
<tr><td>${ one }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ three }</td><td>${ n.three_keys }</td></tr>
<tr><td>${ one } \\ (${ two } &cup; ${ three })</td><td>${ n.one_only }</td></tr>
<tr><td>${ two } \\ (${ one } &cup; ${ three})</td><td>${ n.two_only }</td></tr>
<tr><td>${ three } \\ (${ one } &cup; ${ two })</td><td>${ n.three_only }</td></tr>
<tr><td>${ one } &cap; ${ two } \\ ${ three } </td><td>${ n.one_two }</td></tr>
<tr><td>${ one } &cap; ${ three } \\ ${ two } </td><td>${ n.one_three }</td></tr>
<tr><td>${ two } &cap; ${ three } \\ ${ one } </td><td>${ n.two_three }</td></tr>
<tr><td>${ one } &cap; ${ two } &cap; ${ three }</td><td>${ n.one_i_two_i_three }</td></tr>
</table>
</div>
</body>
</html>"""
        result = Template(template).render(
            one=self.one.name, two=self.two.name, three=self.three.name,
            n=numbers, title=self.title, url=self.toUrl())
        return result
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
184
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
185
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
186
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
187
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
def main():
    '''Parse the command line, build the Venn diagram and write the HTML file.

    Required options: --files, --columns, --asNames (comma-delimited lists of
    equal length, two or three entries) and --outname. Optional: --title and
    --size (default "300").

    Validation problems are printed and the process exits with status 1.
    '''
    parser = OptionParser()
    parser.add_option("--files", dest="filePaths", help="file paths delimited by ,")
    parser.add_option("--columns", dest="columns", help="0 based columnIndices delimited by ,")
    parser.add_option("--asNames", dest="asNames", help="names of the columns for pretty print")
    parser.add_option("--title", dest="title", help="title of plot")
    parser.add_option("--size", dest="size", help="size plot, default 300")
    parser.add_option("--outname", dest="outfileHtml", help="path of generated html file")

    (o, args) = parser.parse_args()

    # Stage 1: the three list options must be present before they can be split.
    errors = []
    if o.filePaths is None:
        errors.append("please add required paths to files")
    if o.columns is None:
        errors.append("please add required columns")
    if o.asNames is None:
        errors.append("please add required asNames")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    # Stage 2: check the contents of the options.
    filePaths = o.filePaths.split(",")
    columnTexts = o.columns.split(",")
    asNames = o.asNames.split(",")
    try:
        # A real list (not a lazy map) so it can be indexed on Python 3.
        columns = [int(text) for text in columnTexts]
    except ValueError:
        columns = []
        errors.append("columns must be 0 based integer indices, was:" + o.columns)
    # Compare the raw column entries so this check is independent of the
    # integer conversion above.
    if len(filePaths) != len(columnTexts) or len(columnTexts) != len(asNames):
        errors.append("different length of filePaths, columns or names:"
                      + o.columns + "  " + o.asNames + " " + o.filePaths)
    if o.outfileHtml is None:
        errors.append("please add outfile name for html")
    if len(filePaths) > 3:
        errors.append("can only compare up to three files was:" + str(len(filePaths)))
    if len(filePaths) == 1:
        errors.append("just one file to compare does not make sense!")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    title = o.title or ""
    size = o.size or "300"

    # After validation there are exactly two or three inputs.
    vennFiles = [VennFile(path, column, name)
                 for path, column, name in zip(filePaths, columns, asNames)]
    if len(vennFiles) == 2:
        venn = Venn2(title, size, *vennFiles)
    else:
        venn = Venn3(title, size, *vennFiles)

    # Render first so a failure does not leave an empty output file behind.
    htmlText = venn.toHtml()
    with open(o.outfileHtml, 'w') as html:
        html.write(htmlText)
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
243
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
244
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
245
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
246
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
249
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
250 #$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs
8ea9b4e5a389 Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
251