Mercurial > repos > idot > prop_venn
comparison venner.py @ 0:8ea9b4e5a389
Uploaded
author | g2cmnty@test-web1.g2.bx.psu.edu |
---|---|
date | Wed, 22 Jun 2011 03:28:25 -0400 |
parents | |
children | cc6707a1e044 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8ea9b4e5a389 |
---|---|
1 #!/usr/bin/env python | |
2 ''' | |
3 Created on Jul 13, 2010 | |
4 | |
5 Generates a Venn diagram from two or three input files | |
6 | |
7 One could also generate an output file that lists, for each row, the segment it belongs to, | |
8 i.e. A, B, AB, AmB, BmA | |
9 | |
10 @author: Ido M. Tamir | |
11 ''' | |
12 from mako.template import Template | |
13 from optparse import OptionParser | |
14 import urllib,re,sys | |
15 | |
class Bunch:
    '''Expose the entries of a dictionary as instance attributes.

    Values that are themselves dictionaries are wrapped in their own
    ``Bunch``, so nested entries can be reached with chained attribute
    access (``b.outer.inner``).
    '''

    def __init__(self, d):
        attributes = self.__dict__
        for key in d:
            value = d[key]
            # Recurse so that nested dictionaries become nested Bunch objects.
            attributes[key] = Bunch(value) if isinstance(value, dict) else value
22 | |
class VennFile():
    '''One tab-separated input file plus the column that holds its keys.

    Attributes:
        filePath: path of the file to read.
        column:   0-based index of the tab-separated column used as key.
        name:     display name with every "/" removed and then
                  percent-quoted so it can be embedded in a chart URL.
        dict:     mapping key -> list of the lines carrying that key;
                  empty until ``read()`` has been called.
    '''

    def __init__(self, filePath, column, name):
        # ``quote`` lives in ``urllib`` on Python 2 and in ``urllib.parse``
        # on Python 3; importing it here keeps the class usable on both.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        self.filePath = filePath
        self.column = column
        cleanname = name.replace("/", "")
        self.name = quote(cleanname, safe="%/:=&?~#+!$,;'@()*[]")
        self.dict = {}

    def read(self):
        '''Index the lines of the file by the stripped value of the key column.

        Every stored line is the raw line (line terminator included)
        followed by a tab and its 0-based line number.  The file is read in
        binary mode, so on Python 3 keys and stored lines are ``bytes``; on
        Python 2 they are plain ``str`` exactly as before.

        Returns:
            self, so construction and reading can be chained.

        Raises:
            IOError/OSError: the file cannot be opened.
            IndexError: a line has fewer than ``column + 1`` columns.
        '''
        keyed_lines = {}
        # The context manager closes the handle even if a line is malformed.
        with open(self.filePath, 'rb') as handle:
            for line_nr, line in enumerate(handle):
                key = line.split(b"\t")[self.column].strip()
                tagged = line + b"\t" + str(line_nr).encode("ascii")
                keyed_lines.setdefault(key, []).append(tagged)
        # Only publish the result once the whole file was read successfully.
        self.dict = keyed_lines
        return self
41 | |
class Venn2:
    '''Venn diagram of the key sets of two input files.

    The diagram itself is an image URL for the chart service at
    chart.apis.google.com (chart type ``v``); ``toHtml`` embeds that image
    in a small page together with a table of segment counts.
    '''

    def __init__(self, title, size, one, two):
        '''Read both inputs immediately.

        Args:
            title: chart/page title; inserted into the URL and HTML as-is.
            size:  edge length used for both width and height of the chart.
            one, two: objects whose ``read()`` returns an object exposing
                ``dict`` (keys) and ``name`` (legend label).
        '''
        self.one = one.read()
        self.two = two.read()
        self.title = title
        self.size = size

    def toUrl(self):
        '''Return the chart URL for the two key sets.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        total = len(one_keys) + len(two_keys)
        # Data order: circle one, circle two, (unused) circle three, one-and-two
        # overlap -- see the Venn chart data format of the chart service.
        sizes = [len(one_keys), len(two_keys), 0, len(one_keys & two_keys)]
        names = [self.one.name, self.two.name]
        return self.url(total, self.relSizes(sizes, total), names)

    def relSizes(self, sizes, total):
        '''Express each size as a rounded percentage of ``total``.

        Returns:
            A list of integer strings.  When ``total`` is zero (all inputs
            empty) every entry is "0" instead of dividing by zero.
        '''
        if not total:
            return ["0" for _ in sizes]
        return [str(int(round(s / float(total) * 100))) for s in sizes]

    def url(self, total, sizes, names):
        '''Assemble the chart URL.

        Args:
            total: unused; kept so existing callers keep working.
            sizes: iterable of strings for the ``chd`` data series.
            names: legend labels, joined with "|".
        '''
        base = "http://chart.apis.google.com/chart?cht=v&chd=t:"
        counts = ",".join(sizes)
        titlep = "&chtt=" + self.title
        size = "&chs=" + str(self.size) + "x" + str(self.size)
        legend = "&chdl=" + "|".join(names)
        return base + counts + titlep + size + legend

    def toHtml(self):
        '''Render the HTML page with the chart image and the count table.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        numbers = Bunch({
            "one_keys": len(one_keys),
            "two_keys": len(two_keys),
            "one_only": len(one_keys - two_keys),
            "two_only": len(two_keys - one_keys),
            "one_i_two": len(one_keys & two_keys),
        })

        # "\\" yields the single literal backslash used as the set-difference
        # sign (a bare "\ " is an invalid escape sequence).  The intersection
        # sign is written as an HTML entity so this source and the generated
        # page stay pure ASCII.
        template = """
<html>
<head>
<title>Venn diagram ${title}</title>
</head>
<body>
<h3>${ title }</h3>
<div>
<img src="${ url }"/>
</div>
<div>
<table>
<tr><th>Segment</th><th>Count</th></tr>
<tr><td>${ one }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ one } \\ ${ two }</td><td>${ n.one_only }</td></tr>
<tr><td>${ two } \\ ${ one }</td><td>${ n.two_only }</td></tr>
<tr><td>${ one } &cap; ${ two }</td><td>${ n.one_i_two }</td></tr>
</table>
</div>
</body>
</html>"""
        result = Template(template).render(
            one=self.one.name,
            two=self.two.name,
            n=numbers,
            title=self.title,
            url=self.toUrl(),
        )
        return result
109 | |
110 | |
111 | |
class Venn3(Venn2):
    '''Venn diagram of the key sets of three input files.

    Extends ``Venn2`` with a third input; URL assembly and percentage
    scaling are inherited.
    '''

    def __init__(self, title, size, one, two, three):
        '''Read all three inputs immediately (see ``Venn2.__init__``).'''
        Venn2.__init__(self, title, size, one, two)
        self.three = three.read()

    def toUrl(self):
        '''Return the chart URL for the three key sets.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        total = len(one_keys) + len(two_keys) + len(three_keys)
        # Data order: the three circles, then the pairwise overlaps
        # one/two, one/three, two/three, then the triple overlap.
        sizes = [
            len(one_keys),
            len(two_keys),
            len(three_keys),
            len(one_keys & two_keys),
            len(one_keys & three_keys),
            len(two_keys & three_keys),
            len(one_keys & two_keys & three_keys),
        ]
        names = [self.one.name, self.two.name, self.three.name]
        return self.url(total, self.relSizes(sizes, total), names)

    def toHtml(self):
        '''Render the HTML page with the chart image and the count table.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        numbers = Bunch({
            "one_keys": len(one_keys),
            "two_keys": len(two_keys),
            "three_keys": len(three_keys),
            "one_only": len(one_keys - (two_keys | three_keys)),
            "two_only": len(two_keys - (one_keys | three_keys)),
            "three_only": len(three_keys - (one_keys | two_keys)),
            "one_two": len((one_keys & two_keys) - three_keys),
            "one_three": len((one_keys & three_keys) - two_keys),
            "two_three": len((two_keys & three_keys) - one_keys),
            "one_i_two_i_three": len(one_keys & two_keys & three_keys),
        })

        # "\\" yields the single literal backslash used as the set-difference
        # sign (a bare "\ " is an invalid escape sequence).  Union and
        # intersection signs are written as HTML entities so this source and
        # the generated page stay pure ASCII.
        template = """
<html>
<head>
<title>Venn diagram ${title}</title>
</head>
<body>
<h3>${ title }</h3>
<div>
<img src="${ url }"/>
</div>
<div>
<table>
<tr><th>Segment</th><th>Count</th></tr>
<tr><td>${ one }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ three }</td><td>${ n.three_keys }</td></tr>
<tr><td>${ one } \\ (${ two } &cup; ${ three })</td><td>${ n.one_only }</td></tr>
<tr><td>${ two } \\ (${ one } &cup; ${ three})</td><td>${ n.two_only }</td></tr>
<tr><td>${ three } \\ (${ one } &cup; ${ two })</td><td>${ n.three_only }</td></tr>
<tr><td>${ one } &cap; ${ two } \\ ${ three } </td><td>${ n.one_two }</td></tr>
<tr><td>${ one } &cap; ${ three } \\ ${ two } </td><td>${ n.one_three }</td></tr>
<tr><td>${ two } &cap; ${ three } \\ ${ one } </td><td>${ n.two_three }</td></tr>
<tr><td>${ one } &cap; ${ two } &cap; ${ three }</td><td>${ n.one_i_two_i_three }</td></tr>
</table>
</div>
</body>
</html>"""
        result = Template(template).render(
            one=self.one.name,
            two=self.two.name,
            three=self.three.name,
            n=numbers,
            title=self.title,
            url=self.toUrl(),
        )
        return result
184 | |
185 | |
186 | |
187 | |
def main():
    '''Parse the command line, build the Venn diagram and write the HTML page.

    Required options are --files, --columns, --asNames (comma-delimited
    lists of equal length, two or three entries) and --outname; --title and
    --size are optional.  All validation problems are printed, one per
    line, and the process then exits with status 1 without writing output.
    '''
    parser = OptionParser()
    parser.add_option( "--files", dest="filePaths", help="file paths delimited by ,")
    parser.add_option( "--columns", dest="columns", help="0 based columnIndices delimited by ,")
    parser.add_option( "--asNames", dest="asNames", help="names of the columns for pretty print")
    parser.add_option( "--title", dest="title", help="title of plot")
    parser.add_option( "--size", dest="size", help="size plot, default 300")
    parser.add_option( "--outname", dest="outfileHtml", help="path of generated html file")

    (o, args) = parser.parse_args()

    # Stage 1: the three list options must be present before they can be split.
    errors = []
    if o.filePaths is None:
        errors.append("please add required paths to files")
    if o.columns is None:
        errors.append( "please add required columns" )
    if o.asNames is None:
        errors.append( "please add required asNames")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    # Stage 2: consistency of the parsed values.
    filePaths = o.filePaths.split(",")
    asNames = o.asNames.split(",")
    columns = None
    try:
        # A real list (not a lazy map) because the entries are indexed below.
        columns = [int(c) for c in o.columns.split(",")]
    except ValueError:
        errors.append( "columns must be integers, was:"+o.columns )
    if columns is not None and not ( len(filePaths) == len(columns) == len(asNames) ):
        errors.append( "different length of filePaths, columns or names:" +o.columns+" "+" "+o.asNames+" "+o.filePaths )
    if o.outfileHtml is None:
        errors.append( "please add outfile name for html" )
    if len(filePaths) > 3:
        errors.append( "can only compare up to three files was:"+str(len(filePaths)))
    if len(filePaths) == 1:
        errors.append( "just one file to compare does not make sense!")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    title = o.title or ""
    size = o.size or "300"

    inputs = [VennFile(path, column, name) for path, column, name in zip(filePaths, columns, asNames)]
    # After validation there are exactly two or three inputs.
    if len(inputs) == 2:
        venn = Venn2(title, size, inputs[0], inputs[1])
    else:
        venn = Venn3(title, size, inputs[0], inputs[1], inputs[2])
    htmlText = venn.toHtml()
    with open(o.outfileHtml, 'w') as html:
        html.write(htmlText)
243 | |
244 | |
245 | |
246 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
249 | |
250 #$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs | |
251 |