Mercurial > repos > idot > prop_venn
annotate venner.py @ 1:cc6707a1e044 draft default tip
added fixes from Brad Langhorst; https://bitbucket.org/account/notifications/read/3443176/patch-for-proportional-venn-tool-in-galaxy; added tool_dependencies for Mako template untested
| author | Ido Tamir <ido.tamir@imp.ac.at> |
|---|---|
| date | Mon, 24 Sep 2012 16:46:21 +0200 |
| parents | 8ea9b4e5a389 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
'''
Created on Jul 13, 2010

Generates a proportional Venn diagram from two or three input files.

Possible extension: write an output file that records, for each input row,
the segment it falls into (i.e. A, B, AB, AmB, BmA).

@author: Ido M. Tamir
'''
| 12 from mako.template import Template | |
| 13 from optparse import OptionParser | |
| 14 import urllib,re,sys | |
| 15 | |
class Bunch:
    '''Expose the entries of a dictionary as instance attributes.

    Values that are themselves dictionaries are wrapped in a nested Bunch,
    so ``Bunch({"a": {"b": 1}}).a.b`` evaluates to ``1``.
    '''
    def __init__(self, d):
        attributes = self.__dict__
        for name, value in d.items():
            # Wrap nested dictionaries so they support attribute access too.
            attributes[name] = Bunch(value) if isinstance(value, dict) else value
| 22 | |
class VennFile():
    '''One tab-separated input file plus the column that supplies the set members.

    Attributes:
        filePath: path of the tab-separated file.
        column:   0-based index of the column whose values are the keys.
        name:     display name with "/" removed and then URL-quoted.
        dict:     filled by read(); maps each key to the list of rows carrying it.
    '''
    def __init__(self, filePath, column, name):
        # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
        try:
            from urllib.parse import quote
        except ImportError:
            from urllib import quote

        self.filePath = filePath
        self.column = column
        cleanname = name.replace("/", "")
        # "%" is kept safe so that names which are already percent-encoded are
        # not encoded a second time.
        self.name = quote(cleanname, safe="%/:=&?~#+!$,;'@()*[]")
        self.dict = {}

    def read(self):
        '''Load the file and group its rows by the value of the key column.

        The file is read as raw bytes, so no assumption about its text
        encoding is needed; keys and rows are byte strings (plain ``str`` on
        Python 2). Each stored row is the unmodified line (including its line
        terminator) followed by a tab and the 0-based line number.

        Returns:
            self, so that construction and reading can be chained.

        Raises:
            IndexError: if a line has fewer columns than ``self.column`` needs.
        '''
        rowsByKey = {}
        # The context manager guarantees the handle is closed even when a
        # malformed line raises.
        with open(self.filePath, 'rb') as handle:
            for lineNr, line in enumerate(handle):
                key = line.split(b"\t")[self.column].strip()
                record = line + b"\t" + str(lineNr).encode("ascii")
                rowsByKey.setdefault(key, []).append(record)
        self.dict = rowsByKey
        return self
| 42 | |
class Venn2:
    '''Two-set Venn diagram rendered as a Google Chart image URL plus an HTML table.

    ``one`` and ``two`` are objects offering ``read()`` (returning an object
    with ``name`` and ``dict`` attributes); the keys of ``dict`` are the set
    members.

    NOTE(review): the chart is drawn by the Google Image Charts endpoint,
    which Google has deprecated - confirm it is still reachable.
    NOTE(review): ``title`` is appended to the URL as given; confirm whether
    callers pass it already URL-encoded.
    '''
    def __init__(self, title, size, one, two):
        self.one = one.read()
        self.two = two.read()
        self.title = title
        self.size = size

    def toUrl(self):
        '''Return the chart URL for the two sets and their intersection.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        keys_one_i_two = one_keys & two_keys

        total = len(one_keys) + len(two_keys)
        # Venn chart data order is A, B, C, A&B, ...; the third circle is unused.
        sizes = [len(one_keys), len(two_keys), 0, len(keys_one_i_two)]
        sizes = self.relSizes(sizes, total)
        names = [self.one.name, self.two.name]
        return self.url(total, sizes, names)

    def relSizes(self, sizes, total):
        '''Return each size as a rounded percentage of ``total``, as strings.

        When ``total`` is zero (all inputs empty) every percentage is "0"
        instead of raising ZeroDivisionError.
        '''
        if not total:
            return ["0" for _ in sizes]
        return [str(int(round(s / float(total) * 100))) for s in sizes]

    def url(self, total, sizes, names):
        '''Assemble the chart URL from percentage strings and legend names.

        ``total`` is accepted for interface compatibility but is not used.
        '''
        base = "http://chart.apis.google.com/chart?cht=v&chd=t:"
        counts = ",".join(sizes)
        titlep = "&chtt=" + self.title
        size = "&chs=" + str(self.size) + "x" + str(self.size)
        legend = "&chdl=" + "|".join(names)
        return base + counts + titlep + size + legend

    def toHtml(self):
        '''Render the HTML page: the chart image and a table of set counts.'''
        # unquote() lives in urllib on Python 2 and in urllib.parse on Python 3.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote

        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)

        numbers = Bunch({
            "one_keys" : len(one_keys),
            "two_keys" : len(two_keys),
            "one_only" : len(one_keys - two_keys),
            "two_only" : len(two_keys - one_keys),
            "one_i_two" : len(one_keys & two_keys),
        })

        # Names, title and URL come from the command line, so they are
        # HTML-escaped with Mako's "h" filter. The set symbol is written as an
        # entity to keep this source file and the output ASCII-only.
        template = """
<html>
<head>
<title>Venn diagram ${ title | h }</title>
</head>
<body>
<div>
<img src="${ url | h }"/>
</div>
<div>
<table>
<tr><th>Set</th><th>Count</th></tr>
<tr><td>${ one | h }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two | h }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ one | h } \\ ${ two | h }</td><td>${ n.one_only }</td></tr>
<tr><td>${ two | h } \\ ${ one | h }</td><td>${ n.two_only }</td></tr>
<tr><td>${ one | h } &cap; ${ two | h }</td><td>${ n.one_i_two }</td></tr>
</table>
</div>
</body>
</html>"""

        result = Template(template).render(
            one=unquote(self.one.name),
            two=unquote(self.two.name),
            n=numbers,
            title=self.title,
            url=self.toUrl(),
        )
        return result
| 109 | |
| 110 | |
| 111 | |
class Venn3(Venn2):
    '''Three-set variant of Venn2: adds a third input and the extra overlaps.'''
    def __init__(self, title, size, one, two, three):
        Venn2.__init__(self, title, size, one, two)
        self.three = three.read()

    def toUrl(self):
        '''Return the chart URL for three sets and all of their intersections.'''
        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        keys_one_i_two = one_keys & two_keys
        keys_one_i_three = one_keys & three_keys
        keys_two_i_three = two_keys & three_keys
        keys_one_i_two_i_three = keys_one_i_two & three_keys

        total = len(one_keys) + len(two_keys) + len(three_keys)
        # Venn chart data order: A, B, C, A&B, A&C, B&C, A&B&C.
        sizes = [
            len(one_keys),
            len(two_keys),
            len(three_keys),
            len(keys_one_i_two),
            len(keys_one_i_three),
            len(keys_two_i_three),
            len(keys_one_i_two_i_three),
        ]
        sizes = self.relSizes(sizes, total)
        names = [self.one.name, self.two.name, self.three.name]
        return self.url(total, sizes, names)

    def toHtml(self):
        '''Render the HTML page: the chart image and a table of all segment counts.'''
        # unquote() lives in urllib on Python 2 and in urllib.parse on Python 3.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote

        one_keys = set(self.one.dict)
        two_keys = set(self.two.dict)
        three_keys = set(self.three.dict)

        numbers = Bunch({
            "one_keys" : len(one_keys),
            "two_keys" : len(two_keys),
            "three_keys" : len(three_keys),
            "one_only" : len(one_keys - (two_keys | three_keys)),
            "two_only" : len(two_keys - (one_keys | three_keys)),
            "three_only" : len(three_keys - (one_keys | two_keys)),
            "one_two" : len((one_keys & two_keys) - three_keys),
            "one_three" : len((one_keys & three_keys) - two_keys),
            "two_three" : len((two_keys & three_keys) - one_keys),
            "one_i_two_i_three" : len(one_keys & two_keys & three_keys),
        })

        # Names, title and URL come from the command line, so they are
        # HTML-escaped with Mako's "h" filter. Set symbols are written as
        # entities to keep this source file and the output ASCII-only.
        template = """
<html>
<head>
<title>Venn diagram ${ title | h }</title>
</head>
<body>
<div>
<img src="${ url | h }"/>
</div>
<div>
<table>
<tr><th>Set</th><th>Count</th></tr>
<tr><td>${ one | h }</td><td>${ n.one_keys }</td></tr>
<tr><td>${ two | h }</td><td>${ n.two_keys }</td></tr>
<tr><td>${ three | h }</td><td>${ n.three_keys }</td></tr>
<tr><td>${ one | h } \\ (${ two | h } &cup; ${ three | h })</td><td>${ n.one_only }</td></tr>
<tr><td>${ two | h } \\ (${ one | h } &cup; ${ three | h })</td><td>${ n.two_only }</td></tr>
<tr><td>${ three | h } \\ (${ one | h } &cup; ${ two | h })</td><td>${ n.three_only }</td></tr>
<tr><td>${ one | h } &cap; ${ two | h } \\ ${ three | h } </td><td>${ n.one_two }</td></tr>
<tr><td>${ one | h } &cap; ${ three | h } \\ ${ two | h } </td><td>${ n.one_three }</td></tr>
<tr><td>${ two | h } &cap; ${ three | h } \\ ${ one | h } </td><td>${ n.two_three }</td></tr>
<tr><td>${ one | h } &cap; ${ two | h } &cap; ${ three | h }</td><td>${ n.one_i_two_i_three }</td></tr>
</table>
</div>
</body>
</html>"""

        result = Template(template).render(
            one=unquote(self.one.name),
            two=unquote(self.two.name),
            three=unquote(self.three.name),
            n=numbers,
            title=self.title,
            url=self.toUrl(),
        )
        return result
| 183 | |
| 184 | |
| 185 | |
| 186 | |
def main():
    '''Parse the command line, build the Venn diagram and write the HTML file.

    Required options are --files, --columns, --asNames and --outname; --title
    and --size are optional. Two or three files may be compared.

    On any validation problem all collected messages are printed and the
    process exits with status 1, so callers can detect the failure.
    '''
    parser = OptionParser()
    parser.add_option( "--files", dest="filePaths", help="file paths delimited by ,")
    parser.add_option( "--columns", dest="columns", help="0 based columnIndices delimited by ,")
    parser.add_option( "--asNames", dest="asNames", help="names of the columns for pretty print")
    parser.add_option( "--title", dest="title", help="title of plot")
    parser.add_option( "--size", dest="size", help="size plot, default 300")
    parser.add_option( "--outname", dest="outfileHtml", help="path of generated html file")

    (o, args) = parser.parse_args()

    # First pass: the three list-valued options must be present before they
    # can be split and compared.
    errors = []
    if o.filePaths is None:
        errors.append("please add required paths to files")
    if o.columns is None:
        errors.append( "please add required columns" )
    if o.asNames is None:
        errors.append( "please add required asNames")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    filePaths = o.filePaths.split(",")
    asNames = o.asNames.split(",")
    columns = None
    try:
        # A real list (not a lazy map object) is needed for len() and indexing.
        columns = [int(c) for c in o.columns.split(",")]
    except ValueError:
        errors.append( "columns must be integers, was:"+o.columns )

    # Second pass: the three lists must describe the same number of files.
    if columns is not None and not ( len(filePaths) == len(columns) == len(asNames) ):
        errors.append( "different length of filePaths, columns or names:" +o.columns+" "+" "+o.asNames+" "+o.filePaths )
    if o.outfileHtml is None:
        errors.append( "please add outfile name for html" )
    if len(filePaths) > 3:
        errors.append( "can only compare up to three files was:"+str(len(filePaths)))
    if len(filePaths) == 1:
        errors.append( "just one file to compare does not make sense!")
    if errors:
        print("\n".join(errors))
        sys.exit(1)

    title = o.title if o.title else ""
    size = o.size if o.size else "300"

    vennFiles = [VennFile(path, column, name) for path, column, name in zip(filePaths, columns, asNames)]
    if len(vennFiles) == 2:
        venn = Venn2(title, size, *vennFiles)
    else:
        venn = Venn3(title, size, *vennFiles)
    htmlText = venn.toHtml()

    with open(o.outfileHtml, 'w') as html:
        html.write(htmlText)
| 242 | |
| 243 | |
| 244 | |
| 245 | |
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# Example invocation:
#$ python venner.py --files testFiles/fileA.tab,testFiles/fileB.tab --columns 1,1 --outname out.html --asNames As,Bs
| 250 |
