Mercurial > repos > yufei-luo > s_mart
comparison commons/core/tree/Tree.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 import os, re, sys | |
2 | |
3 class Tree: | |
4 | |
5 def __init__( self, inFileName="" ): | |
6 self.tree = None | |
7 self.inFileName = inFileName | |
8 if self.inFileName != "": | |
9 self.loadTree() | |
10 | |
11 def loadTree( self, verbose=0 ): | |
12 inF = open( self.inFileName, "r" ) | |
13 lines = inF.readlines() | |
14 inF.close() | |
15 line = "".join(lines).replace("\n","") | |
16 self.tree = self.parseTree( line ) | |
17 if verbose > 0: | |
18 print "nb of leaves: %i" % ( self.getNbOfLeaves( self.tree ) ) | |
19 | |
20 def parseTree( self, sTree ): | |
21 if "," not in sTree: | |
22 name, length = sTree.split(":") | |
23 return self.makeLeaf( name, float(length) ) | |
24 | |
25 distPattern = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[e\-\d\.]+)$') | |
26 m = distPattern.search( sTree ) | |
27 length = 0 | |
28 if m: | |
29 if m.group('length'): length = float( m.group('length') ) | |
30 sTree = m.group('tree') | |
31 if length == "": length = 0 | |
32 | |
33 lhs, rhs = self.parseSubTree( sTree ) | |
34 | |
35 return { "name": "internal", | |
36 "left": self.parseTree( lhs ), | |
37 "right": self.parseTree( rhs ), | |
38 "length": length } | |
39 | |
40 def makeLeaf( self, name, length ): | |
41 return { "left":None, "right":None, "name":name, "length":length } | |
42 | |
43 def parseSubTree( self, sTree ): | |
44 """ | |
45 Parse a newick-formatted string of type 'a,b' into [a,b] | |
46 """ | |
47 chars = list( sTree[1:-1] ) | |
48 count = 0 | |
49 isLhs = True | |
50 leftS = "" | |
51 rightS = "" | |
52 for c in chars: | |
53 if c == "(": | |
54 count += 1 | |
55 elif c == ")": | |
56 count -= 1 | |
57 elif (c == ",") and (count == 0) and (isLhs) : | |
58 isLhs = False | |
59 continue | |
60 if isLhs: leftS += c | |
61 else: rightS += c | |
62 return [ leftS, rightS ] | |
63 | |
64 def toNewick( self, tree ): | |
65 newString = "" | |
66 if tree["name"] is not "internal": | |
67 newString += tree["name"] | |
68 else: | |
69 newString += "(" | |
70 newString += self.toNewick( tree["left"] ) | |
71 newString += "," | |
72 newString += self.toNewick( tree["right"] ) | |
73 newString += ")" | |
74 if tree["length"]: | |
75 newString += ":" | |
76 newString += "%f" % ( tree["length"] ) | |
77 return newString | |
78 | |
79 def saveTree( self, outFileName ): | |
80 outF = open( outFileName, "w" ) | |
81 outF.write( self.toNewick( self.tree ) ) | |
82 outF.close() | |
83 | |
84 def replaceHeaderViaPrefixSearch( self, tree, dNew2Init ): | |
85 if dNew2Init.has_key( tree["name"] ): | |
86 tree["name"] = dNew2Init[ tree["name"] ].replace(" ","_").replace("::","-").replace(",","-") | |
87 if tree["left"] != None: | |
88 self.replaceHeaderViaPrefixSearch( tree["left"], dNew2Init ) | |
89 if tree["right"] != None: | |
90 self.replaceHeaderViaPrefixSearch( tree["right"], dNew2Init ) | |
91 | |
92 def retrieveInitialSequenceHeaders( self, dNew2Init, outFileName ): | |
93 tree = self.tree | |
94 self.replaceHeaderViaPrefixSearch( tree, dNew2Init ) | |
95 self.tree = tree | |
96 self.saveTree( outFileName ) | |
97 | |
98 def getNbOfChildNodes( self, tree, nbNodes ): | |
99 if tree["left"] is not None: | |
100 nbNodes += 1 | |
101 nbNodes = self.getNbOfChildNodes( tree["left"], nbNodes ) | |
102 if tree["right"] is not None: | |
103 nbNodes += 1 | |
104 nbNodes = self.getNbOfChildNodes( tree["right"], nbNodes ) | |
105 return nbNodes | |
106 | |
107 def getNbOfNodes( self ): | |
108 nbNodes = 0 | |
109 return self.getNbOfChildNodes( self.tree, nbNodes ) | |
110 | |
111 def getNbOfChildLeaves( self, tree, nbLeaves ): | |
112 if tree["name"] != "internal": | |
113 nbLeaves += 1 | |
114 if tree["left"] is not None: | |
115 nbLeaves = self.getNbOfChildLeaves( tree["left"], nbLeaves ) | |
116 if tree["right"] is not None: | |
117 nbLeaves = self.getNbOfChildLeaves( tree["right"], nbLeaves ) | |
118 return nbLeaves | |
119 | |
120 def getNbOfLeaves( self ): | |
121 nbLeaves = 0 | |
122 return self.getNbOfChildLeaves( self.tree, nbLeaves ) |