3
|
1 """
|
|
2 Author: Curtis Huttenhower
|
|
3 Description: Used to create tree structures to hierarchically normalize abundance tables.
|
|
4 """
|
|
5
|
|
6 #####################################################################################
|
|
7 #Copyright (C) <2012>
|
|
8 #
|
|
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
10 #this software and associated documentation files (the "Software"), to deal in the
|
|
11 #Software without restriction, including without limitation the rights to use, copy,
|
|
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
13 #and to permit persons to whom the Software is furnished to do so, subject to
|
|
14 #the following conditions:
|
|
15 #
|
|
16 #The above copyright notice and this permission notice shall be included in all copies
|
|
17 #or substantial portions of the Software.
|
|
18 #
|
|
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
25 #####################################################################################
|
|
26
|
|
27 __author__ = "Curtis Huttenhower"
|
|
28 __copyright__ = "Copyright 2012"
|
|
29 __credits__ = ["Curtis Huttenhower"]
|
|
30 __license__ = "MIT"
|
|
31 __maintainer__ = "Timothy Tickle"
|
|
32 __email__ = "ttickle@sph.harvard.edu"
|
|
33 __status__ = "Development"
|
|
34
|
|
35 import blist
|
|
36 import sys
|
|
37
|
|
38 class CClade:
|
|
39
|
|
40 def __init__( self ):
|
|
41 """
|
|
42 Initialize CClade
|
|
43 Dictionary to hold the children nodes from feature consensus lineages.
|
|
44 adValues is a list of the abundance value.
|
|
45 """
|
|
46
|
|
47 self.m_hashChildren = {}
|
|
48 self.m_adValues = None
|
|
49
|
|
50 def get( self, astrClade ):
|
|
51 """
|
|
52 Recursively travel the length of a tree until you find the terminal node
|
|
53 (where astrClade == Falseor actually [])
|
|
54 or a dict key that matches the clade call.
|
|
55 If at any time a clade is given that is not currently know, return a new clade
|
|
56 which is set to the current Clade as a child.
|
|
57 """
|
|
58
|
|
59 return self.m_hashChildren.setdefault(
|
|
60 astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self
|
|
61
|
|
62 def set( self, adValues ):
|
|
63 """
|
|
64 Set all the values given as a list in the same order given.
|
|
65 """
|
|
66
|
|
67 self.m_adValues = blist.blist( [0] ) * len( adValues )
|
|
68 for i, d in enumerate( adValues ):
|
|
69 if d:
|
|
70 self.m_adValues[i] = d
|
|
71
|
|
72 def impute( self ):
|
|
73 """
|
|
74 This allows you to recursively impute values for clades without values given their children counts.
|
|
75 Assumably this should be called only once and after all clade abundances have been added.
|
|
76 If the m_adValues already exist return the stored m_adValues. (No imputation needed).
|
|
77 Otherwise call impute for all children and take the sum of the values from all the children by column
|
|
78 Not a sum of a list but summing a list with lists by element.
|
|
79 """
|
|
80
|
|
81 #If values do not exist
|
|
82 if not self.m_adValues:
|
|
83 #Call impute on all children
|
|
84 #If the parent clade has no abundance values
|
|
85 #Then take a copy of the child's
|
|
86 #If they now have a copy of a child's but have other children
|
|
87 #Sum their children with thier current values
|
|
88 for pChild in self.m_hashChildren.values( ):
|
|
89 adChild = pChild.impute( )
|
|
90 if self.m_adValues:
|
|
91 for i in range( len( adChild or [] ) ):
|
|
92 if adChild[i]:
|
|
93 self.m_adValues[i] += adChild[i]
|
|
94 elif adChild:
|
|
95 self.m_adValues = adChild[:]
|
|
96 #If values exist return
|
|
97 return self.m_adValues
|
|
98
|
|
99 def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
|
|
100 """
|
|
101 Update the given hashValues dict with clade abundances given depth specifications
|
|
102 Return a set of integers returning an indicator of the structure of the tree preserved in the dict/hash
|
|
103 When the appropriate level of the tree is reached
|
|
104 Hashvalue is updated with the clade (including lineage) and the abundance. looks like {"clade":blist(["0.0","0.1"...])}
|
|
105 """
|
|
106
|
|
107 #fHit is true on atleast one of the following conditions:
|
|
108 #iTarget is not 0 indicating no changes
|
|
109 #Leaves are indicated to be only given and the target depth for the leaves is reached.
|
|
110 #The required depth is reached.
|
|
111 fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) )
|
|
112 #Increment depth
|
|
113 iDepth += 1
|
|
114 #Returns a set
|
|
115 setiRet = set()
|
|
116 #If there are children build integer set indicating structure of the tree preserved in the dict
|
|
117 if self.m_hashChildren:
|
|
118 #Union all the results from freeze of all children
|
|
119 #Call freeze but append the child clade to the clade in the call.
|
|
120 #And give an incremented depth
|
|
121 for strChild, pChild in self.m_hashChildren.items( ):
|
|
122 setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )
|
|
123 setiRet = set( ( i + 1 ) for i in setiRet )
|
|
124 else:
|
|
125 setiRet.add( 0 )
|
|
126 #Indicate if the correct level is reached
|
|
127 if iTarget < 0:
|
|
128 if fLeaves:
|
|
129 fHit = -( iTarget + 1 ) in setiRet
|
|
130 else:
|
|
131 fHit = -( iTarget + 1 ) <= max( setiRet )
|
|
132 #if astrClade is not == [] (so you are actually in a clade of the tree)
|
|
133 #And the clade has values (should be true, if not impute should have been callded before running this method)
|
|
134 #And we are at the correct level of the tree then
|
|
135 #Add to the dict the clade and the abundance values
|
|
136 if astrClade and self.m_adValues and fHit:
|
|
137 hashValues["|".join( astrClade )] = self.m_adValues
|
|
138 return setiRet
|
|
139
|
|
140 def freeze( self, hashValues, iTarget, fLeaves ):
|
|
141 """
|
|
142 Call helper function setting the clade and depth to defaults (start positions)
|
|
143 The important result of this method is hashValues is updated with clade and abundance information
|
|
144 """
|
|
145 self._freeze( hashValues, iTarget, [], 0, fLeaves )
|
|
146
|
|
147 def _repr( self, strClade ):
|
|
148 """
|
|
149 Represent tree clade for debugging. Helper function for recursive repr.
|
|
150 """
|
|
151
|
|
152 strRet = "<"
|
|
153 if strClade:
|
|
154 strRet += "%s %s" % (strClade, self.m_adValues)
|
|
155 if self.m_hashChildren:
|
|
156 strRet += " "
|
|
157 if self.m_hashChildren:
|
|
158 strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) )
|
|
159
|
|
160 return ( strRet + ">" )
|
|
161
|
|
162 def __repr__( self ):
|
|
163 """
|
|
164 Represent tree clade for debugging.
|
|
165 """
|
|
166 return self._repr( "" )
|
|
167
|
|
168 """
|
|
169 pTree = CClade( )
|
|
170 pTree.get( ("A", "B") ).set( [1, 2, 3] )
|
|
171 pTree.get( ("A", "C") ).set( [4, 5, 6] )
|
|
172 pTree.get( ("D", "E") ).set( [7, 8, 9] )
|
|
173 iTaxa = 0
|
|
174 if iTaxa:
|
|
175 pTree.impute( )
|
|
176 hashFeatures = {}
|
|
177 pTree.freeze( hashFeatures, iTaxa )
|
|
178 print( pTree )
|
|
179 print( hashFeatures )
|
|
180 sys.exit( 0 )
|
|
181 #"""
|