comparison src/breadcrumbs/src/CClade.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0de566f21448
1 """
2 Author: Curtis Huttenhower
3 Description: Used to create tree structures to hierarchically normalize abundance tables.
4 """
5
6 #####################################################################################
7 #Copyright (C) <2012>
8 #
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
10 #this software and associated documentation files (the "Software"), to deal in the
11 #Software without restriction, including without limitation the rights to use, copy,
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13 #and to permit persons to whom the Software is furnished to do so, subject to
14 #the following conditions:
15 #
16 #The above copyright notice and this permission notice shall be included in all copies
17 #or substantial portions of the Software.
18 #
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #####################################################################################
26
27 __author__ = "Curtis Huttenhower"
28 __copyright__ = "Copyright 2012"
29 __credits__ = ["Curtis Huttenhower"]
30 __license__ = "MIT"
31 __maintainer__ = "Timothy Tickle"
32 __email__ = "ttickle@sph.harvard.edu"
33 __status__ = "Development"
34
35 import blist
36 import sys
37
38 class CClade:
39
40 def __init__( self ):
41 """
42 Initialize CClade
43 Dictionary to hold the children nodes from feature consensus lineages.
44 adValues is a list of the abundance value.
45 """
46
47 self.m_hashChildren = {}
48 self.m_adValues = None
49
50 def get( self, astrClade ):
51 """
52 Recursively travel the length of a tree until you find the terminal node
53 (where astrClade == Falseor actually [])
54 or a dict key that matches the clade call.
55 If at any time a clade is given that is not currently know, return a new clade
56 which is set to the current Clade as a child.
57 """
58
59 return self.m_hashChildren.setdefault(
60 astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self
61
62 def set( self, adValues ):
63 """
64 Set all the values given as a list in the same order given.
65 """
66
67 self.m_adValues = blist.blist( [0] ) * len( adValues )
68 for i, d in enumerate( adValues ):
69 if d:
70 self.m_adValues[i] = d
71
72 def impute( self ):
73 """
74 This allows you to recursively impute values for clades without values given their children counts.
75 Assumably this should be called only once and after all clade abundances have been added.
76 If the m_adValues already exist return the stored m_adValues. (No imputation needed).
77 Otherwise call impute for all children and take the sum of the values from all the children by column
78 Not a sum of a list but summing a list with lists by element.
79 """
80
81 #If values do not exist
82 if not self.m_adValues:
83 #Call impute on all children
84 #If the parent clade has no abundance values
85 #Then take a copy of the child's
86 #If they now have a copy of a child's but have other children
87 #Sum their children with thier current values
88 for pChild in self.m_hashChildren.values( ):
89 adChild = pChild.impute( )
90 if self.m_adValues:
91 for i in range( len( adChild or [] ) ):
92 if adChild[i]:
93 self.m_adValues[i] += adChild[i]
94 elif adChild:
95 self.m_adValues = adChild[:]
96 #If values exist return
97 return self.m_adValues
98
99 def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
100 """
101 Update the given hashValues dict with clade abundances given depth specifications
102 Return a set of integers returning an indicator of the structure of the tree preserved in the dict/hash
103 When the appropriate level of the tree is reached
104 Hashvalue is updated with the clade (including lineage) and the abundance. looks like {"clade":blist(["0.0","0.1"...])}
105 """
106
107 #fHit is true on atleast one of the following conditions:
108 #iTarget is not 0 indicating no changes
109 #Leaves are indicated to be only given and the target depth for the leaves is reached.
110 #The required depth is reached.
111 fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) )
112 #Increment depth
113 iDepth += 1
114 #Returns a set
115 setiRet = set()
116 #If there are children build integer set indicating structure of the tree preserved in the dict
117 if self.m_hashChildren:
118 #Union all the results from freeze of all children
119 #Call freeze but append the child clade to the clade in the call.
120 #And give an incremented depth
121 for strChild, pChild in self.m_hashChildren.items( ):
122 setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )
123 setiRet = set( ( i + 1 ) for i in setiRet )
124 else:
125 setiRet.add( 0 )
126 #Indicate if the correct level is reached
127 if iTarget < 0:
128 if fLeaves:
129 fHit = -( iTarget + 1 ) in setiRet
130 else:
131 fHit = -( iTarget + 1 ) <= max( setiRet )
132 #if astrClade is not == [] (so you are actually in a clade of the tree)
133 #And the clade has values (should be true, if not impute should have been callded before running this method)
134 #And we are at the correct level of the tree then
135 #Add to the dict the clade and the abundance values
136 if astrClade and self.m_adValues and fHit:
137 hashValues["|".join( astrClade )] = self.m_adValues
138 return setiRet
139
140 def freeze( self, hashValues, iTarget, fLeaves ):
141 """
142 Call helper function setting the clade and depth to defaults (start positions)
143 The important result of this method is hashValues is updated with clade and abundance information
144 """
145 self._freeze( hashValues, iTarget, [], 0, fLeaves )
146
147 def _repr( self, strClade ):
148 """
149 Represent tree clade for debugging. Helper function for recursive repr.
150 """
151
152 strRet = "<"
153 if strClade:
154 strRet += "%s %s" % (strClade, self.m_adValues)
155 if self.m_hashChildren:
156 strRet += " "
157 if self.m_hashChildren:
158 strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) )
159
160 return ( strRet + ">" )
161
162 def __repr__( self ):
163 """
164 Represent tree clade for debugging.
165 """
166 return self._repr( "" )
167
168 """
169 pTree = CClade( )
170 pTree.get( ("A", "B") ).set( [1, 2, 3] )
171 pTree.get( ("A", "C") ).set( [4, 5, 6] )
172 pTree.get( ("D", "E") ).set( [7, 8, 9] )
173 iTaxa = 0
174 if iTaxa:
175 pTree.impute( )
176 hashFeatures = {}
177 pTree.freeze( hashFeatures, iTaxa )
178 print( pTree )
179 print( hashFeatures )
180 sys.exit( 0 )
181 #"""