annotate src/breadcrumbs/src/CClade.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
sagun98
parents:
diff changeset
1 """
sagun98
parents:
diff changeset
2 Author: Curtis Huttenhower
sagun98
parents:
diff changeset
3 Description: Used to create tree structures to hierarchically normalize abundance tables.
sagun98
parents:
diff changeset
4 """
sagun98
parents:
diff changeset
5
sagun98
parents:
diff changeset
6 #####################################################################################
sagun98
parents:
diff changeset
7 #Copyright (C) <2012>
sagun98
parents:
diff changeset
8 #
sagun98
parents:
diff changeset
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
sagun98
parents:
diff changeset
10 #this software and associated documentation files (the "Software"), to deal in the
sagun98
parents:
diff changeset
11 #Software without restriction, including without limitation the rights to use, copy,
sagun98
parents:
diff changeset
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
sagun98
parents:
diff changeset
13 #and to permit persons to whom the Software is furnished to do so, subject to
sagun98
parents:
diff changeset
14 #the following conditions:
sagun98
parents:
diff changeset
15 #
sagun98
parents:
diff changeset
16 #The above copyright notice and this permission notice shall be included in all copies
sagun98
parents:
diff changeset
17 #or substantial portions of the Software.
sagun98
parents:
diff changeset
18 #
sagun98
parents:
diff changeset
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
sagun98
parents:
diff changeset
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
sagun98
parents:
diff changeset
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
sagun98
parents:
diff changeset
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
sagun98
parents:
diff changeset
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
sagun98
parents:
diff changeset
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
sagun98
parents:
diff changeset
25 #####################################################################################
sagun98
parents:
diff changeset
26
sagun98
parents:
diff changeset
27 __author__ = "Curtis Huttenhower"
sagun98
parents:
diff changeset
28 __copyright__ = "Copyright 2012"
sagun98
parents:
diff changeset
29 __credits__ = ["Curtis Huttenhower"]
sagun98
parents:
diff changeset
30 __license__ = "MIT"
sagun98
parents:
diff changeset
31 __maintainer__ = "Timothy Tickle"
sagun98
parents:
diff changeset
32 __email__ = "ttickle@sph.harvard.edu"
sagun98
parents:
diff changeset
33 __status__ = "Development"
sagun98
parents:
diff changeset
34
sagun98
parents:
diff changeset
35 import blist
sagun98
parents:
diff changeset
36 import sys
sagun98
parents:
diff changeset
37
sagun98
parents:
diff changeset
38 class CClade:
sagun98
parents:
diff changeset
39
sagun98
parents:
diff changeset
40 def __init__( self ):
sagun98
parents:
diff changeset
41 """
sagun98
parents:
diff changeset
42 Initialize CClade
sagun98
parents:
diff changeset
43 Dictionary to hold the children nodes from feature consensus lineages.
sagun98
parents:
diff changeset
44 adValues is a list of the abundance value.
sagun98
parents:
diff changeset
45 """
sagun98
parents:
diff changeset
46
sagun98
parents:
diff changeset
47 self.m_hashChildren = {}
sagun98
parents:
diff changeset
48 self.m_adValues = None
sagun98
parents:
diff changeset
49
sagun98
parents:
diff changeset
50 def get( self, astrClade ):
sagun98
parents:
diff changeset
51 """
sagun98
parents:
diff changeset
52 Recursively travel the length of a tree until you find the terminal node
sagun98
parents:
diff changeset
53 (where astrClade == Falseor actually [])
sagun98
parents:
diff changeset
54 or a dict key that matches the clade call.
sagun98
parents:
diff changeset
55 If at any time a clade is given that is not currently know, return a new clade
sagun98
parents:
diff changeset
56 which is set to the current Clade as a child.
sagun98
parents:
diff changeset
57 """
sagun98
parents:
diff changeset
58
sagun98
parents:
diff changeset
59 return self.m_hashChildren.setdefault(
sagun98
parents:
diff changeset
60 astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self
sagun98
parents:
diff changeset
61
sagun98
parents:
diff changeset
62 def set( self, adValues ):
sagun98
parents:
diff changeset
63 """
sagun98
parents:
diff changeset
64 Set all the values given as a list in the same order given.
sagun98
parents:
diff changeset
65 """
sagun98
parents:
diff changeset
66
sagun98
parents:
diff changeset
67 self.m_adValues = blist.blist( [0] ) * len( adValues )
sagun98
parents:
diff changeset
68 for i, d in enumerate( adValues ):
sagun98
parents:
diff changeset
69 if d:
sagun98
parents:
diff changeset
70 self.m_adValues[i] = d
sagun98
parents:
diff changeset
71
sagun98
parents:
diff changeset
72 def impute( self ):
sagun98
parents:
diff changeset
73 """
sagun98
parents:
diff changeset
74 This allows you to recursively impute values for clades without values given their children counts.
sagun98
parents:
diff changeset
75 Assumably this should be called only once and after all clade abundances have been added.
sagun98
parents:
diff changeset
76 If the m_adValues already exist return the stored m_adValues. (No imputation needed).
sagun98
parents:
diff changeset
77 Otherwise call impute for all children and take the sum of the values from all the children by column
sagun98
parents:
diff changeset
78 Not a sum of a list but summing a list with lists by element.
sagun98
parents:
diff changeset
79 """
sagun98
parents:
diff changeset
80
sagun98
parents:
diff changeset
81 #If values do not exist
sagun98
parents:
diff changeset
82 if not self.m_adValues:
sagun98
parents:
diff changeset
83 #Call impute on all children
sagun98
parents:
diff changeset
84 #If the parent clade has no abundance values
sagun98
parents:
diff changeset
85 #Then take a copy of the child's
sagun98
parents:
diff changeset
86 #If they now have a copy of a child's but have other children
sagun98
parents:
diff changeset
87 #Sum their children with thier current values
sagun98
parents:
diff changeset
88 for pChild in self.m_hashChildren.values( ):
sagun98
parents:
diff changeset
89 adChild = pChild.impute( )
sagun98
parents:
diff changeset
90 if self.m_adValues:
sagun98
parents:
diff changeset
91 for i in range( len( adChild or [] ) ):
sagun98
parents:
diff changeset
92 if adChild[i]:
sagun98
parents:
diff changeset
93 self.m_adValues[i] += adChild[i]
sagun98
parents:
diff changeset
94 elif adChild:
sagun98
parents:
diff changeset
95 self.m_adValues = adChild[:]
sagun98
parents:
diff changeset
96 #If values exist return
sagun98
parents:
diff changeset
97 return self.m_adValues
sagun98
parents:
diff changeset
98
sagun98
parents:
diff changeset
99 def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
sagun98
parents:
diff changeset
100 """
sagun98
parents:
diff changeset
101 Update the given hashValues dict with clade abundances given depth specifications
sagun98
parents:
diff changeset
102 Return a set of integers returning an indicator of the structure of the tree preserved in the dict/hash
sagun98
parents:
diff changeset
103 When the appropriate level of the tree is reached
sagun98
parents:
diff changeset
104 Hashvalue is updated with the clade (including lineage) and the abundance. looks like {"clade":blist(["0.0","0.1"...])}
sagun98
parents:
diff changeset
105 """
sagun98
parents:
diff changeset
106
sagun98
parents:
diff changeset
107 #fHit is true on atleast one of the following conditions:
sagun98
parents:
diff changeset
108 #iTarget is not 0 indicating no changes
sagun98
parents:
diff changeset
109 #Leaves are indicated to be only given and the target depth for the leaves is reached.
sagun98
parents:
diff changeset
110 #The required depth is reached.
sagun98
parents:
diff changeset
111 fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) )
sagun98
parents:
diff changeset
112 #Increment depth
sagun98
parents:
diff changeset
113 iDepth += 1
sagun98
parents:
diff changeset
114 #Returns a set
sagun98
parents:
diff changeset
115 setiRet = set()
sagun98
parents:
diff changeset
116 #If there are children build integer set indicating structure of the tree preserved in the dict
sagun98
parents:
diff changeset
117 if self.m_hashChildren:
sagun98
parents:
diff changeset
118 #Union all the results from freeze of all children
sagun98
parents:
diff changeset
119 #Call freeze but append the child clade to the clade in the call.
sagun98
parents:
diff changeset
120 #And give an incremented depth
sagun98
parents:
diff changeset
121 for strChild, pChild in self.m_hashChildren.items( ):
sagun98
parents:
diff changeset
122 setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )
sagun98
parents:
diff changeset
123 setiRet = set( ( i + 1 ) for i in setiRet )
sagun98
parents:
diff changeset
124 else:
sagun98
parents:
diff changeset
125 setiRet.add( 0 )
sagun98
parents:
diff changeset
126 #Indicate if the correct level is reached
sagun98
parents:
diff changeset
127 if iTarget < 0:
sagun98
parents:
diff changeset
128 if fLeaves:
sagun98
parents:
diff changeset
129 fHit = -( iTarget + 1 ) in setiRet
sagun98
parents:
diff changeset
130 else:
sagun98
parents:
diff changeset
131 fHit = -( iTarget + 1 ) <= max( setiRet )
sagun98
parents:
diff changeset
132 #if astrClade is not == [] (so you are actually in a clade of the tree)
sagun98
parents:
diff changeset
133 #And the clade has values (should be true, if not impute should have been callded before running this method)
sagun98
parents:
diff changeset
134 #And we are at the correct level of the tree then
sagun98
parents:
diff changeset
135 #Add to the dict the clade and the abundance values
sagun98
parents:
diff changeset
136 if astrClade and self.m_adValues and fHit:
sagun98
parents:
diff changeset
137 hashValues["|".join( astrClade )] = self.m_adValues
sagun98
parents:
diff changeset
138 return setiRet
sagun98
parents:
diff changeset
139
sagun98
parents:
diff changeset
140 def freeze( self, hashValues, iTarget, fLeaves ):
sagun98
parents:
diff changeset
141 """
sagun98
parents:
diff changeset
142 Call helper function setting the clade and depth to defaults (start positions)
sagun98
parents:
diff changeset
143 The important result of this method is hashValues is updated with clade and abundance information
sagun98
parents:
diff changeset
144 """
sagun98
parents:
diff changeset
145 self._freeze( hashValues, iTarget, [], 0, fLeaves )
sagun98
parents:
diff changeset
146
sagun98
parents:
diff changeset
147 def _repr( self, strClade ):
sagun98
parents:
diff changeset
148 """
sagun98
parents:
diff changeset
149 Represent tree clade for debugging. Helper function for recursive repr.
sagun98
parents:
diff changeset
150 """
sagun98
parents:
diff changeset
151
sagun98
parents:
diff changeset
152 strRet = "<"
sagun98
parents:
diff changeset
153 if strClade:
sagun98
parents:
diff changeset
154 strRet += "%s %s" % (strClade, self.m_adValues)
sagun98
parents:
diff changeset
155 if self.m_hashChildren:
sagun98
parents:
diff changeset
156 strRet += " "
sagun98
parents:
diff changeset
157 if self.m_hashChildren:
sagun98
parents:
diff changeset
158 strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) )
sagun98
parents:
diff changeset
159
sagun98
parents:
diff changeset
160 return ( strRet + ">" )
sagun98
parents:
diff changeset
161
sagun98
parents:
diff changeset
162 def __repr__( self ):
sagun98
parents:
diff changeset
163 """
sagun98
parents:
diff changeset
164 Represent tree clade for debugging.
sagun98
parents:
diff changeset
165 """
sagun98
parents:
diff changeset
166 return self._repr( "" )
sagun98
parents:
diff changeset
167
sagun98
parents:
diff changeset
168 """
sagun98
parents:
diff changeset
169 pTree = CClade( )
sagun98
parents:
diff changeset
170 pTree.get( ("A", "B") ).set( [1, 2, 3] )
sagun98
parents:
diff changeset
171 pTree.get( ("A", "C") ).set( [4, 5, 6] )
sagun98
parents:
diff changeset
172 pTree.get( ("D", "E") ).set( [7, 8, 9] )
sagun98
parents:
diff changeset
173 iTaxa = 0
sagun98
parents:
diff changeset
174 if iTaxa:
sagun98
parents:
diff changeset
175 pTree.impute( )
sagun98
parents:
diff changeset
176 hashFeatures = {}
sagun98
parents:
diff changeset
177 pTree.freeze( hashFeatures, iTaxa )
sagun98
parents:
diff changeset
178 print( pTree )
sagun98
parents:
diff changeset
179 print( hashFeatures )
sagun98
parents:
diff changeset
180 sys.exit( 0 )
sagun98
parents:
diff changeset
181 #"""