Mercurial > repos > sagun98 > micropita
comparison galaxy_micropita/src/breadcrumbs/src/CClade.py @ 3:8fb4630ab314 draft default tip
Uploaded
author | sagun98 |
---|---|
date | Thu, 03 Jun 2021 17:07:36 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:1c5736dc85ab | 3:8fb4630ab314 |
---|---|
1 """ | |
2 Author: Curtis Huttenhower | |
3 Description: Used to create tree structures to hierarchically normalize abundance tables. | |
4 """ | |
5 | |
6 ##################################################################################### | |
7 #Copyright (C) <2012> | |
8 # | |
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
10 #this software and associated documentation files (the "Software"), to deal in the | |
11 #Software without restriction, including without limitation the rights to use, copy, | |
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
13 #and to permit persons to whom the Software is furnished to do so, subject to | |
14 #the following conditions: | |
15 # | |
16 #The above copyright notice and this permission notice shall be included in all copies | |
17 #or substantial portions of the Software. | |
18 # | |
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
25 ##################################################################################### | |
26 | |
27 __author__ = "Curtis Huttenhower" | |
28 __copyright__ = "Copyright 2012" | |
29 __credits__ = ["Curtis Huttenhower"] | |
30 __license__ = "MIT" | |
31 __maintainer__ = "Timothy Tickle" | |
32 __email__ = "ttickle@sph.harvard.edu" | |
33 __status__ = "Development" | |
34 | |
35 import blist | |
36 import sys | |
37 | |
38 class CClade: | |
39 | |
40 def __init__( self ): | |
41 """ | |
42 Initialize CClade | |
43 Dictionary to hold the children nodes from feature consensus lineages. | |
44 adValues is a list of the abundance value. | |
45 """ | |
46 | |
47 self.m_hashChildren = {} | |
48 self.m_adValues = None | |
49 | |
50 def get( self, astrClade ): | |
51 """ | |
52 Recursively travel the length of a tree until you find the terminal node | |
53 (where astrClade == Falseor actually []) | |
54 or a dict key that matches the clade call. | |
55 If at any time a clade is given that is not currently know, return a new clade | |
56 which is set to the current Clade as a child. | |
57 """ | |
58 | |
59 return self.m_hashChildren.setdefault( | |
60 astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self | |
61 | |
62 def set( self, adValues ): | |
63 """ | |
64 Set all the values given as a list in the same order given. | |
65 """ | |
66 | |
67 self.m_adValues = blist.blist( [0] ) * len( adValues ) | |
68 for i, d in enumerate( adValues ): | |
69 if d: | |
70 self.m_adValues[i] = d | |
71 | |
72 def impute( self ): | |
73 """ | |
74 This allows you to recursively impute values for clades without values given their children counts. | |
75 Assumably this should be called only once and after all clade abundances have been added. | |
76 If the m_adValues already exist return the stored m_adValues. (No imputation needed). | |
77 Otherwise call impute for all children and take the sum of the values from all the children by column | |
78 Not a sum of a list but summing a list with lists by element. | |
79 """ | |
80 | |
81 #If values do not exist | |
82 if not self.m_adValues: | |
83 #Call impute on all children | |
84 #If the parent clade has no abundance values | |
85 #Then take a copy of the child's | |
86 #If they now have a copy of a child's but have other children | |
87 #Sum their children with thier current values | |
88 for pChild in self.m_hashChildren.values( ): | |
89 adChild = pChild.impute( ) | |
90 if self.m_adValues: | |
91 for i in range( len( adChild or [] ) ): | |
92 if adChild[i]: | |
93 self.m_adValues[i] += adChild[i] | |
94 elif adChild: | |
95 self.m_adValues = adChild[:] | |
96 #If values exist return | |
97 return self.m_adValues | |
98 | |
99 def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ): | |
100 """ | |
101 Update the given hashValues dict with clade abundances given depth specifications | |
102 Return a set of integers returning an indicator of the structure of the tree preserved in the dict/hash | |
103 When the appropriate level of the tree is reached | |
104 Hashvalue is updated with the clade (including lineage) and the abundance. looks like {"clade":blist(["0.0","0.1"...])} | |
105 """ | |
106 | |
107 #fHit is true on atleast one of the following conditions: | |
108 #iTarget is not 0 indicating no changes | |
109 #Leaves are indicated to be only given and the target depth for the leaves is reached. | |
110 #The required depth is reached. | |
111 fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) ) | |
112 #Increment depth | |
113 iDepth += 1 | |
114 #Returns a set | |
115 setiRet = set() | |
116 #If there are children build integer set indicating structure of the tree preserved in the dict | |
117 if self.m_hashChildren: | |
118 #Union all the results from freeze of all children | |
119 #Call freeze but append the child clade to the clade in the call. | |
120 #And give an incremented depth | |
121 for strChild, pChild in self.m_hashChildren.items( ): | |
122 setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves ) | |
123 setiRet = set( ( i + 1 ) for i in setiRet ) | |
124 else: | |
125 setiRet.add( 0 ) | |
126 #Indicate if the correct level is reached | |
127 if iTarget < 0: | |
128 if fLeaves: | |
129 fHit = -( iTarget + 1 ) in setiRet | |
130 else: | |
131 fHit = -( iTarget + 1 ) <= max( setiRet ) | |
132 #if astrClade is not == [] (so you are actually in a clade of the tree) | |
133 #And the clade has values (should be true, if not impute should have been callded before running this method) | |
134 #And we are at the correct level of the tree then | |
135 #Add to the dict the clade and the abundance values | |
136 if astrClade and self.m_adValues and fHit: | |
137 hashValues["|".join( astrClade )] = self.m_adValues | |
138 return setiRet | |
139 | |
140 def freeze( self, hashValues, iTarget, fLeaves ): | |
141 """ | |
142 Call helper function setting the clade and depth to defaults (start positions) | |
143 The important result of this method is hashValues is updated with clade and abundance information | |
144 """ | |
145 self._freeze( hashValues, iTarget, [], 0, fLeaves ) | |
146 | |
147 def _repr( self, strClade ): | |
148 """ | |
149 Represent tree clade for debugging. Helper function for recursive repr. | |
150 """ | |
151 | |
152 strRet = "<" | |
153 if strClade: | |
154 strRet += "%s %s" % (strClade, self.m_adValues) | |
155 if self.m_hashChildren: | |
156 strRet += " " | |
157 if self.m_hashChildren: | |
158 strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) ) | |
159 | |
160 return ( strRet + ">" ) | |
161 | |
162 def __repr__( self ): | |
163 """ | |
164 Represent tree clade for debugging. | |
165 """ | |
166 return self._repr( "" ) | |
167 | |
168 """ | |
169 pTree = CClade( ) | |
170 pTree.get( ("A", "B") ).set( [1, 2, 3] ) | |
171 pTree.get( ("A", "C") ).set( [4, 5, 6] ) | |
172 pTree.get( ("D", "E") ).set( [7, 8, 9] ) | |
173 iTaxa = 0 | |
174 if iTaxa: | |
175 pTree.impute( ) | |
176 hashFeatures = {} | |
177 pTree.freeze( hashFeatures, iTaxa ) | |
178 print( pTree ) | |
179 print( hashFeatures ) | |
180 sys.exit( 0 ) | |
181 #""" |