Mercurial > repos > sagun98 > micropita_v2
comparison src/breadcrumbs/src/CClade.py @ 0:0de566f21448 draft default tip
v2
| author | sagun98 |
|---|---|
| date | Thu, 03 Jun 2021 18:13:32 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0de566f21448 |
|---|---|
| 1 """ | |
| 2 Author: Curtis Huttenhower | |
| 3 Description: Used to create tree structures to hierarchically normalize abundance tables. | |
| 4 """ | |
| 5 | |
| 6 ##################################################################################### | |
| 7 #Copyright (C) <2012> | |
| 8 # | |
| 9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
| 10 #this software and associated documentation files (the "Software"), to deal in the | |
| 11 #Software without restriction, including without limitation the rights to use, copy, | |
| 12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
| 13 #and to permit persons to whom the Software is furnished to do so, subject to | |
| 14 #the following conditions: | |
| 15 # | |
| 16 #The above copyright notice and this permission notice shall be included in all copies | |
| 17 #or substantial portions of the Software. | |
| 18 # | |
| 19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
| 20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
| 21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
| 22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
| 23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
| 24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| 25 ##################################################################################### | |
| 26 | |
| 27 __author__ = "Curtis Huttenhower" | |
| 28 __copyright__ = "Copyright 2012" | |
| 29 __credits__ = ["Curtis Huttenhower"] | |
| 30 __license__ = "MIT" | |
| 31 __maintainer__ = "Timothy Tickle" | |
| 32 __email__ = "ttickle@sph.harvard.edu" | |
| 33 __status__ = "Development" | |
| 34 | |
| 35 import blist | |
| 36 import sys | |
| 37 | |
| 38 class CClade: | |
| 39 | |
| 40 def __init__( self ): | |
| 41 """ | |
| 42 Initialize CClade | |
| 43 Dictionary to hold the children nodes from feature consensus lineages. | |
| 44 adValues is a list of the abundance value. | |
| 45 """ | |
| 46 | |
| 47 self.m_hashChildren = {} | |
| 48 self.m_adValues = None | |
| 49 | |
| 50 def get( self, astrClade ): | |
| 51 """ | |
| 52 Recursively travel the length of a tree until you find the terminal node | |
| 53 (where astrClade == Falseor actually []) | |
| 54 or a dict key that matches the clade call. | |
| 55 If at any time a clade is given that is not currently know, return a new clade | |
| 56 which is set to the current Clade as a child. | |
| 57 """ | |
| 58 | |
| 59 return self.m_hashChildren.setdefault( | |
| 60 astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self | |
| 61 | |
| 62 def set( self, adValues ): | |
| 63 """ | |
| 64 Set all the values given as a list in the same order given. | |
| 65 """ | |
| 66 | |
| 67 self.m_adValues = blist.blist( [0] ) * len( adValues ) | |
| 68 for i, d in enumerate( adValues ): | |
| 69 if d: | |
| 70 self.m_adValues[i] = d | |
| 71 | |
| 72 def impute( self ): | |
| 73 """ | |
| 74 This allows you to recursively impute values for clades without values given their children counts. | |
| 75 Assumably this should be called only once and after all clade abundances have been added. | |
| 76 If the m_adValues already exist return the stored m_adValues. (No imputation needed). | |
| 77 Otherwise call impute for all children and take the sum of the values from all the children by column | |
| 78 Not a sum of a list but summing a list with lists by element. | |
| 79 """ | |
| 80 | |
| 81 #If values do not exist | |
| 82 if not self.m_adValues: | |
| 83 #Call impute on all children | |
| 84 #If the parent clade has no abundance values | |
| 85 #Then take a copy of the child's | |
| 86 #If they now have a copy of a child's but have other children | |
| 87 #Sum their children with thier current values | |
| 88 for pChild in self.m_hashChildren.values( ): | |
| 89 adChild = pChild.impute( ) | |
| 90 if self.m_adValues: | |
| 91 for i in range( len( adChild or [] ) ): | |
| 92 if adChild[i]: | |
| 93 self.m_adValues[i] += adChild[i] | |
| 94 elif adChild: | |
| 95 self.m_adValues = adChild[:] | |
| 96 #If values exist return | |
| 97 return self.m_adValues | |
| 98 | |
| 99 def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ): | |
| 100 """ | |
| 101 Update the given hashValues dict with clade abundances given depth specifications | |
| 102 Return a set of integers returning an indicator of the structure of the tree preserved in the dict/hash | |
| 103 When the appropriate level of the tree is reached | |
| 104 Hashvalue is updated with the clade (including lineage) and the abundance. looks like {"clade":blist(["0.0","0.1"...])} | |
| 105 """ | |
| 106 | |
| 107 #fHit is true on atleast one of the following conditions: | |
| 108 #iTarget is not 0 indicating no changes | |
| 109 #Leaves are indicated to be only given and the target depth for the leaves is reached. | |
| 110 #The required depth is reached. | |
| 111 fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) ) | |
| 112 #Increment depth | |
| 113 iDepth += 1 | |
| 114 #Returns a set | |
| 115 setiRet = set() | |
| 116 #If there are children build integer set indicating structure of the tree preserved in the dict | |
| 117 if self.m_hashChildren: | |
| 118 #Union all the results from freeze of all children | |
| 119 #Call freeze but append the child clade to the clade in the call. | |
| 120 #And give an incremented depth | |
| 121 for strChild, pChild in self.m_hashChildren.items( ): | |
| 122 setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves ) | |
| 123 setiRet = set( ( i + 1 ) for i in setiRet ) | |
| 124 else: | |
| 125 setiRet.add( 0 ) | |
| 126 #Indicate if the correct level is reached | |
| 127 if iTarget < 0: | |
| 128 if fLeaves: | |
| 129 fHit = -( iTarget + 1 ) in setiRet | |
| 130 else: | |
| 131 fHit = -( iTarget + 1 ) <= max( setiRet ) | |
| 132 #if astrClade is not == [] (so you are actually in a clade of the tree) | |
| 133 #And the clade has values (should be true, if not impute should have been callded before running this method) | |
| 134 #And we are at the correct level of the tree then | |
| 135 #Add to the dict the clade and the abundance values | |
| 136 if astrClade and self.m_adValues and fHit: | |
| 137 hashValues["|".join( astrClade )] = self.m_adValues | |
| 138 return setiRet | |
| 139 | |
| 140 def freeze( self, hashValues, iTarget, fLeaves ): | |
| 141 """ | |
| 142 Call helper function setting the clade and depth to defaults (start positions) | |
| 143 The important result of this method is hashValues is updated with clade and abundance information | |
| 144 """ | |
| 145 self._freeze( hashValues, iTarget, [], 0, fLeaves ) | |
| 146 | |
| 147 def _repr( self, strClade ): | |
| 148 """ | |
| 149 Represent tree clade for debugging. Helper function for recursive repr. | |
| 150 """ | |
| 151 | |
| 152 strRet = "<" | |
| 153 if strClade: | |
| 154 strRet += "%s %s" % (strClade, self.m_adValues) | |
| 155 if self.m_hashChildren: | |
| 156 strRet += " " | |
| 157 if self.m_hashChildren: | |
| 158 strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) ) | |
| 159 | |
| 160 return ( strRet + ">" ) | |
| 161 | |
| 162 def __repr__( self ): | |
| 163 """ | |
| 164 Represent tree clade for debugging. | |
| 165 """ | |
| 166 return self._repr( "" ) | |
| 167 | |
| 168 """ | |
| 169 pTree = CClade( ) | |
| 170 pTree.get( ("A", "B") ).set( [1, 2, 3] ) | |
| 171 pTree.get( ("A", "C") ).set( [4, 5, 6] ) | |
| 172 pTree.get( ("D", "E") ).set( [7, 8, 9] ) | |
| 173 iTaxa = 0 | |
| 174 if iTaxa: | |
| 175 pTree.impute( ) | |
| 176 hashFeatures = {} | |
| 177 pTree.freeze( hashFeatures, iTaxa ) | |
| 178 print( pTree ) | |
| 179 print( hashFeatures ) | |
| 180 sys.exit( 0 ) | |
| 181 #""" |
