diff galaxy_micropita/src/breadcrumbs/src/CClade.py @ 3:8fb4630ab314 draft default tip

Uploaded
author sagun98
date Thu, 03 Jun 2021 17:07:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy_micropita/src/breadcrumbs/src/CClade.py	Thu Jun 03 17:07:36 2021 +0000
@@ -0,0 +1,181 @@
+"""
+Author: Curtis Huttenhower
+Description: Used to create tree structures to hierarchically normalize abundance tables.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+#Module metadata: authorship, copyright, license and maintainer contact.
+__author__ = "Curtis Huttenhower"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Curtis Huttenhower"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle@sph.harvard.edu"
+__status__ = "Development"
+
+import blist
+import sys
+
+class CClade:
+	"""
+	Node of a clade tree used to hierarchically normalize abundance tables.
+
+	Every node keeps a dict of its child clades keyed by clade name
+	(m_hashChildren) and, optionally, a list of abundance values (m_adValues).
+	The root node stands for the empty lineage.
+	"""
+
+	def __init__( self ):
+		"""
+		Create a node that has no children and no abundance values yet.
+		"""
+
+		#Child clade name -> child CClade
+		self.m_hashChildren = {}
+		#Abundance values of this clade; None until set( ) or impute( ) fills them in
+		self.m_adValues = None
+
+	def get( self, astrClade ):
+		"""
+		Return the node reached by following the lineage astrClade from this node.
+
+		astrClade is a sequence of clade names, the first one naming a child of
+		this node, the second one a child of that child, and so on.
+		Any clade along the path that is not currently known is created and
+		attached to its parent. An empty lineage ([], ( ), None) returns this node.
+		"""
+
+		pClade = self
+		#Walk down iteratively and only build a child when it is really missing.
+		#setdefault( key, CClade( ) ) would construct (and usually discard) a new
+		#node at every step because its default argument is evaluated eagerly.
+		for strClade in ( astrClade or ( ) ):
+			pChild = pClade.m_hashChildren.get( strClade )
+			if pChild is None:
+				pChild = pClade.m_hashChildren[strClade] = CClade( )
+			pClade = pChild
+		return pClade
+
+	def set( self, adValues ):
+		"""
+		Store the given values as this clade's abundances, in the order given.
+
+		The values are copied into a new blist of the same length. Entries that
+		evaluate false (0, 0.0, None, "") are stored as the integer 0.
+		"""
+
+		#Start from an all-zero blist so only the nonzero entries have to be written
+		self.m_adValues = blist.blist( [0] ) * len( adValues )
+		for i, d in enumerate( adValues ):
+			if d:
+				self.m_adValues[i] = d
+
+	def impute( self ):
+		"""
+		Recursively impute values for clades without values from their children.
+
+		Presumably this should be called only once, after all clade abundances
+		have been added.
+		If this clade already has (non-empty) m_adValues they are returned as they
+		are and its children are NOT visited (no imputation needed).
+		Otherwise impute is called on every child and the children's values are
+		summed by column (an element-wise sum of the lists, not a sum of one list).
+
+		Returns m_adValues, which stays None when neither this clade nor any
+		clade below it has values.
+		"""
+
+		if not self.m_adValues:
+			for pChild in self.m_hashChildren.values( ):
+				adChild = pChild.impute( )
+				#A child without values contributes nothing
+				if not adChild:
+					continue
+				if self.m_adValues:
+					#Later children are added onto the running sum
+					for i, d in enumerate( adChild ):
+						if d:
+							self.m_adValues[i] += d
+				else:
+					#The first child with values seeds the sum; copy it so the
+					#child's own list is not modified by the additions above
+					self.m_adValues = adChild[:]
+		return self.m_adValues
+
+	def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
+		"""
+		Add the clades selected by iTarget/fLeaves to hashValues; helper of freeze.
+
+		hashValues is updated in place with entries mapping the lineage joined by
+		"|" to that clade's m_adValues (the same list object, not a copy).
+		Clades without values are never added (call impute first), and neither
+		is the node the traversal started from (its lineage is empty).
+
+		iTarget selects the level(s) of the tree that are kept:
+		  0   every clade.
+		  > 0 depth counted from the start node (its children have depth 1):
+		      exactly iTarget if fLeaves, otherwise iTarget or shallower.
+		  < 0 height counted from the leaves, h = -( iTarget + 1 ) (so -1 is 0):
+		      clades with a leaf exactly h levels below them if fLeaves,
+		      otherwise clades whose deepest leaf is at least h levels below them.
+
+		astrClade is the lineage (list of names) of this node and iDepth its depth.
+
+		Returns the set of heights of this node, i.e. the distances from this node
+		to each of the leaves below it ({0} for a leaf).
+		"""
+
+		#Depth based selection; replaced below when iTarget is negative
+		fHit = ( not iTarget ) or ( ( iDepth == iTarget ) if fLeaves else ( iDepth <= iTarget ) )
+
+		if self.m_hashChildren:
+			#Union the heights of all children, handing every child its own
+			#lineage and a depth one greater than this node's
+			setiChildren = set( )
+			for strChild, pChild in self.m_hashChildren.items( ):
+				setiChildren |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth + 1, fLeaves )
+			#This node is one level above each of its children
+			setiRet = set( ( i + 1 ) for i in setiChildren )
+		else:
+			#A leaf is 0 levels above itself
+			setiRet = set( [0] )
+
+		#Height based selection can only be decided once the subtree is known
+		if iTarget < 0:
+			iHeight = -( iTarget + 1 )
+			fHit = ( iHeight in setiRet ) if fLeaves else ( iHeight <= max( setiRet ) )
+
+		#Only a real clade (non-empty lineage) that has values and is on a
+		#requested level is recorded
+		if astrClade and self.m_adValues and fHit:
+			hashValues["|".join( astrClade )] = self.m_adValues
+		return setiRet
+
+	def freeze( self, hashValues, iTarget, fLeaves ):
+		"""
+		Fill hashValues with the clades of this tree selected by iTarget/fLeaves.
+
+		Calls the helper _freeze with the start positions (empty lineage, depth 0);
+		see _freeze for the meaning of iTarget and fLeaves.
+		The important result is that hashValues is updated with lineage ->
+		abundance entries; nothing is returned.
+		"""
+
+		self._freeze( hashValues, iTarget, [], 0, fLeaves )
+
+	def _repr( self, strClade ):
+		"""
+		Represent the tree below a clade named strClade for debugging.
+		Helper function for the recursive repr.
+
+		The result looks like "<name values <child ...> <child ...>>"; the name
+		and values are left out when strClade is empty (as for the root).
+		"""
+
+		strRet = "<"
+		if strClade:
+			strRet += "%s %s" % ( strClade, self.m_adValues )
+			if self.m_hashChildren:
+				strRet += " "
+		#Joining no children yields "", so no separate emptiness check is needed
+		strRet += " ".join( p._repr( s ) for ( s, p ) in self.m_hashChildren.items( ) )
+
+		return ( strRet + ">" )
+
+	def __repr__( self ):
+		"""
+		Represent the whole tree below this clade for debugging.
+		"""
+
+		return self._repr( "" )
+
+"""
+pTree = CClade( )
+pTree.get( ("A", "B") ).set( [1, 2, 3] )
+pTree.get( ("A", "C") ).set( [4, 5, 6] )
+pTree.get( ("D", "E") ).set( [7, 8, 9] )
+iTaxa = 0
+if iTaxa:
+	pTree.impute( )
+hashFeatures = {}
+pTree.freeze( hashFeatures, iTaxa, False )
+print( pTree )
+print( hashFeatures )
+sys.exit( 0 )
+#"""