s_mart: commons/core/stat/Stat.py comparison

comparison commons/core/stat/Stat.py @ 6:769e306b7933

Change the repository level.

author	yufei-luo
date	Fri, 18 Jan 2013 04:54:14 -0500
parents
children

comparison

equal deleted inserted replaced

-:ea3082881bf8
+:769e306b7933
+import math
## Accumulate numeric values and compute descriptive statistics on them
#
# Every added value is converted to float and stored in a list, while the sum,
# the sum of squares, the number of values, the minimum and the maximum are
# updated incrementally.
#
class Stat(object):

    ## Constructor
    #
    # @param lValues optional iterable of numbers used to fill the instance
    #
    def __init__(self, lValues = None):
        self.reset()
        # None replaces the former mutable default ([]); filling with an
        # empty iterable is a no-op, so no emptiness test is needed.
        if lValues is not None:
            self.fill(lValues)

    ## Compare two Stat instances
    #
    # The stored values are compared regardless of their order; sum, sum of
    # squares, min and max are compared after rounding to 6 decimals.
    # Neither operand is modified.
    #
    # @param o other Stat instance
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        # sorted() works on copies so that comparing does not reorder the lists
        return sorted(self._lValues) == sorted(o._lValues) \
            and self._n == o._n \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Negation of __eq__
    #
    # @note needed explicitly under Python 2, where '!=' is not derived from '=='
    #
    # @param o other Stat instance
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return NotImplemented
        return not isEqual

    ## Get the internal list of values (not a copy)
    #
    # @return list of floats
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return integer
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v value to add (anything accepted by float())
    #
    def add(self, v):
        value = float(v)
        self._lValues.append(value)
        self._sum += value
        self._sumOfSquares += value * value
        self._n += 1
        if self._n == 1:
            # The first value defines both bounds; comparing it with the 0.0
            # placeholder would give a wrong max for all-negative samples.
            self._min = value
            self._max = value
        else:
            if value < self._min:
                self._min = value
            if value > self._max:
                self._max = value

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues iterable of values to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0.0 if the instance is empty)
    #
    def mean(self):
        if self._n == 0:
            return 0.0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0.0 if there are fewer than 2 values)
    #
    def var(self):
        if self._n < 2:
            return 0.0
        mean = self.mean()
        variance = self._sumOfSquares / float(self._n - 1) \
            - self._n / float(self._n - 1) * mean * mean
        # Floating-point cancellation can leave a tiny (possibly negative)
        # residue for constant samples; report it as exactly zero so that
        # sd() never takes the square root of a negative number.
        if variance < 0 or round(variance, 10) == 0:
            return 0.0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt( self.var() )

    ## Get the coefficient of variation of the sample
    #
    # @return float (0.0 if there are fewer than 2 values or if the mean is zero)
    #
    def cv(self):
        mean = self.mean()
        if self._n < 2 or mean == 0.0:
            return 0.0
        return self.sd() / mean

    ## Get the median of the sample
    #
    # @note the internal list of values is left sorted in ascending order
    #
    # @return number or "NA" (Not available)
    #
    def median( self ):
        if len(self._lValues) == 0:
            return "NA"
        if len(self._lValues) == 1:
            return self._lValues[0]
        self._lValues.sort()
        m = int( math.ceil( len(self._lValues) / 2.0 ) )
        if len(self._lValues) % 2:
            return self._lValues[m-1]
        return ( self._lValues[m-1] + self._lValues[m] ) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coef d'aplatissement' in french).
    #  k = 3 -> same as normal distribution
    #  k < 3 -> flatter than a normal distribution
    #  k >> 3 -> peak
    #
    # Computed as sum((x - mean)^4) / ((n - 1) * sd^4).
    #
    # @return float (0.0 if the variance is zero, i.e. fewer than 2 values or a constant sample)
    #
    def kurtosis(self):
        variance = self.var()
        if variance == 0:
            return 0.0
        mean = self.mean()
        numerator = sum( math.pow( x - mean, 4 ) for x in self._lValues )
        # sd^4 == variance^2; using the variance avoids a sqrt round-trip
        return numerator / ( float(self._n - 1) * variance * variance )

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        msg = ""
        msg += "n=%d" % ( self._n )
        msg += " mean=%5.3f" % ( self.mean() )
        msg += " var=%5.3f" % ( self.var() )
        msg += " sd=%5.3f" % ( self.sd() )
        msg += " min=%5.3f" % ( self.getMin() )
        median = self.median()
        if median == "NA":
            msg += " med=%s" % (median)
        else:
            msg += " med=%5.3f" % (median)
        msg += " max=%5.3f" % ( self.getMax() )
        return msg

    ## Print descriptive statistics
    #
    def view(self):
        # parenthesised single argument: valid with both Python 2 and Python 3
        print(self.string())

    ## Sort the internal list of values in place, ascending (default) or descending
    #
    # @param isReverse boolean, True for descending order
    # @return list (the internal list itself, not a copy)
    #
    def sort( self, isReverse = False ):
        self._lValues.sort(reverse = isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage
    #
    # @note the internal list of values is left sorted in ascending order
    #
    # @param percentage fraction between 0 and 1 (e.g. 0.25 for the first quartile)
    # @return number (0 if the instance is empty)
    #
    def quantile( self, percentage ):
        if self._n == 0:
            return 0
        if percentage == 1:
            # int(n * 1) would index one past the end of the list
            return self.getMax()
        return self.sort()[int(self._n * percentage)]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles( self ):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles( self ):
        # parenthesised single argument: valid with both Python 2 and Python 3
        print(self.stringQuantiles())

    ## Compute N50: walking the values from the largest to the smallest, the value at which the cumulative sum reaches half of the total sum
    #
    # @note the internal list of values is left sorted in descending order
    #
    # @return number (0 if the instance is empty)
    #
    def N50(self ):
        if not self._lValues:
            return 0
        lSorted = self.sort(True)
        midlValues = self.getSum() / 2
        cumul = 0
        index = 0
        # the index bound guards against running off the list when rounding
        # errors keep the cumulative sum below half of the stored sum
        while cumul < midlValues and index < len(lSorted):
            cumul += lSorted[index]
            index += 1
        if index == 0:
            return lSorted[0]
        return lSorted[index - 1]

Mercurial > repos > yufei-luo > s_mart

comparison commons/core/stat/Stat.py @ 6:769e306b7933