Mercurial > repos > yufei-luo > s_mart
diff commons/core/stat/Stat.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
import math


## Descriptive statistics on a list of numeric values.
#
# Values are stored as floats; the sum, sum of squares, count, minimum and
# maximum are updated incrementally each time a value is added.
#
class Stat(object):

    ## Constructor
    #
    # @param lValues iterable of numbers used to fill the instance (optional)
    #
    def __init__(self, lValues=None):
        self.reset()
        # 'None' replaces the former mutable default '[]'
        if lValues is not None:
            self.fill(lValues)

    ## Two instances are equal when they hold the same values (whatever
    # their order) and the same counters (floats compared at 6 decimals)
    #
    # @note neither instance is reordered by the comparison
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if 'o' is not a Stat instance
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        return sorted(self._lValues) == sorted(o._lValues) \
            and self._n == o._n \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Negation of __eq__ (Python 2 does not derive it automatically)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if 'o' is not a Stat instance
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the internal list of values (not a copy)
    #
    # @return list of floats
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return integer
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 if there is no value)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 if there is no value)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v value to add (anything accepted by float())
    #
    def add(self, v):
        value = float(v)
        self._lValues.append(value)
        self._sum += value
        self._sumOfSquares += value * value
        self._n += 1
        if self._n == 1:
            # the first value is both the min and the max; comparing it to
            # the initial 0.0 would give a wrong max for negative samples
            self._min = value
            self._max = value
        else:
            if value < self._min:
                self._min = value
            if value > self._max:
                self._max = value

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues iterable of numbers to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0 if there is no value)
    #
    def mean(self):
        if self._n == 0:
            return 0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0 if there are fewer than two values)
    #
    def var(self):
        if self._n < 2:
            return 0
        mean = self.mean()
        variance = self._sumOfSquares / float(self._n - 1) \
            - self._n / float(self._n - 1) * mean * mean
        # rounding errors can give a tiny non-zero, possibly negative, result
        # for constant data; a negative value would make sd() fail
        if variance < 0 or round(variance, 10) == 0:
            variance = 0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt(self.var())

    ## Get the coefficient of variation of the sample
    #
    # @return float (0 if there are fewer than two values or if the mean is 0)
    #
    def cv(self):
        mean = self.mean()
        if self._n < 2 or mean == 0.0:
            return 0
        return self.sd() / mean

    ## Get the median of the sample
    #
    # @note the internal list of values is sorted in ascending order
    #
    # @return number or "NA" (Not available)
    #
    def median(self):
        nbValues = len(self._lValues)
        if nbValues == 0:
            return "NA"
        if nbValues == 1:
            return self._lValues[0]
        self._lValues.sort()
        middle = nbValues // 2
        if nbValues % 2:
            return self._lValues[middle]
        return (self._lValues[middle - 1] + self._lValues[middle]) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat
    # relative to a normal distribution, 'coefficient d'aplatissement' in French).
    # Computed as sum((x - mean)^4) / ((n - 1) * sd^4).
    # k = 0 -> completely flat
    # k = 3 -> same as normal distribution
    # k >> 3 -> peak
    #
    # @return float (0 if there are fewer than two values or if the variance is 0)
    #
    def kurtosis(self):
        if self._n < 2:
            return 0
        variance = self.var()
        if variance == 0:
            return 0
        mean = self.mean()
        numerator = sum(math.pow(x - mean, 4) for x in self._lValues)
        # sd^4 == variance^2
        return numerator / (float(self._n - 1) * variance * variance)

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        msg = ""
        msg += "n=%d" % (self._n)
        msg += " mean=%5.3f" % (self.mean())
        msg += " var=%5.3f" % (self.var())
        msg += " sd=%5.3f" % (self.sd())
        msg += " min=%5.3f" % (self.getMin())
        median = self.median()
        if median == "NA":
            msg += " med=%s" % (median)
        else:
            msg += " med=%5.3f" % (median)
        msg += " max=%5.3f" % (self.getMax())
        return msg

    ## Print descriptive statistics
    #
    def view(self):
        print(self.string())

    ## Sort the internal list of values in place, ascending (default) or
    # descending, and return it
    #
    # @param isReverse boolean, True for descending order
    # @return list
    #
    def sort(self, isReverse=False):
        self._lValues.sort(reverse=isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage, i.e. the
    # value at index int(n * percentage) in the ascending list of values
    #
    # @note the internal list of values is sorted in ascending order
    #
    # @param percentage fraction, expected between 0 and 1
    # @return number (0 if there is no value, the max if percentage is 1)
    #
    def quantile(self, percentage):
        if self._n == 0:
            return 0
        if percentage == 1:
            return self.getMax()
        return self.sort()[int(self._n * percentage)]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles(self):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5),
             self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles(self):
        print(self.stringQuantiles())

    ## Compute N50: walking the values from the largest to the smallest,
    # return the value at which the running sum reaches half of the total sum
    #
    # @note the internal list of values is sorted in descending order
    #
    # @return number (0 if there is no value)
    #
    def N50(self):
        lSorted = self.sort(True)
        if not lSorted:
            return 0
        halfSum = self.getSum() / 2
        cumul = 0
        index = 0
        while cumul < halfSum and index < len(lSorted):
            cumul += lSorted[index]
            index += 1
        if index == 0:
            # half of the sum is <= 0: the largest value already reaches it
            return lSorted[0]
        return lSorted[index - 1]