comparison commons/core/stat/Stat.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 import math
2
## Incremental descriptive statistics over a list of numeric values
#
# Values are stored as floats; the sum, sum of squares, count, minimum and
# maximum are updated every time a value is added.
#
class Stat(object):

    ## Constructor
    #
    # @param lValues optional iterable of values used to fill the instance
    #
    def __init__(self, lValues=None):
        self.reset()
        # None replaces the former mutable default ([]); an empty iterable
        # simply adds nothing.
        if lValues is not None:
            self.fill(lValues)

    ## Two instances are equal when they hold the same values (in any order)
    # and their sum, sum of squares, min and max agree to 6 decimals
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a Stat instance
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        # Compare sorted copies so that the comparison does not reorder the
        # values stored in either operand.
        return sorted(self._lValues) == sorted(o._lValues) \
            and self._n == o._n \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Negation of __eq__ (Python 2 does not derive it automatically)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented when o is not a Stat instance
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the stored values (the internal list itself, not a copy)
    #
    # @return list of float
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return integer
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 when the instance is empty)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 when the instance is empty)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v value to add (anything accepted by float())
    #
    def add(self, v):
        value = float(v)
        self._lValues.append(value)
        self._sum += value
        self._sumOfSquares += value * value
        self._n += 1
        if self._n == 1:
            # The first value defines both bounds; comparing it with the 0.0
            # set by reset() would give a wrong maximum for negative samples.
            self._min = value
            self._max = value
        else:
            if value < self._min:
                self._min = value
            if value > self._max:
                self._max = value

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues iterable of values to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0 when the instance is empty)
    #
    def mean(self):
        if self._n == 0:
            return 0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0 when there are fewer than two values)
    #
    def var(self):
        if self._n < 2:
            return 0
        mean = self.mean()
        denominator = float(self._n - 1)
        variance = self._sumOfSquares / denominator - self._n / denominator * mean * mean
        # The formula can yield a tiny (possibly negative) value through
        # floating-point cancellation; report 0 so that sd() never fails.
        if variance < 0 or round(variance, 10) == 0:
            variance = 0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt(self.var())

    ## Get the coefficient of variation of the sample
    #
    # @return float (0 when there are fewer than two values or the mean is 0)
    #
    def cv(self):
        mean = self.mean()
        if self._n < 2 or mean == 0.0:
            return 0
        return self.sd() / mean

    ## Get the median of the sample
    #
    # @return number or "NA" (Not available)
    #
    def median(self):
        nbValues = len(self._lValues)
        if nbValues == 0:
            return "NA"
        # Work on a sorted copy: asking for the median must not reorder the
        # stored values.
        lSorted = sorted(self._lValues)
        middle = nbValues // 2
        if nbValues % 2:
            return lSorted[middle]
        return (lSorted[middle - 1] + lSorted[middle]) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coefficient d'aplatissement' in French).
    # Computed as sum((x - mean)^4) / ((n - 1) * sd^4).
    # k < 3  -> flatter than a normal distribution
    # k = 3  -> same as normal distribution
    # k >> 3 -> peak
    #
    # @return float (0 when there are fewer than two values or no dispersion)
    #
    def kurtosis(self):
        variance = self.var()
        if self._n < 2 or variance == 0:
            return 0
        mean = self.mean()
        numerator = sum(math.pow(value - mean, 4) for value in self._lValues)
        # sd^4 == variance^2
        return numerator / (float(self._n - 1) * variance * variance)

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        lFields = [
            "n=%d" % (self._n),
            "mean=%5.3f" % (self.mean()),
            "var=%5.3f" % (self.var()),
            "sd=%5.3f" % (self.sd()),
            "min=%5.3f" % (self.getMin()),
        ]
        median = self.median()
        if median == "NA":
            lFields.append("med=%s" % (median))
        else:
            lFields.append("med=%5.3f" % (median))
        lFields.append("max=%5.3f" % (self.getMax()))
        return " ".join(lFields)

    ## Print descriptive statistics
    #
    def view(self):
        print(self.string())

    ## Sort the stored values in place and return them, ascending (default) or descending
    #
    # @param isReverse boolean True for descending order
    # @return list (the internal list, now sorted)
    #
    def sort(self, isReverse=False):
        self._lValues.sort(reverse=isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage
    #
    # @param percentage fraction between 0 and 1 (inclusive)
    # @return number (0 when the instance is empty)
    # @exception ValueError if percentage is outside [0, 1]
    #
    def quantile(self, percentage):
        if self._n == 0:
            return 0
        if percentage < 0 or percentage > 1:
            raise ValueError("percentage must be between 0 and 1, got %r" % (percentage,))
        if percentage == 1:
            return self.getMax()
        # Clamp so that rounding in n * percentage can never index past the end.
        index = min(int(self._n * percentage), self._n - 1)
        return sorted(self._lValues)[index]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles(self):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles(self):
        print(self.stringQuantiles())

    ## Compute N50: walking the values from largest to smallest, the value at
    # which the running total first reaches half of the overall sum
    #
    # @return number (0 when the instance is empty)
    #
    def N50(self):
        if self._n == 0:
            return 0
        lSorted = sorted(self._lValues, reverse=True)
        halfSum = self.getSum() / 2.0
        cumul = 0
        index = 0
        # The bound on index guards against float rounding in the running total.
        while cumul < halfSum and index < len(lSorted):
            cumul += lSorted[index]
            index += 1
        if index == 0:
            return lSorted[0]
        return lSorted[index - 1]