Mercurial > repos > yufei-luo > s_mart
comparison commons/core/stat/Stat.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 import math | |
2 | |
class Stat(object):
    ## Accumulate numeric values and compute descriptive statistics on them
    #
    # Every added value is converted to float and stored; the sum, the sum of
    # squares, the number of values, the minimum and the maximum are updated
    # incrementally by add().
    #

    ## Constructor
    #
    # @param lValues iterable of numbers used to fill the instance (optional)
    #
    def __init__(self, lValues = None):
        self.reset()
        # None (rather than a shared mutable []) marks "no initial values"
        if lValues is not None:
            self.fill(lValues)

    ## Compare two Stat instances
    #
    # Two instances are equal when they hold the same values (in any order),
    # the same number of values, and the same sum, sum of squares, min and max
    # once rounded to 6 decimals. Neither operand is modified.
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        # compare sorted copies so that the comparison does not reorder the operands
        return sorted(self._lValues) == sorted(o._lValues) \
            and self._n == o._n \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Negation of __eq__ (Python 2 does not derive '!=' from '==')
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the internal list of values (the list itself, not a copy)
    #
    # @return list of float
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return int
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v float value to add (anything accepted by float())
    #
    def add(self, v):
        value = float(v)
        self._lValues.append(value)
        self._sum += value
        self._sumOfSquares += value * value
        self._n += 1
        if self._n == 1:
            # the first value defines both bounds; comparing it with the 0.0
            # set by reset() would give a wrong max for all-negative samples
            self._min = value
            self._max = value
        else:
            if value > self._max:
                self._max = value
            if value < self._min:
                self._min = value

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues iterable of values to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0 if the instance is empty)
    #
    def mean(self):
        if self._n == 0:
            return 0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0 if there are fewer than 2 values)
    #
    def var(self):
        if self._n < 2:
            return 0
        mean = self.mean()
        variance = self._sumOfSquares / float(self._n - 1) \
            - self._n / float(self._n - 1) * mean * mean
        # rounding noise (including a slightly negative result caused by
        # cancellation) is reported as an exact 0 so that sd() never fails
        if variance < 0 or round(variance, 10) == 0:
            variance = 0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt( self.var() )

    ## Get the coefficient of variation of the sample
    #
    # @return float (0 if there are fewer than 2 values or if the mean is 0)
    #
    def cv(self):
        mean = self.mean()
        if self._n < 2 or mean == 0.0:
            return 0
        return self.sd() / mean

    ## Get the median of the sample
    #
    # @note the internal list of values is sorted in ascending order as a side effect
    #
    # @return number or "NA" (Not available)
    #
    def median( self ):
        nbValues = len(self._lValues)
        if nbValues == 0:
            return "NA"
        if nbValues == 1:
            return self._lValues[0]
        self._lValues.sort()
        m = int( math.ceil( nbValues / 2.0 ) )
        if nbValues % 2:
            return self._lValues[m-1]
        # even number of values: average of the two central ones
        return ( self._lValues[m-1] + self._lValues[m] ) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coef d'aplatissement' in french).
    # k = 3 -> same as normal distribution
    # k >> 3 -> peak
    #
    # Computed as sum( (x - mean)^4 ) / ( (n - 1) * var^2 ).
    #
    # @return float (0 if there are fewer than 2 values or if the variance is 0)
    #
    def kurtosis(self):
        variance = self.var()
        if self._n < 2 or variance == 0:
            return 0
        mean = self.mean()
        numerator = 0
        for value in self._lValues:
            numerator += math.pow( value - mean, 4 )
        return numerator / ( float(self._n - 1) * variance * variance )

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        msg = "n=%d" % ( self._n )
        msg += " mean=%5.3f" % ( self.mean() )
        msg += " var=%5.3f" % ( self.var() )
        msg += " sd=%5.3f" % ( self.sd() )
        msg += " min=%5.3f" % ( self.getMin() )
        median = self.median()
        if median == "NA":
            msg += " med=%s" % (median)
        else:
            msg += " med=%5.3f" % (median)
        msg += " max=%5.3f" % ( self.getMax() )
        return msg

    ## Print descriptive statistics
    #
    def view(self):
        # single-argument call form: same output with Python 2 and Python 3
        print(self.string())

    ## Return sorted list of values, ascending (default) or descending
    #
    # @note the internal list is sorted in place and returned (not a copy)
    #
    # @param isReverse boolean True to sort in descending order
    # @return list
    #
    def sort( self, isReverse = False ):
        self._lValues.sort(reverse = isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage
    #
    # @note the internal list of values is sorted in ascending order as a side effect
    #
    # @param percentage fraction between 0 and 1 (e.g. 0.25 for the first quartile)
    # @return number (0 if the instance is empty)
    #
    def quantile( self, percentage ):
        if self._n == 0:
            return 0
        if percentage == 1:
            # int(n * 1) would index one past the end of the list
            return self.getMax()
        return self.sort()[int(self._n * percentage)]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles( self ):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles( self ):
        # single-argument call form: same output with Python 2 and Python 3
        print(self.stringQuantiles())

    ## Compute N50
    #
    # Values are accumulated from the largest to the smallest; the result is
    # the value at which the running total reaches half of the overall sum.
    #
    # @note the internal list of values is sorted in descending order as a side effect
    #
    # @return number (0 if the instance is empty)
    #
    def N50(self ):
        if self._n == 0:
            return 0
        lSorted = self.sort(True)
        midlValues = self.getSum() / 2
        cumul = 0
        index = 0
        # the index bound protects against running past the end of the list
        # when rounding keeps the running total just below the threshold
        while cumul < midlValues and index < len(lSorted):
            cumul += lSorted[index]
            index += 1
        if index == 0:
            return lSorted[0]
        return lSorted[index - 1]