6
|
1 import math
|
|
2
|
|
## Store a list of numeric values and compute descriptive statistics on it
#
# Values are converted to float when added. The sum, the sum of squares, the
# number of values, the minimum and the maximum are updated incrementally by
# add(); every other statistic is computed on demand.
# Only sort() reorders the stored list: all other methods work on sorted copies.
#
class Stat(object):

    ## Constructor
    #
    # @param lValues optional iterable of values used to fill the instance
    #
    def __init__(self, lValues = None):
        self.reset()
        # None replaces the former mutable default argument ([])
        if lValues is not None:
            self.fill(lValues)

    ## Compare two Stat instances
    #
    # Two instances are equal when they hold the same values (whatever their order)
    # and when their counts match and their sums, sums of squares, minima and
    # maxima match once rounded to 6 decimals.
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        # compare sorted copies so that neither operand is reordered
        return sorted(self._lValues) == sorted(o._lValues) \
            and self._n == o._n \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Opposite of __eq__ (Python 2 does not derive it automatically)
    #
    # @param o object to compare with
    # @return boolean, or NotImplemented if o is not a Stat instance
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the stored values
    #
    # @return list of float (the internal list itself, not a copy)
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return integer
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 if the instance is empty)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v value to add (anything accepted by float())
    #
    def add(self, v):
        fValue = float(v)
        self._lValues.append(fValue)
        self._sum += fValue
        self._sumOfSquares += fValue * fValue
        self._n += 1
        if self._n == 1:
            # the first value is both extremes; comparing it with the 0.0 set by
            # reset() would give a wrong maximum for all-negative samples
            self._min = fValue
            self._max = fValue
        else:
            if fValue < self._min:
                self._min = fValue
            if fValue > self._max:
                self._max = fValue

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues iterable of values to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0 if the instance is empty)
    #
    def mean(self):
        if self._n == 0:
            return 0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0 if there are fewer than two values)
    #
    def var(self):
        if self._n < 2:
            return 0
        fMean = self.mean()
        variance = self._sumOfSquares / float(self._n - 1) \
            - self._n / float(self._n - 1) * fMean * fMean
        # floating-point cancellation can leave a tiny or slightly negative residue;
        # a negative variance would make sd() fail
        if variance < 0 or round(variance, 10) == 0:
            variance = 0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt( self.var() )

    ## Get the coefficient of variation of the sample
    #
    # @return float (0 if there are fewer than two values or if the mean is null)
    #
    def cv(self):
        if self._n < 2 or self.mean() == 0.0:
            return 0
        return self.sd() / self.mean()

    ## Get the median of the sample
    #
    # @return number or "NA" (Not available) if the instance is empty
    #
    def median( self ):
        nbValues = len(self._lValues)
        if nbValues == 0:
            return "NA"
        lSorted = sorted(self._lValues)
        middle = nbValues // 2
        if nbValues % 2:
            return lSorted[middle]
        return ( lSorted[middle - 1] + lSorted[middle] ) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coef d'aplatissement' in French).
    # k = 3 -> same as normal distribution
    # k >> 3 -> peak
    #
    # Computed as sum((x - mean)^4) / ((n - 1) * variance^2).
    #
    # @return float (0 if there are fewer than two values or if the variance is null)
    #
    def kurtosis(self):
        if self._n < 2:
            return 0
        variance = self.var()
        if variance == 0:
            return 0
        fMean = self.mean()
        numerator = 0
        for v in self._lValues:
            numerator += math.pow( v - fMean, 4 )
        # variance^2 is the fourth power of the standard deviation
        return numerator / ( float(self._n - 1) * variance * variance )

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        lFields = []
        lFields.append( "n=%d" % ( self._n ) )
        lFields.append( "mean=%5.3f" % ( self.mean() ) )
        lFields.append( "var=%5.3f" % ( self.var() ) )
        lFields.append( "sd=%5.3f" % ( self.sd() ) )
        lFields.append( "min=%5.3f" % ( self.getMin() ) )
        median = self.median()
        if median == "NA":
            lFields.append( "med=%s" % ( median ) )
        else:
            lFields.append( "med=%5.3f" % ( median ) )
        lFields.append( "max=%5.3f" % ( self.getMax() ) )
        return " ".join(lFields)

    ## Print descriptive statistics
    #
    def view(self):
        # single-argument call form: valid with both Python 2 and Python 3
        print(self.string())

    ## Sort the stored values in place, ascending (default) or descending, and return them
    #
    # @param isReverse boolean True to sort in descending order
    # @return list (the internal list itself, not a copy)
    #
    def sort( self, isReverse = False ):
        self._lValues.sort(reverse = isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage
    #
    # @param percentage fraction of the sample, expected between 0 and 1
    # @return number (0 if the instance is empty)
    #
    def quantile( self, percentage ):
        if self._n == 0:
            return 0
        if percentage == 1:
            # int(n * 1) would point one past the last value
            return self.getMax()
        return sorted(self._lValues)[int(self._n * percentage)]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles( self ):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles( self ):
        # single-argument call form: valid with both Python 2 and Python 3
        print(self.stringQuantiles())

    ## Compute N50
    #
    # Values are read from the largest to the smallest; the N50 is the value at
    # which their running sum reaches half of the total sum.
    #
    # @return number (0 if the instance is empty)
    #
    def N50(self ):
        if self._n == 0:
            return 0
        lSorted = sorted(self._lValues, reverse = True)
        halfSum = self.getSum() / 2
        if halfSum <= 0:
            # nothing to accumulate: the largest value is returned
            return lSorted[0]
        cumul = 0
        for v in lSorted:
            cumul += v
            if cumul >= halfSum:
                return v
        return lSorted[-1]