annotate commons/core/stat/Stat.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 import math
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
## Descriptive statistics over a list of numeric values.
#
# Values are stored as floats; the sum, sum of squares, count, minimum and
# maximum are maintained incrementally as values are added.
#
class Stat(object):

    ## Constructor
    #
    # @param lValues optional iterable of values used to fill the instance
    #
    def __init__(self, lValues = None):
        self.reset()
        # None (rather than a shared mutable list) is the "no values" default.
        if lValues is not None:
            self.fill(lValues)

    ## Equality on values (order-insensitive) and on the summary attributes
    #
    # Float attributes are compared after rounding to 6 decimals.
    # Neither operand is modified by the comparison.
    #
    # @param o other Stat instance
    # @return boolean (NotImplemented if o is not a Stat instance)
    #
    def __eq__(self, o):
        if not isinstance(o, Stat):
            return NotImplemented
        return sorted(self._lValues) == sorted(o._lValues) \
            and round(self._sum, 6) == round(o._sum, 6) \
            and round(self._sumOfSquares, 6) == round(o._sumOfSquares, 6) \
            and self._n == o._n \
            and round(self._min, 6) == round(o._min, 6) \
            and round(self._max, 6) == round(o._max, 6)

    ## Inequality, defined as the negation of __eq__
    #
    # @param o other Stat instance
    # @return boolean (NotImplemented if o is not a Stat instance)
    #
    def __ne__(self, o):
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    ## Get the internal list of values (not a copy)
    #
    # @return list of floats
    #
    def getValuesList(self):
        return self._lValues

    ## Get the sum of the values
    #
    # @return float
    #
    def getSum(self):
        return self._sum

    ## Get the sum of the squared values
    #
    # @return float
    #
    def getSumOfSquares(self):
        return self._sumOfSquares

    ## Get the number of values
    #
    # @return integer
    #
    def getValuesNumber(self):
        return self._n

    ## Get the smallest value (0.0 if no value was added)
    #
    # @return float
    #
    def getMin(self):
        return self._min

    ## Get the largest value (0.0 if no value was added)
    #
    # @return float
    #
    def getMax(self):
        return self._max

    ## Reset all attributes
    #
    def reset(self):
        self._lValues = []
        self._sum = 0.0
        self._sumOfSquares = 0.0
        self._n = 0
        self._max = 0.0
        self._min = 0.0

    ## Add a value to Stat instance list and update attributes
    #
    # @param v float value to add (anything accepted by float())
    #
    def add(self, v):
        value = float(v)
        self._lValues.append(value)
        self._sum += value
        self._sumOfSquares += value * value
        self._n += 1
        if self._n == 1:
            # The first value defines both bounds; comparing against the
            # 0.0 placeholders would give a wrong max for negative samples.
            self._min = value
            self._max = value
        elif value > self._max:
            self._max = value
        elif value < self._min:
            self._min = value

    ## Add a list of values to Stat instance list and update attributes
    #
    # @param lValues list of float list to add
    #
    def fill(self, lValues):
        for v in lValues:
            self.add(v)

    ## Get the arithmetic mean of the Stat instance list
    #
    # @return float (0 if there is no value)
    #
    def mean(self):
        if self._n == 0:
            return 0
        return self._sum / float(self._n)

    ## Get the variance of the sample
    # @note we consider a sample, not a population. So for calculation, we use n-1
    #
    # @return float (0 if there are fewer than 2 values)
    #
    def var(self):
        if self._n < 2:
            return 0
        mean = self.mean()
        denominator = float(self._n - 1)
        variance = self._sumOfSquares / denominator - self._n / denominator * mean * mean
        # Cancellation can leave a tiny (possibly negative) residue for
        # constant samples; report it as an exact zero.
        if variance < 0 or round(variance, 10) == 0:
            variance = 0
        return variance

    ## Get the standard deviation of the sample
    #
    # @return float
    #
    def sd(self):
        return math.sqrt( self.var() )

    ## Get the coefficient of variation of the sample
    #
    # @return float (0 if there are fewer than 2 values or if the mean is 0)
    #
    def cv(self):
        if self._n < 2 or self.mean() == 0.0:
            return 0
        return self.sd() / self.mean()

    ## Get the median of the sample
    #
    # @note the internal list of values is sorted in ascending order
    #
    # @return number or "NA" (Not available)
    #
    def median( self ):
        size = len(self._lValues)
        if size == 0:
            return "NA"
        if size == 1:
            return self._lValues[0]
        self._lValues.sort()
        m = int( math.ceil( size / 2.0 ) )
        if size % 2:
            return self._lValues[m-1]
        return ( self._lValues[m-1] + self._lValues[m] ) / 2.0

    ## Get the kurtosis (measure of whether the data are peaked or flat relative to a normal distribution, 'coef d'aplatissement' in french).
    # Computed as sum((x - mean)^4) / ((n - 1) * sd^4)
    # k = 3 -> same as normal distribution
    # k >> 3 -> peak
    #
    # @return float (0 if there are fewer than 2 values or if the variance is 0)
    #
    def kurtosis(self):
        variance = self.var()
        if self._n < 2 or variance == 0:
            return 0
        mean = self.mean()
        numerator = sum( math.pow( value - mean, 4 ) for value in self._lValues )
        # sd^4 is variance^2; the whole product belongs in the denominator.
        return numerator / ( float(self._n - 1) * variance * variance )

    ## Prepare a string with calculations on your values
    #
    # @return string
    #
    def string(self):
        msg = ""
        msg += "n=%d" % ( self._n )
        msg += " mean=%5.3f" % ( self.mean() )
        msg += " var=%5.3f" % ( self.var() )
        msg += " sd=%5.3f" % ( self.sd() )
        msg += " min=%5.3f" % ( self.getMin() )
        median = self.median()
        if median == "NA":
            msg += " med=%s" % (median)
        else:
            msg += " med=%5.3f" % (median)
        msg += " max=%5.3f" % ( self.getMax() )
        return msg

    ## Print descriptive statistics
    #
    def view(self):
        print(self.string())

    ## Return sorted list of values, ascending (default) or descending
    #
    # @note the internal list is sorted in place and returned (not a copy)
    #
    # @param isReverse boolean, True for descending order
    # @return list
    #
    def sort( self, isReverse = False ):
        self._lValues.sort(reverse = isReverse)
        return self._lValues

    ## Give the quantile corresponding to the chosen percentage
    #
    # @param percentage fraction of the sample, expected between 0 and 1
    # @return number (0 if there is no value)
    #
    def quantile( self, percentage ):
        if self._n == 0:
            return 0
        if percentage == 1:
            return self.getMax()
        return self.sort()[int(self._n * percentage)]

    ## Prepare a string with quantile values
    #
    # @return string
    #
    def stringQuantiles( self ):
        return "n=%d min=%5.3f Q1=%5.3f median=%5.3f Q3=%5.3f max=%5.3f" % \
            (self._n, self.quantile(0), self.quantile(0.25), self.quantile(0.5), self.quantile(0.75), self.quantile(1))

    ## Print quantiles string
    #
    def viewQuantiles( self ):
        print(self.stringQuantiles())

    ## Compute N50
    #
    # Values are taken in descending order and accumulated until the running
    # total reaches half of the overall sum; the last value taken is returned.
    #
    # @note the internal list of values is left sorted in descending order
    #
    # @return number (0 if there is no value)
    #
    def N50(self ):
        lSorted = self.sort(True)
        if not lSorted:
            return 0
        midlValues = self.getSum() / 2
        cumul = 0
        index = 0
        # The index bound guards against float rounding keeping the running
        # total just below the threshold after the last value.
        while cumul < midlValues and index < len(lSorted):
            cumul = cumul + lSorted[index]
            index += 1
        if index == 0:
            return lSorted[index]
        return lSorted[index - 1]