Mercurial > repos > davidmurphy > codonlogo
comparison corebio/moremath.py @ 0:c55bdc2fb9fa
Uploaded
| author | davidmurphy | 
|---|---|
| date | Thu, 27 Oct 2011 12:09:09 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:c55bdc2fb9fa | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
| 4 # | |
| 5 # This software is distributed under the MIT Open Source License. | |
| 6 # <http://www.opensource.org/licenses/mit-license.html> | |
| 7 # | |
| 8 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 9 # copy of this software and associated documentation files (the "Software"), | |
| 10 # to deal in the Software without restriction, including without limitation | |
| 11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| 12 # and/or sell copies of the Software, and to permit persons to whom the | |
| 13 # Software is furnished to do so, subject to the following conditions: | |
| 14 # | |
| 15 # The above copyright notice and this permission notice shall be included | |
| 16 # in all copies or substantial portions of the Software. | |
| 17 # | |
| 18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 24 # THE SOFTWARE. | |
| 25 # | |
| 26 | |
| 27 | |
| 28 """ Various bits of useful math not in the standard python library. | |
| 29 | |
| 30 Constants : | |
| 31 | |
| 32 - euler_gamma = 0.577215... | |
| 33 - catalan = 0.915965... | |
| 34 - golden_ratio = 1.618033... | |
| 35 - bits_per_nat = log2(e) = 1/log(2) | |
| 36 - sqrt_2pi = 2.50662... | |
| 37 | |
| 38 Special Functions : | |
| 39 | |
| 40 - gamma() -- Gamma function. | |
| 41 - lngamma() -- Logarithm of the gamma function | |
| 42 - factorial() -- The factorial function. | |
| 43 - digamma() -- Digamma function (logarithmic derivative of gamma). | |
| 44 - trigamma() -- Trigamma function (derivative of digamma). | |
| 45 - entropy() -- The entropy of a probability vector | |
| 46 - incomplete_gamma() -- The 'upper' incomplete gamma function. | |
| 47 - normalized_incomplete_gamma() -- | |
| 48 - lg() -- Base 2 logarithms. | |
| 49 | |
| 50 | |
| 51 Vector Operations : | |
| 52 | |
| 53 - rmsd() -- Root mean squared deviation of two point vectors | |
| 54 - minimize_rmsd() -- Find the rigid transformation that minimized the | |
| 55 RMSD between two vectors of points. | |
| 56 | |
| 57 Minimization : | |
| 58 | |
| 59 - find_root() -- 1d root finding | |
| 60 | |
| 61 Probability Distributions : | |
| 62 - Gamma | |
| 63 - Dirichlet | |
| 64 - Multinomial | |
| 65 - Gaussian | |
| 66 | |
| 67 """ | |
| 68 | |
| 69 | |
| 70 | |
| 71 __all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi', | |
| 72 'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma', | |
| 73 'entropy', 'log2', | |
| 74 'incomplete_gamma', 'normalized_incomplete_gamma', | |
| 75 # 'integrate', | |
| 76 # 'rmsd', 'minimize_rmsd', 'find_root', | |
| 77 # 'Gamma', 'Dirichlet', | |
| 78 # 'decompose_log_odds_array', | |
| 79 'argmax', 'argmin' | |
| 80 ) | |
| 81 | |
| 82 from math import * | |
| 83 import random | |
| 84 from itertools import izip, count | |
| 85 | |
| 86 # Some mathematical constants | |
| 87 euler_gamma = 0.57721566490153286060651 | |
| 88 catalan = 0.91596559417721901505460 | |
| 89 golden_ratio = 1.6180339887498948482046 | |
| 90 bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2) | |
| 91 sqrt_2pi = 2.5066282746310005024157652848110 | |
| 92 | |
| 93 | |
| 94 | |
| 95 | |
| 96 | |
| 97 # The Lanczos approximation for the gamma function is | |
| 98 # | |
| 99 # -(z + g + 1/2) (z + 1/2) | |
| 100 # Gamma(z+1) = e * (z + g + 1/2) * Sqrt(2Pi) * C | |
| 101 # | |
| 102 # | |
| 103 # c[1] c[2] c[3] | |
| 104 # C = [c[0] + ----- + ----- + ----- + ... ] | |
| 105 # z + 1 z + 2 z + 3 | |
| 106 # | |
| 107 # | |
| 108 # To calculate digamma and trigamma functions we take an analytic derivative | |
| 109 # of the Lanczos approximation. | |
| 110 # | |
| 111 # Gamma(z) = Gamma(z+1)/z | |
| 112 # Digamma(z) = D ln Gamma(z) | |
| 113 # Trigamma(z) = D Digamma(z) | |
| 114 | |
| 115 # These Lanczos constants are from | |
| 116 # "A note on the computation of the convergent | |
| 117 # Lanczos complex Gamma approximation." Paul Godfrey (2001) | |
| 118 # http://my.fit.edu/~gabdo/gamma.txt | |
| 119 | |
| 120 | |
| 121 __lanczos_gamma = 607./128. | |
| 122 __lanczos_coefficients = ( | |
| 123 0.99999999999999709182, | |
| 124 57.156235665862923517, | |
| 125 -59.597960355475491248, | |
| 126 14.136097974741747174, | |
| 127 -0.49191381609762019978, | |
| 128 .33994649984811888699e-4, | |
| 129 .46523628927048575665e-4, | |
| 130 -.98374475304879564677e-4, | |
| 131 .15808870322491248884e-3, | |
| 132 -.21026444172410488319e-3, | |
| 133 .21743961811521264320e-3, | |
| 134 -.16431810653676389022e-3, | |
| 135 .84418223983852743293e-4, | |
| 136 -.26190838401581408670e-4, | |
| 137 .36899182659531622704e-5) | |
| 138 | |
| 139 __factorial =( | |
| 140 1., | |
| 141 1., | |
| 142 2., | |
| 143 6., | |
| 144 24., | |
| 145 120., | |
| 146 720., | |
| 147 5040., | |
| 148 40320., | |
| 149 362880., | |
| 150 3628800., | |
| 151 39916800., | |
| 152 479001600., | |
| 153 6227020800., | |
| 154 87178291200., | |
| 155 1307674368000., | |
| 156 20922789888000., | |
| 157 355687428096000., | |
| 158 6402373705728000., | |
| 159 121645100408832000., | |
| 160 2432902008176640000., | |
| 161 51090942171709440000., | |
| 162 1124000727777607680000., | |
| 163 25852016738884976640000., | |
| 164 620448401733239439360000., | |
| 165 15511210043330985984000000., | |
| 166 403291461126605635584000000., | |
| 167 10888869450418352160768000000., | |
| 168 304888344611713860501504000000., | |
| 169 8841761993739701954543616000000., | |
| 170 265252859812191058636308480000000., | |
| 171 8222838654177922817725562880000000., | |
| 172 263130836933693530167218012160000000. ) | |
| 173 | |
| 174 def gamma(z) : | |
| 175 """The gamma function. Returns exact results for small integers. Will | |
| 176 overflow for modest sized arguments. Use lngamma(z) instead. | |
| 177 | |
| 178 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. | |
| 179 http://mathworld.wolfram.com/GammaFunction.html | |
| 180 | |
| 181 """ | |
| 182 | |
| 183 n = floor(z) | |
| 184 if n == z : | |
| 185 if z <= 0 : | |
| 186 return 1.0/0.0 # Infinity | |
| 187 elif n <= len(__factorial) : | |
| 188 return __factorial[int(n)-1] | |
| 189 | |
| 190 zz = z | |
| 191 if z < 0.5 : | |
| 192 zz = 1-z | |
| 193 | |
| 194 | |
| 195 g = __lanczos_gamma | |
| 196 c = __lanczos_coefficients | |
| 197 | |
| 198 zz = zz - 1. | |
| 199 zh = zz + 0.5 | |
| 200 zgh = zh + g | |
| 201 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 | |
| 202 | |
| 203 ss = 0.0 | |
| 204 for k in range(len(c)-1,0,-1): | |
| 205 ss += c[k]/(zz+k) | |
| 206 | |
| 207 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) | |
| 208 | |
| 209 if z<0.5 : | |
| 210 f = pi /( sin(pi*z) *f) | |
| 211 | |
| 212 return f | |
| 213 | |
| 214 | |
| 215 def lngamma(z) : | |
| 216 """The logarithm of the gamma function. | |
| 217 """ | |
| 218 | |
| 219 # common case optimization | |
| 220 | |
| 221 n = floor(z) | |
| 222 if n == z : | |
| 223 if z <= 0 : | |
| 224 return 1.0/0.0 # Infinity | |
| 225 elif n <= len(__factorial) : | |
| 226 return __factorial[int(n)-1] | |
| 227 | |
| 228 zz = z | |
| 229 if z < 0.5 : | |
| 230 zz = 1-z | |
| 231 | |
| 232 | |
| 233 g = __lanczos_gamma | |
| 234 c = __lanczos_coefficients | |
| 235 | |
| 236 zz = zz - 1. | |
| 237 zh = zz + 0.5 | |
| 238 zgh = zh + g | |
| 239 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 | |
| 240 | |
| 241 ss = 0.0 | |
| 242 for k in range(len(c)-1,0,-1): | |
| 243 ss += c[k]/(zz+k) | |
| 244 | |
| 245 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) | |
| 246 | |
| 247 if z<0.5 : | |
| 248 f = pi /( sin(pi*z) *f) | |
| 249 | |
| 250 return log(f) | |
| 251 | |
| 252 | |
| 253 def factorial(z) : | |
| 254 """ The factorial function. | |
| 255 factorial(z) == gamma(z+1) | |
| 256 """ | |
| 257 return gamma(z+1) | |
| 258 | |
| 259 | |
| 260 def digamma(z) : | |
| 261 """The digamma function, the logarithmic derivative of the gamma function. | |
| 262 digamma(z) = d/dz ln( gamma(z) ) | |
| 263 | |
| 264 See: Eric W. Weisstein. "Digamma Function." From MathWorld-- | |
| 265 A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html | |
| 266 """ | |
| 267 | |
| 268 g = __lanczos_gamma | |
| 269 c = __lanczos_coefficients | |
| 270 | |
| 271 zz = z | |
| 272 if z < 0.5 : | |
| 273 zz = 1 -z | |
| 274 | |
| 275 n=0. | |
| 276 d=0. | |
| 277 for k in range(len(c)-1,0,-1): | |
| 278 dz =1./(zz+(k+1)-2); | |
| 279 dd =c[k] * dz | |
| 280 d = d + dd | |
| 281 n = n - dd * dz | |
| 282 | |
| 283 d = d + c[0] | |
| 284 gg = zz + g - 0.5 | |
| 285 f = log(gg) + (n/d - g/gg) | |
| 286 | |
| 287 if z<0.5 : | |
| 288 f -= pi / tan( pi * z) | |
| 289 | |
| 290 return f | |
| 291 | |
| 292 | |
| 293 def trigamma(z) : | |
| 294 """The trigamma function, the derivative of the digamma function. | |
| 295 trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) ) | |
| 296 | |
| 297 See: Eric W. Weisstein. "Digamma Function." From MathWorld-- | |
| 298 A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html | |
| 299 """ | |
| 300 | |
| 301 g = __lanczos_gamma | |
| 302 c = __lanczos_coefficients | |
| 303 | |
| 304 t1=0. | |
| 305 t2=0. | |
| 306 t3=0. | |
| 307 for k in range(len(c)-1,0,-1): | |
| 308 dz =1./(z+k); | |
| 309 dd1 = c[k]* dz | |
| 310 t1 += dd1 | |
| 311 dd2 = dd1 * dz | |
| 312 t2 += dd2 | |
| 313 t3 += dd2 * dz | |
| 314 | |
| 315 t1 += c[0] | |
| 316 c = - (t2*t2)/(t1*t1) +2*t3/t1 | |
| 317 | |
| 318 result = 1./(z*z) | |
| 319 gg = z + g + 0.5 | |
| 320 result += - (z+0.5)/ (gg*gg) | |
| 321 result += 2./gg | |
| 322 | |
| 323 result += c | |
| 324 | |
| 325 return result | |
| 326 | |
| 327 def incomplete_gamma(a,x) : | |
| 328 """The 'upper' incomplete gamma function: | |
| 329 | |
| 330 oo | |
| 331 - | |
| 332 | -t a-1 | |
| 333 incomplete_gamma(a,x) = | e t dt. | |
| 334 | | |
| 335 - | |
| 336 x | |
| 337 | |
| 338 In Mathematica, Gamma[a,x]. | |
| 339 | |
| 340 Note that, very confusingly, the phrase 'incomplete gamma fucntion' | |
| 341 can also refer to the same integral between 0 and x, (the 'lower' | |
| 342 incomplete gamma function) or to the normalized versions, | |
| 343 normalized_incomplete_gamma() ) | |
| 344 | |
| 345 | |
| 346 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. | |
| 347 http://mathworld.wolfram.com/IncompleteGammaFunction.html | |
| 348 | |
| 349 Bugs : | |
| 350 This implentation is not very accurate for some arguments. | |
| 351 """ | |
| 352 return normalized_incomplete_gamma(a,x) * gamma(a) | |
| 353 | |
| 354 | |
| 355 def normalized_incomplete_gamma(a,x) : | |
| 356 """The upper, incomplete gamma function normalized so that the limiting | |
| 357 values are zero and one. | |
| 358 | |
| 359 Q(a,x) = incomplete_gamma(a,x) / gamma(a) | |
| 360 | |
| 361 See: | |
| 362 incomplete_gamma() | |
| 363 Bugs : | |
| 364 This implentation is not very accurate for some arguments. | |
| 365 """ | |
| 366 maxiter = 100 | |
| 367 epsilon = 1.48e-8 | |
| 368 small = 1e-30 | |
| 369 | |
| 370 | |
| 371 if a<=0 or x<0 : | |
| 372 raise ValueError("Invalid arguments") | |
| 373 if x == 0.0 : return 1.0 | |
| 374 | |
| 375 if x<= a+1 : | |
| 376 # Use the series representation | |
| 377 term = 1./a | |
| 378 total = term | |
| 379 for n in range(1,maxiter) : | |
| 380 term *= x/(a+n) | |
| 381 total += term | |
| 382 if abs(term/total) < epsilon : | |
| 383 return 1. - total * exp(-x+a*log(x) - lngamma(a) ) | |
| 384 raise RuntimeError( | |
| 385 "Failed to converge after %d iterations." % (maxiter) ) | |
| 386 else : | |
| 387 # Use the continued fraction representation | |
| 388 total = 1.0 | |
| 389 b = x + 1. -a | |
| 390 c = 1./small | |
| 391 d = 1./b | |
| 392 h = d | |
| 393 for i in range(1, maxiter) : | |
| 394 an = -i * (i-a) | |
| 395 b = b+2. | |
| 396 d = an * d + b | |
| 397 if abs(d) < small : d = small | |
| 398 c = b + an /c | |
| 399 if abs(c) < small : c= small | |
| 400 d = 1./d | |
| 401 term = d * c | |
| 402 h = h * term | |
| 403 if abs( term-1.) < epsilon : | |
| 404 return h * exp(-x+a*log(x) - lngamma(a) ) | |
| 405 raise RuntimeError( | |
| 406 "Failed to converge after %d iterations." % (maxiter) ) | |
| 407 | |
| 408 | |
| 409 | |
| 410 def log2( x) : | |
| 411 """ Return the base 2 logarithm of x """ | |
| 412 return log(x,2) | |
| 413 | |
| 414 | |
| 415 def entropy( pvec, base= exp(1) ) : | |
| 416 """ The entropy S = -Sum_i p_i ln p_i | |
| 417 pvec is a frequency vector, not necessarily normalized. | |
| 418 """ | |
| 419 # TODO: Optimize | |
| 420 if len(pvec) ==0 : | |
| 421 raise ValueError("Zero length vector") | |
| 422 | |
| 423 | |
| 424 total = 0.0 | |
| 425 ent = 0.0 | |
| 426 for p in pvec: | |
| 427 if p>0 : # 0 log(0) =0 | |
| 428 total += p | |
| 429 ent += - log(float(p)) *p | |
| 430 elif p<0: | |
| 431 raise ValueError("Negative probability") | |
| 432 | |
| 433 | |
| 434 ent = (ent/total) + log(total) | |
| 435 ent /= log(base) | |
| 436 | |
| 437 return ent | |
| 438 | |
| 439 | |
| 440 | |
| 441 | |
| 442 | |
| 443 def argmax( alist) : | |
| 444 """Return the index of the last occurance of the maximum value in the list.""" | |
| 445 return max(izip(alist, count() ))[1] | |
| 446 | |
| 447 def argmin( alist) : | |
| 448 """Return the index of the first occurance of the minimum value in the list.""" | |
| 449 return min(izip(alist, count() ))[1] | |
| 450 | |
| 451 | |
| 452 | |
| 453 | |
| 454 | |
| 455 | 
