comparison ezBAMQC/src/htslib/kfunc.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3745e5fd8
1 /* The MIT License
2
3 Copyright (C) 2010, 2013 Genome Research Ltd.
4 Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk>
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice shall be
15 included in all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 SOFTWARE.
25 */
26
27 #include <math.h>
28 #include <stdlib.h>
29 #include "htslib/kfunc.h"
30
31 /* Log gamma function
32 * \log{\Gamma(z)}
33 * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
34 */
35 double kf_lgamma(double z)
36 {
37 double x = 0;
38 x += 0.1659470187408462e-06 / (z+7);
39 x += 0.9934937113930748e-05 / (z+6);
40 x -= 0.1385710331296526 / (z+5);
41 x += 12.50734324009056 / (z+4);
42 x -= 176.6150291498386 / (z+3);
43 x += 771.3234287757674 / (z+2);
44 x -= 1259.139216722289 / (z+1);
45 x += 676.5203681218835 / z;
46 x += 0.9999999999995183;
47 return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);
48 }
49
50 /* complementary error function
51 * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt
52 * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66
53 */
54 double kf_erfc(double x)
55 {
56 const double p0 = 220.2068679123761;
57 const double p1 = 221.2135961699311;
58 const double p2 = 112.0792914978709;
59 const double p3 = 33.912866078383;
60 const double p4 = 6.37396220353165;
61 const double p5 = .7003830644436881;
62 const double p6 = .03526249659989109;
63 const double q0 = 440.4137358247522;
64 const double q1 = 793.8265125199484;
65 const double q2 = 637.3336333788311;
66 const double q3 = 296.5642487796737;
67 const double q4 = 86.78073220294608;
68 const double q5 = 16.06417757920695;
69 const double q6 = 1.755667163182642;
70 const double q7 = .08838834764831844;
71 double expntl, z, p;
72 z = fabs(x) * M_SQRT2;
73 if (z > 37.) return x > 0.? 0. : 2.;
74 expntl = exp(z * z * - .5);
75 if (z < 10. / M_SQRT2) // for small z
76 p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0)
77 / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0);
78 else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))));
79 return x > 0.? 2. * p : 2. * (1. - p);
80 }
81
82 /* The following computes regularized incomplete gamma functions.
83 * Formulas are taken from Wiki, with additional input from Numerical
84 * Recipes in C (for modified Lentz's algorithm) and AS245
85 * (http://lib.stat.cmu.edu/apstat/245).
86 *
87 * A good online calculator is available at:
88 *
89 * http://www.danielsoper.com/statcalc/calc23.aspx
90 *
91 * It calculates upper incomplete gamma function, which equals
92 * kf_gammaq(s,z)*tgamma(s).
93 */
94
95 #define KF_GAMMA_EPS 1e-14
96 #define KF_TINY 1e-290
97
98 // regularized lower incomplete gamma function, by series expansion
99 static double _kf_gammap(double s, double z)
100 {
101 double sum, x;
102 int k;
103 for (k = 1, sum = x = 1.; k < 100; ++k) {
104 sum += (x *= z / (s + k));
105 if (x / sum < KF_GAMMA_EPS) break;
106 }
107 return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));
108 }
109 // regularized upper incomplete gamma function, by continued fraction
110 static double _kf_gammaq(double s, double z)
111 {
112 int j;
113 double C, D, f;
114 f = 1. + z - s; C = f; D = 0.;
115 // Modified Lentz's algorithm for computing continued fraction
116 // See Numerical Recipes in C, 2nd edition, section 5.2
117 for (j = 1; j < 100; ++j) {
118 double a = j * (s - j), b = (j<<1) + 1 + z - s, d;
119 D = b + a * D;
120 if (D < KF_TINY) D = KF_TINY;
121 C = b + a / C;
122 if (C < KF_TINY) C = KF_TINY;
123 D = 1. / D;
124 d = C * D;
125 f *= d;
126 if (fabs(d - 1.) < KF_GAMMA_EPS) break;
127 }
128 return exp(s * log(z) - z - kf_lgamma(s) - log(f));
129 }
130
131 double kf_gammap(double s, double z)
132 {
133 return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z);
134 }
135
136 double kf_gammaq(double s, double z)
137 {
138 return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z);
139 }
140
141 /* Regularized incomplete beta function. The method is taken from
142 * Numerical Recipe in C, 2nd edition, section 6.4. The following web
143 * page calculates the incomplete beta function, which equals
144 * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b):
145 *
146 * http://www.danielsoper.com/statcalc/calc36.aspx
147 */
148 static double kf_betai_aux(double a, double b, double x)
149 {
150 double C, D, f;
151 int j;
152 if (x == 0.) return 0.;
153 if (x == 1.) return 1.;
154 f = 1.; C = f; D = 0.;
155 // Modified Lentz's algorithm for computing continued fraction
156 for (j = 1; j < 200; ++j) {
157 double aa, d;
158 int m = j>>1;
159 aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))
160 : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));
161 D = 1. + aa * D;
162 if (D < KF_TINY) D = KF_TINY;
163 C = 1. + aa / C;
164 if (C < KF_TINY) C = KF_TINY;
165 D = 1. / D;
166 d = C * D;
167 f *= d;
168 if (fabs(d - 1.) < KF_GAMMA_EPS) break;
169 }
170 return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;
171 }
172 double kf_betai(double a, double b, double x)
173 {
174 return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x);
175 }
176
177 #ifdef KF_MAIN
178 #include <stdio.h>
179 int main(int argc, char *argv[])
180 {
181 double x = 5.5, y = 3;
182 double a, b;
183 printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x));
184 printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y));
185 a = 2; b = 2; x = 0.5;
186 printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b)));
187 return 0;
188 }
189 #endif
190
191
192 // log\binom{n}{k}
193 static double lbinom(int n, int k)
194 {
195 if (k == 0 || n == k) return 0;
196 return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
197 }
198
199 // n11 n12 | n1_
200 // n21 n22 | n2_
201 //-----------+----
202 // n_1 n_2 | n
203
204 // hypergeometric distribution
205 static double hypergeo(int n11, int n1_, int n_1, int n)
206 {
207 return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));
208 }
209
210 typedef struct {
211 int n11, n1_, n_1, n;
212 double p;
213 } hgacc_t;
214
215 // incremental version of hypergenometric distribution
216 static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
217 {
218 if (n1_ || n_1 || n) {
219 aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
220 } else { // then only n11 changed; the rest fixed
221 if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
222 if (n11 == aux->n11 + 1) { // incremental
223 aux->p *= (double)(aux->n1_ - aux->n11) / n11
224 * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
225 aux->n11 = n11;
226 return aux->p;
227 }
228 if (n11 == aux->n11 - 1) { // incremental
229 aux->p *= (double)aux->n11 / (aux->n1_ - n11)
230 * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
231 aux->n11 = n11;
232 return aux->p;
233 }
234 }
235 aux->n11 = n11;
236 }
237 aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
238 return aux->p;
239 }
240
241 double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
242 {
243 int i, j, max, min;
244 double p, q, left, right;
245 hgacc_t aux;
246 int n1_, n_1, n;
247
248 n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
249 max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
250 min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_)
251 if (min < 0) min = 0; // min n11, for left tail
252 *two = *_left = *_right = 1.;
253 if (min == max) return 1.; // no need to do test
254 q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
255 // left tail
256 p = hypergeo_acc(min, 0, 0, 0, &aux);
257 for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow
258 left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
259 --i;
260 if (p < 1.00000001 * q) left += p;
261 else --i;
262 // right tail
263 p = hypergeo_acc(max, 0, 0, 0, &aux);
264 for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow
265 right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
266 ++j;
267 if (p < 1.00000001 * q) right += p;
268 else ++j;
269 // two-tail
270 *two = left + right;
271 if (*two > 1.) *two = 1.;
272 // adjust left and right
273 if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
274 else left = 1.0 - right + q;
275 *_left = left; *_right = right;
276 return q;
277 }
278
279
280