annotate PsiCLASS-1.0.2/samtools-0.1.19/bcftools/fet.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <math.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 /* This program is implemented with ideas from this web page:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 * http://www.langsrud.com/fisher.htm
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 // log\binom{n}{k}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 static double lbinom(int n, int k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 if (k == 0 || n == k) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 // n11 n12 | n1_
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 // n21 n22 | n2_
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 //-----------+----
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 // n_1 n_2 | n
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 // hypergeometric distribution
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 static double hypergeo(int n11, int n1_, int n_1, int n)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 int n11, n1_, n_1, n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 double p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 } hgacc_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 // incremental version of hypergenometric distribution
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 if (n1_ || n_1 || n) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 } else { // then only n11 changed; the rest fixed
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 if (n11 == aux->n11 + 1) { // incremental
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 aux->p *= (double)(aux->n1_ - aux->n11) / n11
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 aux->n11 = n11;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 return aux->p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 if (n11 == aux->n11 - 1) { // incremental
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 aux->p *= (double)aux->n11 / (aux->n1_ - n11)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 aux->n11 = n11;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 return aux->p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 aux->n11 = n11;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 return aux->p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 int i, j, max, min;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 double p, q, left, right;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 hgacc_t aux;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 int n1_, n_1, n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 min = n1_ + n_1 - n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 if (min < 0) min = 0; // min n11, for left tail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 *two = *_left = *_right = 1.;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 if (min == max) return 1.; // no need to do test
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 // left tail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 p = hypergeo_acc(min, 0, 0, 0, &aux);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) // loop until underflow
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 --i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 if (p < 1.00000001 * q) left += p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 else --i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 // right tail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 p = hypergeo_acc(max, 0, 0, 0, &aux);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 for (right = 0., j = max - 1; p < 0.99999999 * q; --j) // loop until underflow
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 ++j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 if (p < 1.00000001 * q) right += p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 else ++j;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 // two-tail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 *two = left + right;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 if (*two > 1.) *two = 1.;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 // adjust left and right
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 else left = 1.0 - right + q;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 *_left = left; *_right = right;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 return q;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 #ifdef FET_MAIN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 int main(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 char id[1024];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 int n11, n12, n21, n22;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 double left, right, twotail, prob;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 while (scanf("%s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 prob, left, right, twotail);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 #endif