annotate bwa-0.7.9a/bwtsw2_chain.c @ 0:ce5a8082bbb8 draft

Uploaded
author xilinxu
date Thu, 14 Aug 2014 02:16:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
1 #include <stdio.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
2 #include "bwtsw2.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
3
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
4 #ifdef USE_MALLOC_WRAPPERS
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
5 # include "malloc_wrap.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
6 #endif
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
7
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
8 typedef struct {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
9 uint32_t tbeg, tend;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
10 int qbeg, qend;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
11 uint32_t flag:1, idx:31;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
12 int chain; // also reuse as a counter
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
13 } hsaip_t;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
14
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
15 #define _hsaip_lt(a, b) ((a).qbeg < (b).qbeg)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
16
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
17 #include "ksort.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
18 KSORT_INIT(hsaip, hsaip_t, _hsaip_lt)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
19
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
20 static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t *chain)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
21 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
22 int j, k, m = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
23 ks_introsort(hsaip, n, z);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
24 for (j = 0; j < n; ++j) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
25 hsaip_t *p = z + j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
26 for (k = m - 1; k >= 0; --k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
27 hsaip_t *q = chain + k;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
28 int x = p->qbeg - q->qbeg; // always positive
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
29 int y = p->tbeg - q->tbeg;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
30 if (y > 0 && x < opt->max_chain_gap && y < opt->max_chain_gap && x - y <= opt->bw && y - x <= opt->bw) { // chained
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
31 if (p->qend > q->qend) q->qend = p->qend;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
32 if (p->tend > q->tend) q->tend = p->tend;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
33 ++q->chain;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
34 p->chain = shift + k;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
35 break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
36 } else if (q->chain > opt->t_seeds * 2) k = 0; // if the chain is strong enough, do not check the previous chains
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
37 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
38 if (k < 0) { // not added to any previous chains
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
39 chain[m] = *p;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
40 chain[m].chain = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
41 chain[m].idx = p->chain = shift + m;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
42 ++m;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
43 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
44 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
45 return m;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
46 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
47
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
48 void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
49 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
50 hsaip_t *z[2], *chain[2];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
51 int i, j, k, n[2], m[2], thres = opt->t_seeds * 2;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
52 char *flag;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
53 // initialization
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
54 n[0] = b[0]->n; n[1] = b[1]->n;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
55 z[0] = calloc(n[0] + n[1], sizeof(hsaip_t));
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
56 z[1] = z[0] + n[0];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
57 chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t));
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
58 for (k = j = 0; k < 2; ++k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
59 for (i = 0; i < b[k]->n; ++i) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
60 bsw2hit_t *p = b[k]->hits + i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
61 hsaip_t *q = z[k] + i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
62 q->flag = k; q->idx = i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
63 q->tbeg = p->k; q->tend = p->k + p->len;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
64 q->chain = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
65 q->qbeg = p->beg; q->qend = p->end;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
66 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
67 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
68 // chaining
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
69 m[0] = chaining(opt, 0, n[0], z[0], chain[0]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
70 chain[1] = chain[0] + m[0];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
71 m[1] = chaining(opt, m[0], n[1], z[1], chain[1]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
72 // change query coordinate on the reverse strand
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
73 for (k = 0; k < m[1]; ++k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
74 hsaip_t *p = chain[1] + k;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
75 int tmp = p->qbeg;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
76 p->qbeg = len - p->qend; p->qend = len - tmp;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
77 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
78 //for (k = 0; k < m[0]; ++k) printf("%d, [%d,%d), [%d,%d)\n", chain[0][k].chain, chain[0][k].tbeg, chain[0][k].tend, chain[0][k].qbeg, chain[0][k].qend);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
79 // filtering
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
80 flag = calloc(m[0] + m[1], 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
81 ks_introsort(hsaip, m[0] + m[1], chain[0]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
82 for (k = 1; k < m[0] + m[1]; ++k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
83 hsaip_t *p = chain[0] + k;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
84 for (j = 0; j < k; ++j) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
85 hsaip_t *q = chain[0] + j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
86 if (flag[q->idx]) continue;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
87 if (q->qend >= p->qend && q->chain > p->chain * thres && p->chain < thres) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
88 flag[p->idx] = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
89 break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
90 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
91 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
92 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
93 for (k = 0; k < n[0] + n[1]; ++k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
94 hsaip_t *p = z[0] + k;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
95 if (flag[p->chain])
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
96 b[p->flag]->hits[p->idx].G = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
97 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
98 free(flag);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
99 // squeeze out filtered elements in b[2]
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
100 for (k = 0; k < 2; ++k) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
101 for (j = i = 0; j < n[k]; ++j) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
102 bsw2hit_t *p = b[k]->hits + j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
103 if (p->G) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
104 if (i != j) b[k]->hits[i++] = *p;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
105 else ++i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
106 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
107 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
108 b[k]->n = i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
109 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
110 // free
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
111 free(z[0]); free(chain[0]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
112 }