annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/bwt.c @ 3:f17965495ec9 draft default tip

Uploaded
author siyuan
date Tue, 11 Mar 2014 12:14:01 -0400
parents acc2ca1a3ba4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 /* The MIT License
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 Copyright (c) 2008 Genome Research Ltd (GRL).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 a copy of this software and associated documentation files (the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 the following conditions:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 SOFTWARE.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 #include <assert.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 #include <stdint.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 #include "utils.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 #include "bwt.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 void bwt_gen_cnt_table(bwt_t *bwt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 int i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 for (i = 0; i != 256; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 uint32_t x = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 for (j = 0; j != 4; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 bwt->cnt_table[i] = x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 // bwt->bwt and bwt->occ must be precalculated
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 void bwt_cal_sa(bwt_t *bwt, int intv)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 bwtint_t isa, sa, i; // S(isa) = sa
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 xassert(bwt->bwt, "bwt_t::bwt is not initialized.");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 if (bwt->sa) free(bwt->sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 bwt->sa_intv = intv;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 bwt->n_sa = (bwt->seq_len + intv) / intv;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 // calculate SA value
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 isa = 0; sa = bwt->seq_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 for (i = 0; i < bwt->seq_len; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 if (isa % intv == 0) bwt->sa[isa/intv] = sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 --sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 isa = bwt_invPsi(bwt, isa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 if (isa % intv == 0) bwt->sa[isa/intv] = sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 bwtint_t sa = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 while (k % bwt->sa_intv != 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 ++sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 k = bwt_invPsi(bwt, k);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 /* without setting bwt->sa[0] = -1, the following line should be
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 return sa + bwt->sa[k/bwt->sa_intv];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 static inline int __occ_aux(uint64_t y, int c)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 // reduce nucleotide counting to bits counting
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 y = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 // count the number of 1s in y
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 y = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 return ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 bwtint_t n, l, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 uint32_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 if (k == (bwtint_t)(-1)) return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 if (k >= bwt->primary) --k; // because $ is not in bwt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 // retrieve Occ at k/OCC_INTERVAL
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 n = (p = bwt_occ_intv(bwt, k))[c];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 p += 4; // jump to the start of the first BWT cell
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 // calculate Occ up to the last k/32
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 j = k >> 5 << 5;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 for (l = k/OCC_INTERVAL*OCC_INTERVAL; l < j; l += 32, p += 2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 // calculate Occ
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 if (c == 0) n -= ~k&31; // corrected for the masked bits
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 return n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 // an analogy to bwt_occ() but more efficient, requiring k <= l
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 bwtint_t _k, _l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 if (k == l) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 *ok = *ol = bwt_occ(bwt, k, c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 _k = (k >= bwt->primary)? k-1 : k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 _l = (l >= bwt->primary)? l-1 : l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 *ok = bwt_occ(bwt, k, c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 *ol = bwt_occ(bwt, l, c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 bwtint_t m, n, i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 uint32_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 if (k >= bwt->primary) --k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 if (l >= bwt->primary) --l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 n = (p = bwt_occ_intv(bwt, k))[c];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 p += 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 // calculate *ok
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 j = k >> 5 << 5;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 m = n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 if (c == 0) n -= ~k&31; // corrected for the masked bits
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 *ok = n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 // calculate *ol
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 j = l >> 5 << 5;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 for (; i < j; i += 32, p += 2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 m += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 if (c == 0) m -= ~l&31; // corrected for the masked bits
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 *ol = m;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 #define __occ_aux4(bwt, b) \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 ((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff] \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 + (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 bwtint_t l, j, x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 uint32_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 if (k == (bwtint_t)(-1)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 memset(cnt, 0, 4 * sizeof(bwtint_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 if (k >= bwt->primary) --k; // because $ is not in bwt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 p = bwt_occ_intv(bwt, k);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 memcpy(cnt, p, 16);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 p += 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 j = k >> 4 << 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 for (l = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; l < j; l += 16, ++p)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 x += __occ_aux4(bwt, *p);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 x += __occ_aux4(bwt, *p & ~((1U<<((~k&15)<<1)) - 1)) - (~k&15);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 // an analogy to bwt_occ4() but more efficient, requiring k <= l
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 bwtint_t _k, _l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 if (k == l) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 bwt_occ4(bwt, k, cntk);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 _k = (k >= bwt->primary)? k-1 : k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 _l = (l >= bwt->primary)? l-1 : l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 bwt_occ4(bwt, k, cntk);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 bwt_occ4(bwt, l, cntl);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 bwtint_t i, j, x, y;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 uint32_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 int cl[4];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 if (k >= bwt->primary) --k; // because $ is not in bwt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 if (l >= bwt->primary) --l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 cl[0] = cl[1] = cl[2] = cl[3] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 p = bwt_occ_intv(bwt, k);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 memcpy(cntk, p, 4 * sizeof(bwtint_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 p += 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 // prepare cntk[]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 j = k >> 4 << 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 for (i = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; i < j; i += 16, ++p)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 x += __occ_aux4(bwt, *p);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 y = x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 x += __occ_aux4(bwt, *p & ~((1U<<((~k&15)<<1)) - 1)) - (~k&15);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 // calculate cntl[] and finalize cntk[]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 j = l >> 4 << 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 for (; i < j; i += 16, ++p) y += __occ_aux4(bwt, *p);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 y += __occ_aux4(bwt, *p & ~((1U<<((~l&15)<<1)) - 1)) - (~l&15);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 memcpy(cntl, cntk, 16);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 bwtint_t k, l, ok, ol;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 k = 0; l = bwt->seq_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 for (i = len - 1; i >= 0; --i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222 ubyte_t c = str[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 if (c > 3) return 0; // no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 k = bwt->L2[c] + ok + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 l = bwt->L2[c] + ol;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 if (k > l) break; // no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229 if (k > l) return 0; // no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 if (sa_begin) *sa_begin = k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 if (sa_end) *sa_end = l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 return l - k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 bwtint_t k, l, ok, ol;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 k = *k0; l = *l0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 for (i = len - 1; i >= 0; --i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 ubyte_t c = str[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 if (c > 3) return 0; // there is an N here. no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 k = bwt->L2[c] + ok + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 l = bwt->L2[c] + ol;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 if (k > l) return 0; // no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 *k0 = k; *l0 = l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 return l - k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250 }