annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/bwtgap.c @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 #include "bwtgap.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 #include "bwtaln.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 #define STATE_M 0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 #define STATE_I 1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 #define STATE_D 2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 #define aln_score(m,o,e,p) ((m)*(p)->s_mm + (o)*(p)->s_gapo + (e)*(p)->s_gape)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 gap_stack_t *stack;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 stack = (gap_stack_t*)calloc(1, sizeof(gap_stack_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 stack->n_stacks = aln_score(max_mm+1, max_gapo+1, max_gape+1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 stack->stacks = (gap_stack1_t*)calloc(stack->n_stacks, sizeof(gap_stack1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 for (i = 0; i != stack->n_stacks; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 gap_stack1_t *p = stack->stacks + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 p->m_entries = 4;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 p->stack = (gap_entry_t*)calloc(p->m_entries, sizeof(gap_entry_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 return stack;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 void gap_destroy_stack(gap_stack_t *stack)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 for (i = 0; i != stack->n_stacks; ++i) free(stack->stacks[i].stack);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 free(stack->stacks);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 free(stack);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 static void gap_reset_stack(gap_stack_t *stack)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 for (i = 0; i != stack->n_stacks; ++i)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 stack->stacks[i].n_entries = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 stack->best = stack->n_stacks;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 stack->n_entries = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 static inline void gap_push(gap_stack_t *stack, int a, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 int state, int is_diff, const gap_opt_t *opt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 int score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 gap_entry_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 gap_stack1_t *q;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 score = aln_score(n_mm, n_gapo, n_gape, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 q = stack->stacks + score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 if (q->n_entries == q->m_entries) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 q->m_entries <<= 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 p = q->stack + q->n_entries;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 p->info = (u_int32_t)score<<21 | a<<20 | i; p->k = k; p->l = l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->state = state;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 if (is_diff) p->last_diff_pos = i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 ++(q->n_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 ++(stack->n_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 if (stack->best > score) stack->best = score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 static inline void gap_pop(gap_stack_t *stack, gap_entry_t *e)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 gap_stack1_t *q;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 q = stack->stacks + stack->best;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 *e = q->stack[q->n_entries - 1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 --(q->n_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 --(stack->n_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 if (q->n_entries == 0 && stack->n_entries) { // reset best
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 for (i = stack->best + 1; i < stack->n_stacks; ++i)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 if (stack->stacks[i].n_entries != 0) break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 stack->best = i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 } else if (stack->n_entries == 0) stack->best = stack->n_stacks;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 static inline void gap_shadow(int x, int len, bwtint_t max, int last_diff_pos, bwt_width_t *w)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 int i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 for (i = j = 0; i < last_diff_pos; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 if (w[i].w > x) w[i].w -= x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 else if (w[i].w == x) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 w[i].bid = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 w[i].w = max - (++j);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 } // else should not happen
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 static inline int int_log2(uint32_t v)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 int c = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 if (v & 0xffff0000u) { v >>= 16; c |= 16; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 if (v & 0xff00) { v >>= 8; c |= 8; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 if (v & 0xf0) { v >>= 4; c |= 4; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 if (v & 0xc) { v >>= 2; c |= 2; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 if (v & 0x2) c |= 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 return c;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 bwt_aln1_t *bwt_match_gap(bwt_t *const bwts[2], int len, const ubyte_t *seq[2], bwt_width_t *w[2],
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 bwt_width_t *seed_w[2], const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 int best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 int best_diff = opt->max_diff + 1, max_diff = opt->max_diff;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 int best_cnt = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 int max_entries = 0, j, _j, n_aln, m_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 bwt_aln1_t *aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 m_aln = 4; n_aln = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 aln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 // check whether there are too many N
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 for (j = _j = 0; j < len; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 if (seq[0][j] > 3) ++_j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 if (_j > max_diff) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 *_n_aln = n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 return aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 //for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\t[%d,%u]\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 gap_reset_stack(stack); // reset stack
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 gap_push(stack, 0, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 gap_push(stack, 1, len, 0, bwts[0]->seq_len, 0, 0, 0, 0, 0, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 while (stack->n_entries) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 gap_entry_t e;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 int a, i, m, m_seed = 0, hit_found, allow_diff, allow_M, tmp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 bwtint_t k, l, cnt_k[4], cnt_l[4], occ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 const bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 const ubyte_t *str;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 const bwt_width_t *seed_width = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 bwt_width_t *width;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 if (max_entries < stack->n_entries) max_entries = stack->n_entries;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 if (stack->n_entries > opt->max_entries) break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 gap_pop(stack, &e); // get the best entry
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 k = e.k; l = e.l; // SA interval
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 a = e.info>>20&1; i = e.info&0xffff; // strand, length
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 if (!(opt->mode & BWA_MODE_NONSTOP) && e.info>>21 > best_score + opt->s_mm) break; // no need to proceed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 m = max_diff - (e.n_mm + e.n_gapo);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 if (opt->mode & BWA_MODE_GAPE) m -= e.n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 if (m < 0) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 bwt = bwts[1-a]; str = seq[a]; width = w[a];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 if (seed_w) { // apply seeding
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 seed_width = seed_w[a];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 m_seed = opt->max_seed_diff - (e.n_mm + e.n_gapo);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 if (opt->mode & BWA_MODE_GAPE) m_seed -= e.n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 //printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%u,%u]\t[%u,%u]\t%d\n", stack->n_entries, a, i, "MID"[e.state], e.n_mm, e.n_gapo, e.n_gape, width[i-1].bid, width[i-1].w, k, l, e.last_diff_pos);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 if (i > 0 && m < width[i-1].bid) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 // check whether a hit is found
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 hit_found = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 if (i == 0) hit_found = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 else if (m == 0 && (e.state == STATE_M || (opt->mode&BWA_MODE_GAPE) || e.n_gape == opt->max_gape)) { // no diff allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 if (bwt_match_exact_alt(bwt, i, str, &k, &l)) hit_found = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 else continue; // no hit, skip
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 if (hit_found) { // action for found hits
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 int score = aln_score(e.n_mm, e.n_gapo, e.n_gape, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 int do_add = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 //printf("#2 hits found: %d:(%u,%u)\n", e.n_mm+e.n_gapo, k, l);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 if (n_aln == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 best_score = score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 best_diff = e.n_mm + e.n_gapo;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 if (opt->mode & BWA_MODE_GAPE) best_diff += e.n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 if (!(opt->mode & BWA_MODE_NONSTOP))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 max_diff = (best_diff + 1 > opt->max_diff)? opt->max_diff : best_diff + 1; // top2 behaviour
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 if (score == best_score) best_cnt += l - k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 else if (best_cnt > opt->max_top2) break; // top2b behaviour
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 if (e.n_gapo) { // check whether the hit has been found. this may happen when a gap occurs in a tandem repeat
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 for (j = 0; j != n_aln; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 if (aln[j].k == k && aln[j].l == l) break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 if (j < n_aln) do_add = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 if (do_add) { // append
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 bwt_aln1_t *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 gap_shadow(l - k + 1, len, bwt->seq_len, e.last_diff_pos, width);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 if (n_aln == m_aln) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 m_aln <<= 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 aln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 memset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 p = aln + n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 p->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape; p->a = a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 p->k = k; p->l = l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 p->score = score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 ++n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 --i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 bwt_2occ4(bwt, k - 1, l, cnt_k, cnt_l); // retrieve Occ values
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 occ = l - k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 // test whether diff is allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 allow_diff = allow_M = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 if (i > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 int ii = i - (len - opt->seed_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 if (width[i-1].bid > m-1) allow_diff = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 else if (width[i-1].bid == m-1 && width[i].bid == m-1 && width[i-1].w == width[i].w) allow_M = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 if (seed_w && ii > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 if (seed_width[ii-1].bid > m_seed-1) allow_diff = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 else if (seed_width[ii-1].bid == m_seed-1 && seed_width[ii].bid == m_seed-1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 && seed_width[ii-1].w == seed_width[ii].w) allow_M = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 // indels
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 tmp = (opt->mode & BWA_MODE_LOGGAP)? int_log2(e.n_gape + e.n_gapo)/2+1 : e.n_gapo + e.n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 if (allow_diff && i >= opt->indel_end_skip + tmp && len - i >= opt->indel_end_skip + tmp) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 if (e.state == STATE_M) { // gap open
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 if (e.n_gapo < opt->max_gapo) { // gap open is allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 // insertion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_I, 1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222 // deletion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 for (j = 0; j != 4; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 k = bwt->L2[j] + cnt_k[j] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 l = bwt->L2[j] + cnt_l[j];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_D, 1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229 } else if (e.state == STATE_I) { // extention of an insertion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 if (e.n_gape < opt->max_gape) // gap extention is allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_I, 1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 } else if (e.state == STATE_D) { // extention of a deletion
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 if (e.n_gape < opt->max_gape) { // gap extention is allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 if (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 for (j = 0; j != 4; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 k = bwt->L2[j] + cnt_k[j] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 l = bwt->L2[j] + cnt_l[j];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 if (k <= l) gap_push(stack, a, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_D, 1, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 // mismatches
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 if (allow_diff && allow_M) { // mismatch is allowed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 for (j = 1; j <= 4; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247 int c = (str[i] + j) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 int is_mm = (j != 4 || str[i] > 3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 k = bwt->L2[c] + cnt_k[c] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250 l = bwt->L2[c] + cnt_l[c];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
251 if (k <= l) gap_push(stack, a, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, STATE_M, is_mm, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
252 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
253 } else if (str[i] < 4) { // try exact match only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
254 int c = str[i] & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
255 k = bwt->L2[c] + cnt_k[c] + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
256 l = bwt->L2[c] + cnt_l[c];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
257 if (k <= l) gap_push(stack, a, i, k, l, e.n_mm, e.n_gapo, e.n_gape, STATE_M, 0, opt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
258 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
259 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
260
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
261 *_n_aln = n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
262 //fprintf(stderr, "max_entries = %d\n", max_entries);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
263 return aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
264 }