Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/bwa-0.5.7-mh/bwase.c @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 #include <unistd.h> | |
2 #include <string.h> | |
3 #include <stdio.h> | |
4 #include <stdlib.h> | |
5 #include <math.h> | |
6 #include <time.h> | |
7 #include "stdaln.h" | |
8 #include "bwase.h" | |
9 #include "bwtaln.h" | |
10 #include "bntseq.h" | |
11 #include "utils.h" | |
12 #include "kstring.h" | |
13 | |
14 static int g_log_n[256]; | |
15 | |
16 void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi) | |
17 { | |
18 int i, cnt, best; | |
19 if (n_aln == 0) { | |
20 s->type = BWA_TYPE_NO_MATCH; | |
21 s->c1 = s->c2 = 0; | |
22 return; | |
23 } | |
24 | |
25 if (set_main) { | |
26 best = aln[0].score; | |
27 for (i = cnt = 0; i < n_aln; ++i) { | |
28 const bwt_aln1_t *p = aln + i; | |
29 if (p->score > best) break; | |
30 if (drand48() * (p->l - p->k + 1) > (double)cnt) { | |
31 s->n_mm = p->n_mm; s->n_gapo = p->n_gapo; s->n_gape = p->n_gape; s->strand = p->a; | |
32 s->score = p->score; | |
33 s->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48()); | |
34 } | |
35 cnt += p->l - p->k + 1; | |
36 } | |
37 s->c1 = cnt; | |
38 for (; i < n_aln; ++i) cnt += aln[i].l - aln[i].k + 1; | |
39 s->c2 = cnt - s->c1; | |
40 s->type = s->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE; | |
41 } | |
42 | |
43 if (n_multi) { | |
44 int k, rest, n_occ, z = 0; | |
45 for (k = n_occ = 0; k < n_aln; ++k) { | |
46 const bwt_aln1_t *q = aln + k; | |
47 n_occ += q->l - q->k + 1; | |
48 } | |
49 if (s->multi) free(s->multi); | |
50 if (n_occ > n_multi + 1) { // if there are too many hits, generate none of them | |
51 s->multi = 0; s->n_multi = 0; | |
52 return; | |
53 } | |
54 /* The following code is more flexible than what is required | |
55 * here. In principle, due to the requirement above, we can | |
56 * simply output all hits, but the following samples "rest" | |
57 * number of random hits. */ | |
58 rest = n_occ > n_multi + 1? n_multi + 1 : n_occ; // find one additional for ->sa | |
59 s->multi = calloc(rest, rest * sizeof(bwt_multi1_t)); | |
60 for (k = 0; k < n_aln; ++k) { | |
61 const bwt_aln1_t *q = aln + k; | |
62 if (q->l - q->k + 1 <= rest) { | |
63 bwtint_t l; | |
64 for (l = q->k; l <= q->l; ++l) { | |
65 s->multi[z].pos = l; | |
66 s->multi[z].gap = q->n_gapo + q->n_gape; | |
67 s->multi[z].mm = q->n_mm; | |
68 s->multi[z++].strand = q->a; | |
69 } | |
70 rest -= q->l - q->k + 1; | |
71 } else { // Random sampling (http://code.activestate.com/recipes/272884/). In fact, we never come here. | |
72 int j, i, k; | |
73 for (j = rest, i = q->l - q->k + 1, k = 0; j > 0; --j) { | |
74 double p = 1.0, x = drand48(); | |
75 while (x < p) p -= p * j / (i--); | |
76 s->multi[z].pos = q->l - i; | |
77 s->multi[z].gap = q->n_gapo + q->n_gape; | |
78 s->multi[z].mm = q->n_mm; | |
79 s->multi[z++].strand = q->a; | |
80 } | |
81 rest = 0; | |
82 break; | |
83 } | |
84 } | |
85 s->n_multi = z; | |
86 for (k = z = 0; k < s->n_multi; ++k) | |
87 if (s->multi[k].pos != s->sa) | |
88 s->multi[z++] = s->multi[k]; | |
89 s->n_multi = z < n_multi? z : n_multi; | |
90 } | |
91 } | |
92 | |
93 void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s) | |
94 { | |
95 bwa_aln2seq_core(n_aln, aln, s, 1, 0); | |
96 } | |
97 | |
98 /* *aln points to alignments found for the current sequence, n_aln is the | |
99 size of the array pointed to by *aln. Array *s of size n_seq is a collection | |
100 of SAM records that must be replicas, i.e. initialized with the same current sequence. | |
101 This method updates sequence records in *s with placements recorded in *aln, | |
102 until all n_seq replicas are updated. For each separate alignment record in array *aln, | |
103 each placement corresponding to this record will be assigned to a separate record in *s | |
104 if there are enough elements remaining in *s, otherwise a random subset of the placements will | |
105 be assigned to the remaining elements in *s. The total number of best placements and total | |
106 number of non-best placements will be computed from the whole array *aln (regardless of whether it | |
107 fits completely into *s or not) and assigned to each updated record in *s. | |
108 */ | |
109 void bwa_aln2seq_all(int n_aln, const bwt_aln1_t *aln, int n_seq, bwa_seq_t *s) | |
110 { | |
111 int i, cnt1, cnt2, j, best, N; | |
112 if (n_aln == 0) { | |
113 /* there is no match found for *s */ | |
114 s->type = BWA_TYPE_NO_MATCH; | |
115 s->c1 = s->c2 = 0; | |
116 return; | |
117 } | |
118 | |
119 N = n_seq; // remember the size of the array | |
120 best = aln[0].score; | |
121 | |
122 cnt1 = 0; // total number of already processed alignments (i.e. distinct placements, NOT alignment records) with best score | |
123 cnt2 = 0; // total number of already processed alignments with inferior score(s) | |
124 for (i = 0; i < n_aln && n_seq > 0 ; ++i) { | |
125 const bwt_aln1_t *p_aln = aln + i; | |
126 | |
127 int N_aligns = p_aln->l-p_aln->k +1 ; // number of placements (alignments) in the current alignment record p_aln | |
128 | |
129 if (N_aligns <= n_seq) { /* we have space to save all the alignments stored in 'p_aln' */ | |
130 | |
131 for ( j = 0 ; j < N_aligns ; j++ ) { | |
132 | |
133 bwa_seq_t * seq = s + cnt1+ cnt2+j ; | |
134 seq->n_mm = p_aln->n_mm; seq->n_gapo = p_aln->n_gapo; seq->n_gape = p_aln->n_gape; seq->strand = p_aln->a; | |
135 seq->score = p_aln->score; | |
136 seq->sa = p_aln->k + j; | |
137 } | |
138 n_seq -= N_aligns; // we have n_seq slots remaining to store more alignments | |
139 } else { // See also: http://code.activestate.com/recipes/272884/ | |
140 // we have to truncate, so let's select few remaining alignments randomly: | |
141 int xj, xi, xk; | |
142 for (xj = n_seq, xi = N_aligns, xk = 0; xj > 0; --xj, ++xk) { | |
143 double p = 1.0, x = drand48(); | |
144 while (x < p) p -= p * xj / (xi--); | |
145 | |
146 bwa_seq_t * seq = s+cnt1+cnt2+xk ; | |
147 seq->n_mm = p_aln->n_mm; seq->n_gapo = p_aln->n_gapo; seq->n_gape = p_aln->n_gape; seq->strand = p_aln->a; | |
148 seq->score = p_aln->score; | |
149 seq->sa = p_aln->l - xi; | |
150 | |
151 } | |
152 n_seq = 0; | |
153 } | |
154 // cnt1 + cnt2 is the total count of hits processed so far: | |
155 if ( p_aln->score == best ) cnt1 += N_aligns; // we found N_aligns more placements with best score | |
156 else cnt2 += N_aligns; // N_aligns more placements with inferior score | |
157 } | |
158 | |
159 // we filled all available slots in the array *s, but there can be more alignments | |
160 // left; we need to count them: | |
161 for (; i < n_aln; ++i) { | |
162 if ( aln[i].score == best ) cnt1 += aln[i].l-aln[i].k+1; | |
163 else cnt2 += aln[i].l-aln[i].k+1; | |
164 } | |
165 | |
166 // now cnt1 is the total number of found alignments (placements) with best score | |
167 // and cnt2 is the total number of found placements with worse score | |
168 | |
169 /* set counts and flags for all hits: */ | |
170 for (i = 0; i < N ; ++i) { | |
171 bwa_seq_t * seq = s+i ; | |
172 | |
173 seq->c1 = cnt1; | |
174 seq->c2 = cnt2; | |
175 seq->type = seq->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE; | |
176 } | |
177 } | |
178 | |
179 | |
180 | |
181 int bwa_approx_mapQ(const bwa_seq_t *p, int mm) | |
182 { | |
183 int n; | |
184 if (p->c1 == 0) return 23; | |
185 if (p->c1 > 1) return 0; | |
186 if (p->n_mm == mm) return 25; | |
187 if (p->c2 == 0) return 37; | |
188 n = (p->c2 >= 255)? 255 : p->c2; | |
189 return (23 < g_log_n[n])? 0 : 23 - g_log_n[n]; | |
190 } | |
191 | |
192 /** | |
193 * Derive the actual position in the read from the given suffix array | |
194 * coordinates. Note that the position will be approximate based on | |
195 * whether indels appear in the read and whether calculations are | |
196 * performed from the start or end of the read. | |
197 */ | |
198 void bwa_cal_pac_pos_core(const bwt_t *forward_bwt, const bwt_t *reverse_bwt, int n_seqs, bwa_seq_t *s, const int max_mm, const float fnr) | |
199 { | |
200 int max_diff; | |
201 bwa_seq_t *seq; | |
202 int i; | |
203 | |
204 for ( i = 0 ; i < n_seqs ; i++ ) { | |
205 seq = s + i; | |
206 if (seq->type != BWA_TYPE_UNIQUE && seq->type != BWA_TYPE_REPEAT) continue; | |
207 | |
208 max_diff = fnr > 0.0? bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr) : max_mm; | |
209 if (seq->strand) { // reverse strand only | |
210 seq->pos = bwt_sa(forward_bwt, seq->sa); | |
211 seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff); | |
212 } else { // forward strand only | |
213 /* NB: For gapped alignment, p->pos may not be correct, which | |
214 * will be fixed in refine_gapped_core(). This line also | |
215 * determines the way "x" is calculated in | |
216 * refine_gapped_core() when (ext < 0 && is_end == 0). */ | |
217 seq->pos = reverse_bwt->seq_len - (bwt_sa(reverse_bwt, seq->sa) + seq->len); | |
218 seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff); | |
219 } | |
220 } | |
221 } | |
222 | |
223 void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr) | |
224 { | |
225 int i, j; | |
226 char str[1024]; | |
227 bwt_t *bwt; | |
228 // load forward SA | |
229 strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); | |
230 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt); | |
231 for (i = 0; i != n_seqs; ++i) { | |
232 if (seqs[i].strand) bwa_cal_pac_pos_core(bwt, 0, 1, &seqs[i], max_mm, fnr); | |
233 for (j = 0; j < seqs[i].n_multi; ++j) { | |
234 bwt_multi1_t *p = seqs[i].multi + j; | |
235 if (p->strand) p->pos = bwt_sa(bwt, p->pos); | |
236 } | |
237 } | |
238 bwt_destroy(bwt); | |
239 // load reverse BWT and SA | |
240 strcpy(str, prefix); strcat(str, ".rbwt"); bwt = bwt_restore_bwt(str); | |
241 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt); | |
242 for (i = 0; i != n_seqs; ++i) { | |
243 if (!seqs[i].strand) bwa_cal_pac_pos_core(0, bwt, 1, &seqs[i], max_mm, fnr); | |
244 for (j = 0; j < seqs[i].n_multi; ++j) { | |
245 bwt_multi1_t *p = seqs[i].multi + j; | |
246 if (!p->strand) p->pos = bwt->seq_len - (bwt_sa(bwt, p->pos) + seqs[i].len); | |
247 } | |
248 } | |
249 bwt_destroy(bwt); | |
250 } | |
251 | |
252 /* is_end_correct == 1 if (*pos+len) gives the correct coordinate on | |
253 * forward strand. This happens when p->pos is calculated by | |
254 * bwa_cal_pac_pos(). is_end_correct==0 if (*pos) gives the correct | |
255 * coordinate. This happens only for color-converted alignment. */ | |
256 static bwa_cigar_t *refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, bwtint_t *_pos, | |
257 int ext, int *n_cigar, int is_end_correct) | |
258 { | |
259 bwa_cigar_t *cigar = 0; | |
260 ubyte_t *ref_seq; | |
261 int l = 0, path_len, ref_len; | |
262 AlnParam ap = aln_param_bwa; | |
263 path_t *path; | |
264 int64_t k, __pos = *_pos > l_pac? (int64_t)((int32_t)*_pos) : *_pos; | |
265 | |
266 ref_len = len + abs(ext); | |
267 if (ext > 0) { | |
268 ref_seq = (ubyte_t*)calloc(ref_len, 1); | |
269 for (k = __pos; k < __pos + ref_len && k < l_pac; ++k) | |
270 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; | |
271 } else { | |
272 int64_t x = __pos + (is_end_correct? len : ref_len); | |
273 ref_seq = (ubyte_t*)calloc(ref_len, 1); | |
274 for (l = 0, k = x - ref_len > 0? x - ref_len : 0; k < x && k < l_pac; ++k) | |
275 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; | |
276 } | |
277 path = (path_t*)calloc(l+len, sizeof(path_t)); | |
278 | |
279 aln_global_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len); | |
280 cigar = bwa_aln_path2cigar(path, path_len, n_cigar); | |
281 | |
282 if (ext < 0 && is_end_correct) { // fix coordinate for reads mapped on the forward strand | |
283 for (l = k = 0; k < *n_cigar; ++k) { | |
284 if (__cigar_op(cigar[k]) == FROM_D) l -= __cigar_len(cigar[k]); | |
285 else if (__cigar_op(cigar[k]) == FROM_I) l += __cigar_len(cigar[k]); | |
286 } | |
287 __pos += l; | |
288 } | |
289 | |
290 if (__cigar_op(cigar[0]) == FROM_D) { // deletion at the 5'-end | |
291 __pos += __cigar_len(cigar[0]); | |
292 for (k = 0; k < *n_cigar - 1; ++k) cigar[k] = cigar[k+1]; | |
293 --(*n_cigar); | |
294 } | |
295 if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end | |
296 | |
297 // change "I" at either end of the read to S. just in case. This should rarely happen... | |
298 if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1]))); | |
299 if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0]))); | |
300 | |
301 *_pos = (bwtint_t)__pos; | |
302 free(ref_seq); free(path); | |
303 return cigar; | |
304 } | |
305 | |
306 char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq, | |
307 bwtint_t l_pac, ubyte_t *pacseq, kstring_t *str, int *_nm) | |
308 { | |
309 bwtint_t x, y; | |
310 int z, u, c, nm = 0; | |
311 str->l = 0; // reset | |
312 x = pos; y = 0; | |
313 if (cigar) { | |
314 int k, l; | |
315 for (k = u = 0; k < n_cigar; ++k) { | |
316 l = __cigar_len(cigar[k]); | |
317 if (__cigar_op(cigar[k]) == FROM_M) { | |
318 for (z = 0; z < l && x+z < l_pac; ++z) { | |
319 c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3; | |
320 if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) { | |
321 ksprintf(str, "%d", u); | |
322 kputc("ACGTN"[c], str); | |
323 ++nm; | |
324 u = 0; | |
325 } else ++u; | |
326 } | |
327 x += l; y += l; | |
328 /* } else if (cigar[k]>>14 == FROM_I || cigar[k]>>14 == 3) { */ | |
329 } else if (__cigar_op(cigar[k]) == FROM_I || __cigar_op(cigar[k]) == FROM_S) { | |
330 y += l; | |
331 if (__cigar_op(cigar[k]) == FROM_I) nm += l; | |
332 } else if (__cigar_op(cigar[k]) == FROM_D) { | |
333 ksprintf(str, "%d", u); | |
334 kputc('^', str); | |
335 for (z = 0; z < l && x+z < l_pac; ++z) | |
336 kputc("ACGT"[pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3], str); | |
337 u = 0; | |
338 x += l; nm += l; | |
339 } | |
340 } | |
341 } else { // no gaps | |
342 for (z = u = 0; z < (bwtint_t)len; ++z) { | |
343 c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3; | |
344 if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) { | |
345 ksprintf(str, "%d", u); | |
346 kputc("ACGTN"[c], str); | |
347 ++nm; | |
348 u = 0; | |
349 } else ++u; | |
350 } | |
351 } | |
352 ksprintf(str, "%d", u); | |
353 *_nm = nm; | |
354 return strdup(str->s); | |
355 } | |
356 | |
357 void bwa_correct_trimmed(bwa_seq_t *s) | |
358 { | |
359 if (s->len == s->full_len) return; | |
360 if (s->strand == 0) { // forward | |
361 if (s->cigar && __cigar_op(s->cigar[s->n_cigar-1]) == FROM_S) { // the last is S | |
362 s->cigar[s->n_cigar-1] += s->full_len - s->len; | |
363 } else { | |
364 if (s->cigar == 0) { | |
365 s->n_cigar = 2; | |
366 s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t)); | |
367 s->cigar[0] = __cigar_create(0, s->len); | |
368 } else { | |
369 ++s->n_cigar; | |
370 s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); | |
371 } | |
372 s->cigar[s->n_cigar-1] = __cigar_create(3, (s->full_len - s->len)); | |
373 } | |
374 } else { // reverse | |
375 if (s->cigar && __cigar_op(s->cigar[0]) == FROM_S) { // the first is S | |
376 s->cigar[0] += s->full_len - s->len; | |
377 } else { | |
378 if (s->cigar == 0) { | |
379 s->n_cigar = 2; | |
380 s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t)); | |
381 s->cigar[1] = __cigar_create(0, s->len); | |
382 } else { | |
383 ++s->n_cigar; | |
384 s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t)); | |
385 memmove(s->cigar + 1, s->cigar, (s->n_cigar-1) * sizeof(bwa_cigar_t)); | |
386 } | |
387 s->cigar[0] = __cigar_create(3, (s->full_len - s->len)); | |
388 } | |
389 } | |
390 s->len = s->full_len; | |
391 } | |
392 | |
393 void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns) | |
394 { | |
395 ubyte_t *pacseq, *ntpac = 0; | |
396 int i, j; | |
397 kstring_t *str; | |
398 | |
399 if (ntbns) { // in color space | |
400 ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1); | |
401 rewind(ntbns->fp_pac); | |
402 fread(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac); | |
403 } | |
404 | |
405 if (!_pacseq) { | |
406 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); | |
407 rewind(bns->fp_pac); | |
408 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); | |
409 } else pacseq = _pacseq; | |
410 for (i = 0; i != n_seqs; ++i) { | |
411 bwa_seq_t *s = seqs + i; | |
412 for (j = 0; j < s->n_multi; ++j) { | |
413 bwt_multi1_t *q = s->multi + j; | |
414 int n_cigar; | |
415 if (q->gap == 0) continue; | |
416 q->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, &q->pos, | |
417 (q->strand? 1 : -1) * q->gap, &n_cigar, 1); | |
418 q->n_cigar = n_cigar; | |
419 } | |
420 if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue; | |
421 s->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos, | |
422 (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 1); | |
423 } | |
424 | |
425 if (ntbns) { // in color space | |
426 for (i = 0; i < n_seqs; ++i) { | |
427 bwa_seq_t *s = seqs + i; | |
428 bwa_cs2nt_core(s, bns->l_pac, ntpac); | |
429 for (j = 0; j < s->n_multi; ++j) { | |
430 bwt_multi1_t *q = s->multi + j; | |
431 int n_cigar; | |
432 if (q->gap == 0) continue; | |
433 free(q->cigar); | |
434 q->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, q->strand? s->rseq : s->seq, &q->pos, | |
435 (q->strand? 1 : -1) * q->gap, &n_cigar, 0); | |
436 q->n_cigar = n_cigar; | |
437 } | |
438 if (s->type != BWA_TYPE_NO_MATCH && s->cigar) { // update cigar again | |
439 free(s->cigar); | |
440 s->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, s->strand? s->rseq : s->seq, &s->pos, | |
441 (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 0); | |
442 } | |
443 } | |
444 } | |
445 | |
446 // generate MD tag | |
447 str = (kstring_t*)calloc(1, sizeof(kstring_t)); | |
448 for (i = 0; i != n_seqs; ++i) { | |
449 bwa_seq_t *s = seqs + i; | |
450 if (s->type != BWA_TYPE_NO_MATCH) { | |
451 int nm; | |
452 s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq, | |
453 bns->l_pac, ntbns? ntpac : pacseq, str, &nm); | |
454 s->nm = nm; | |
455 } | |
456 } | |
457 free(str->s); free(str); | |
458 | |
459 // correct for trimmed reads | |
460 if (!ntbns) // trimming is only enabled for Illumina reads | |
461 for (i = 0; i < n_seqs; ++i) bwa_correct_trimmed(seqs + i); | |
462 | |
463 if (!_pacseq) free(pacseq); | |
464 free(ntpac); | |
465 } | |
466 | |
467 int64_t pos_end(const bwa_seq_t *p) | |
468 { | |
469 if (p->cigar) { | |
470 int j; | |
471 int64_t x = p->pos; | |
472 for (j = 0; j != p->n_cigar; ++j) { | |
473 int op = __cigar_op(p->cigar[j]); | |
474 if (op == 0 || op == 2) x += __cigar_len(p->cigar[j]); | |
475 } | |
476 return x; | |
477 } else return p->pos + p->len; | |
478 } | |
479 | |
480 int64_t pos_end_multi(const bwt_multi1_t *p, int len) // analogy to pos_end() | |
481 { | |
482 if (p->cigar) { | |
483 int j; | |
484 int64_t x = p->pos; | |
485 for (j = 0; j != p->n_cigar; ++j) { | |
486 int op = __cigar_op(p->cigar[j]); | |
487 if (op == 0 || op == 2) x += __cigar_len(p->cigar[j]); | |
488 } | |
489 return x; | |
490 } else return p->pos + len; | |
491 } | |
492 | |
493 static int64_t pos_5(const bwa_seq_t *p) | |
494 { | |
495 if (p->type != BWA_TYPE_NO_MATCH) | |
496 return p->strand? pos_end(p) : p->pos; | |
497 return -1; | |
498 } | |
499 | |
500 /* Prints <bases>\t<quals> of the sequence *p into STDOUT; | |
501 */ | |
502 void bwa_print_seq_and_qual(bwa_seq_t *p) { | |
503 int j; | |
504 ubyte_t * char_ptr; | |
505 | |
506 if (p->strand == 0) | |
507 for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]); | |
508 else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); | |
509 putchar('\t'); | |
510 if (p->qual) { | |
511 if (p->strand) { | |
512 // seq_reverse(p->len, p->qual, 0); // reverse quality | |
513 for ( char_ptr = p->qual + p->len - 1 ; char_ptr >= p->qual ; char_ptr-- ) putchar( *char_ptr ); | |
514 } else { | |
515 printf("%s", p->qual); | |
516 } | |
517 } else printf("*"); | |
518 | |
519 } | |
520 | |
521 | |
522 | |
523 void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2) | |
524 { | |
525 int j; | |
526 if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) { | |
527 int seqid, nn, am = 0, flag = p->extra_flag; | |
528 char XT; | |
529 | |
530 if (p->type == BWA_TYPE_NO_MATCH) { | |
531 p->pos = mate->pos; | |
532 p->strand = mate->strand; | |
533 flag |= SAM_FSU; | |
534 j = 1; | |
535 } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment | |
536 | |
537 // get seqid | |
538 nn = bns_coor_pac2real(bns, p->pos, j, &seqid); | |
539 if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len) | |
540 flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences | |
541 | |
542 // update flag and print it | |
543 if (p->strand) flag |= SAM_FSR; | |
544 if (mate) { | |
545 if (mate->type != BWA_TYPE_NO_MATCH) { | |
546 if (mate->strand) flag |= SAM_FMR; | |
547 } else flag |= SAM_FMU; | |
548 } | |
549 printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); | |
550 printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); | |
551 | |
552 // print CIGAR | |
553 if (p->cigar) { | |
554 for (j = 0; j != p->n_cigar; ++j) | |
555 printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]); | |
556 } else if (p->type == BWA_TYPE_NO_MATCH) printf("*"); | |
557 else printf("%dM", p->len); | |
558 | |
559 // print mate coordinate | |
560 if (mate && mate->type != BWA_TYPE_NO_MATCH) { | |
561 int m_seqid, m_is_N; | |
562 long long isize; | |
563 am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality | |
564 // redundant calculation here, but should not matter too much | |
565 m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid); | |
566 printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); | |
567 isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0; | |
568 if (p->type == BWA_TYPE_NO_MATCH) isize = 0; | |
569 printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); | |
570 } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); | |
571 else printf("\t*\t0\t0\t"); | |
572 | |
573 // print sequence and quality | |
574 bwa_print_seq_and_qual(p); | |
575 | |
576 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); | |
577 if (p->type != BWA_TYPE_NO_MATCH) { | |
578 int i; | |
579 // calculate XT tag | |
580 XT = "NURM"[p->type]; | |
581 if (nn > 10) XT = 'N'; | |
582 // print tags | |
583 printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); | |
584 if (nn) printf("\tXN:i:%d", nn); | |
585 if (mate) printf("\tSM:i:%d\tAM:i:%d", p->seQ, am); | |
586 if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment | |
587 printf("\tX0:i:%d", p->c1); | |
588 if (p->c1 <= max_top2) printf("\tX1:i:%d", p->c2); | |
589 } | |
590 printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape); | |
591 if (p->md) printf("\tMD:Z:%s", p->md); | |
592 // print multiple hits | |
593 if (p->n_multi) { | |
594 printf("\tXA:Z:"); | |
595 for (i = 0; i < p->n_multi; ++i) { | |
596 bwt_multi1_t *q = p->multi + i; | |
597 int k; | |
598 j = pos_end_multi(q, p->len) - q->pos; | |
599 nn = bns_coor_pac2real(bns, q->pos, j, &seqid); | |
600 printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', | |
601 (int)(q->pos - bns->anns[seqid].offset + 1)); | |
602 if (q->cigar) { | |
603 for (k = 0; k < q->n_cigar; ++k) | |
604 printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]); | |
605 } else printf("%dM", p->len); | |
606 printf(",%d;", q->gap + q->mm); | |
607 } | |
608 } | |
609 } | |
610 putchar('\n'); | |
611 } else { // this read has no match | |
612 ubyte_t *s = p->strand? p->rseq : p->seq; | |
613 int flag = p->extra_flag | SAM_FSU; | |
614 if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; | |
615 printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); | |
616 for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); | |
617 putchar('\t'); | |
618 if (p->qual) { | |
619 if (p->strand) { | |
620 // seq_reverse(p->len, p->qual, 0); // reverse quality | |
621 for ( s = p->qual + p->len - 1 ; s >= p->qual ; s-- ) putchar( *s ); | |
622 | |
623 } else { | |
624 printf("%s", p->qual); | |
625 } | |
626 } else printf("*"); | |
627 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); | |
628 putchar('\n'); | |
629 } | |
630 } | |
631 | |
632 /* UNUSED | |
633 void bwa_print_partial_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2) | |
634 { | |
635 int j; | |
636 if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) { | |
637 int seqid, nn, am = 0, flag = p->extra_flag; | |
638 char XT; | |
639 | |
640 if (p->type == BWA_TYPE_NO_MATCH) { | |
641 p->pos = mate->pos; | |
642 p->strand = mate->strand; | |
643 flag |= SAM_FSU; | |
644 j = 1; | |
645 } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment | |
646 | |
647 // get seqid | |
648 nn = bns_coor_pac2real(bns, p->pos, j, &seqid); | |
649 if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len) | |
650 flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences | |
651 | |
652 // update flag and print it | |
653 if (p->strand) flag |= SAM_FSR; | |
654 if (mate) { | |
655 if (mate->type != BWA_TYPE_NO_MATCH) { | |
656 if (mate->strand) flag |= SAM_FMR; | |
657 } else flag |= SAM_FMU; | |
658 } | |
659 printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); | |
660 printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); | |
661 | |
662 // print CIGAR | |
663 if (p->cigar) { | |
664 for (j = 0; j != p->n_cigar; ++j) | |
665 printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]); | |
666 } else if (p->type == BWA_TYPE_NO_MATCH) printf("*"); | |
667 else printf("%dM", p->len); | |
668 | |
669 // print mate coordinate | |
670 if (mate && mate->type != BWA_TYPE_NO_MATCH) { | |
671 int m_seqid, m_is_N; | |
672 long long isize; | |
673 am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality | |
674 // redundant calculation here, but should not matter too much | |
675 m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid); | |
676 printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); | |
677 isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0; | |
678 if (p->type == BWA_TYPE_NO_MATCH) isize = 0; | |
679 printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); | |
680 } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); | |
681 else printf("\t*\t0\t0\t"); | |
682 | |
683 | |
684 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); | |
685 if (p->type != BWA_TYPE_NO_MATCH) { | |
686 int i; | |
687 // calculate XT tag | |
688 XT = "NURM"[p->type]; | |
689 if (nn > 10) XT = 'N'; | |
690 // print tags | |
691 printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); | |
692 if (nn) printf("\tXN:i:%d", nn); | |
693 if (mate) printf("\tSM:i:%d\tAM:i:%d", p->seQ, am); | |
694 printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape); | |
695 if (p->md) printf("\tMD:Z:%s", p->md); | |
696 } | |
697 putchar('\n'); | |
698 } else { // this read has no match | |
699 ubyte_t *s = p->strand? p->rseq : p->seq; | |
700 int flag = p->extra_flag | SAM_FSU; | |
701 if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; | |
702 printf("%d\t*\t0\t0\t*\t*\t0\t0\t", flag); | |
703 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); | |
704 putchar('\n'); | |
705 } | |
706 } | |
707 | |
708 */ | |
709 | |
710 bntseq_t *bwa_open_nt(const char *prefix) | |
711 { | |
712 bntseq_t *ntbns; | |
713 char *str; | |
714 str = (char*)calloc(strlen(prefix) + 10, 1); | |
715 strcat(strcpy(str, prefix), ".nt"); | |
716 ntbns = bns_restore(str); | |
717 free(str); | |
718 return ntbns; | |
719 } | |
720 | |
721 void bwa_print_sam_SQ(const bntseq_t *bns) | |
722 { | |
723 int i; | |
724 for (i = 0; i < bns->n_seqs; ++i) | |
725 printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len); | |
726 } | |
727 | |
728 void bwase_initialize() | |
729 { | |
730 int i; | |
731 for (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5); | |
732 } | |
733 | |
734 void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ) | |
735 { | |
736 int i, n_seqs, tot_seqs = 0, m_aln; | |
737 bwt_aln1_t *aln = 0; | |
738 bwa_seq_t *seqs; | |
739 bwa_seqio_t *ks; | |
740 clock_t t; | |
741 bntseq_t *bns, *ntbns = 0; | |
742 FILE *fp_sa; | |
743 gap_opt_t opt; | |
744 | |
745 // initialization | |
746 bwase_initialize(); | |
747 bns = bns_restore(prefix); | |
748 srand48(bns->seed); | |
749 ks = bwa_seq_open(fn_fa); | |
750 fp_sa = xopen(fn_sa, "r"); | |
751 | |
752 // core loop | |
753 m_aln = 0; | |
754 fread(&opt, sizeof(gap_opt_t), 1, fp_sa); | |
755 if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac | |
756 ntbns = bwa_open_nt(prefix); | |
757 bwa_print_sam_SQ(bns); | |
758 while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) { | |
759 tot_seqs += n_seqs; | |
760 t = clock(); | |
761 | |
762 // read alignment | |
763 for (i = 0; i < n_seqs; ++i) { | |
764 bwa_seq_t *p = seqs + i; | |
765 int n_aln; | |
766 fread(&n_aln, 4, 1, fp_sa); | |
767 if (n_aln > m_aln) { | |
768 m_aln = n_aln; | |
769 aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); | |
770 } | |
771 fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); | |
772 bwa_aln2seq_core(n_aln, aln, p, 1, n_occ); | |
773 // seq_reverse(p->len, p->seq, 0); | |
774 } | |
775 | |
776 fprintf(stderr, "[bwa_aln_core] convert to sequence coordinate... "); | |
777 bwa_cal_pac_pos(prefix, n_seqs, seqs, opt.max_diff, opt.fnr); // forward bwt will be destroyed here | |
778 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
779 | |
780 for (i = 0; i < n_seqs; ++i) { | |
781 bwa_seq_t *p = seqs + i; | |
782 seq_reverse(p->len, p->seq, 0); | |
783 } | |
784 | |
785 fprintf(stderr, "[bwa_aln_core] refine gapped alignments... "); | |
786 bwa_refine_gapped(bns, n_seqs, seqs, 0, ntbns); | |
787 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
788 | |
789 fprintf(stderr, "[bwa_aln_core] print alignments... "); | |
790 for (i = 0; i < n_seqs; ++i) | |
791 bwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2); | |
792 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
793 | |
794 bwa_free_read_seq(n_seqs, seqs); | |
795 fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); | |
796 } | |
797 | |
798 // destroy | |
799 bwa_seq_close(ks); | |
800 if (ntbns) bns_destroy(ntbns); | |
801 bns_destroy(bns); | |
802 fclose(fp_sa); | |
803 free(aln); | |
804 } | |
805 | |
806 void bwa_print_all_hits(const char *prefix, const char *fn_sa, const char *fn_fa, int max_extra_occ) | |
807 { | |
808 int i, n_seqs, tot_seqs = 0, m_aln, m_rest; | |
809 bwt_aln1_t *aln = 0; | |
810 bwa_seq_t *seqs; | |
811 bwa_seqio_t *ks; | |
812 clock_t t,t_convert, t_refine, t_write;; | |
813 bntseq_t *bns, *ntbns = 0; | |
814 FILE *fp_sa; | |
815 gap_opt_t opt; | |
816 | |
817 //****** below modified (added) for multiple hit printout: | |
818 | |
819 bwa_seq_t * rest_seqs = 0; // this array will keep (shallow) replicas of the current sequence; | |
820 // each of the replicas will be updated with its own alignment | |
821 // selected from all the (multiple) alignmens available for the current seq. | |
822 | |
823 bwt_t *bwt[2]; | |
824 char str[1024]; | |
825 ubyte_t *pacseq; | |
826 | |
827 t = clock(); | |
828 fprintf(stderr, "[bwa_aln_core] Data structures initialized: "); | |
829 | |
830 | |
831 strcpy(str, prefix); strcat(str, ".bwt"); bwt[0] = bwt_restore_bwt(str); | |
832 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt[0]); | |
833 | |
834 // load reverse BWT and SA | |
835 strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str); | |
836 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]); | |
837 //*************** | |
838 | |
839 // initialization | |
840 bwase_initialize(); | |
841 bns = bns_restore(prefix); | |
842 srand48(bns->seed); | |
843 ks = bwa_seq_open(fn_fa); | |
844 fp_sa = xopen(fn_sa, "r"); | |
845 | |
846 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); | |
847 rewind(bns->fp_pac); | |
848 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); | |
849 | |
850 | |
851 // core loop | |
852 m_aln = 0; | |
853 m_rest = 0; | |
854 | |
855 fread(&opt, sizeof(gap_opt_t), 1, fp_sa); | |
856 if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac | |
857 ntbns = bwa_open_nt(prefix); | |
858 bwa_print_sam_SQ(bns); | |
859 | |
860 fprintf(stderr, "%.2f sec\n", (float)(clock()-t) / CLOCKS_PER_SEC); | |
861 | |
862 t = clock(); | |
863 | |
864 max_extra_occ++; // now this variable holds TOTAL number of alignments we want to print (1+requested extra). | |
865 | |
866 while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) { | |
867 tot_seqs += n_seqs; | |
868 t_convert = 0; | |
869 t_refine = 0; | |
870 t_write = 0; | |
871 | |
872 fprintf(stderr, "[bwa_aln_core] %d sequences loaded: ",n_seqs); | |
873 fprintf(stderr, "%.2f sec\n", (float)(clock()-t) / CLOCKS_PER_SEC); | |
874 | |
875 | |
876 // read alignment | |
877 for (i = 0; i < n_seqs; ++i) { | |
878 | |
879 bwa_seq_t *p = seqs + i; | |
880 int n_aln, n_occ, k, rest; | |
881 fread(&n_aln, 4, 1, fp_sa); | |
882 if (n_aln > m_aln) { | |
883 m_aln = n_aln; | |
884 aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln); | |
885 } | |
886 | |
887 fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa); | |
888 for ( k = n_occ = 0 ; k < n_aln; ++k ) { | |
889 const bwt_aln1_t *q = aln + k; | |
890 n_occ += q->l - q->k + 1; | |
891 } /* n_occ is now keeping total number of available alignments to the reference | |
892 (i.e. placements, NOT bwa records, each of which can describe few placements) | |
893 */ | |
894 | |
895 // we are going to keep and print 'rest' alignments: | |
896 rest = ((n_occ > max_extra_occ)? max_extra_occ : n_occ); | |
897 | |
898 if ( rest == 0 ) rest++; /* we need at least one record, even if it is going to say "UNMAPPED" */ | |
899 | |
900 if ( rest > m_rest ) { | |
901 // reallocate rest_seqs array (only if needed) to ensure it can keep 'rest' records | |
902 m_rest = rest; | |
903 rest_seqs = (bwa_seq_t*)realloc(rest_seqs,sizeof(bwa_seq_t)*m_rest); | |
904 } | |
905 // initialize 'rest' replicas of the current sequence record | |
906 for ( k = 0 ; k < rest ; k++ ) { | |
907 rest_seqs[k] = *p; /* clone current sequence p; IMPORTANT: it's a shallow copy */ | |
908 } | |
909 | |
910 bwa_aln2seq_all(n_aln, aln, rest,rest_seqs); | |
911 // now each of the replicas carries its own bwa alignment selected from all alignments | |
912 // available for the current sequence *p. | |
913 | |
914 /* compute positions of the alignments on the ref: */ | |
915 t = clock(); | |
916 | |
917 bwa_cal_pac_pos_core(bwt[0],bwt[1], rest, rest_seqs, opt.max_diff, opt.fnr ); | |
918 t_convert += ( clock() - t ); | |
919 *p = rest_seqs[0]; // copy first selected alignment back into p; | |
920 | |
921 seq_reverse(p->len, p->seq,0); | |
922 | |
923 /* compute positions of the alignments on the ref: */ | |
924 t = clock(); | |
925 | |
926 bwa_cal_pac_pos_core(bwt[0],bwt[1], rest, rest_seqs, opt.max_diff, opt.fnr ); | |
927 t_convert += ( clock() - t ); | |
928 | |
929 t = clock(); | |
930 | |
931 bwa_refine_gapped(bns,rest,rest_seqs,pacseq,ntbns); // refine all gapped aligns in our replicas; | |
932 // side effect: cigars will be allocated for each replica | |
933 t_refine += ( clock() - t ); | |
934 | |
935 t = clock(); | |
936 // for ( k = 0 ; k < n_seqs ; k++ ) { | |
937 for ( k = 0 ; k < rest ; k++ ) { | |
938 | |
939 bwa_print_sam1(bns, rest_seqs + k, 0, opt.mode, opt.max_top2); | |
940 // cigar was allocated for us in every replica as a side effect, free it now: | |
941 free ( (rest_seqs+k)->cigar ); | |
942 } | |
943 t_write+= ( clock()-t); | |
944 | |
945 } | |
946 | |
947 bwa_free_read_seq(n_seqs, seqs); | |
948 | |
949 fprintf(stderr, "[bwa_aln_core] convert %d sequences to sequence coordinate: ",n_seqs); | |
950 fprintf(stderr, "%.2f sec\n", (float)t_convert / CLOCKS_PER_SEC); | |
951 fprintf(stderr, "[bwa_aln_core] refine gapped alignments for %d sequences: ", n_seqs); | |
952 fprintf(stderr, "%.2f sec\n", (float)t_refine / CLOCKS_PER_SEC); | |
953 fprintf(stderr, "[bwa_aln_core] print alignments for %d sequences: ", n_seqs); | |
954 fprintf(stderr, "%.2f sec\n", (float)t_write/ CLOCKS_PER_SEC); | |
955 fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); | |
956 | |
957 t = clock(); | |
958 } | |
959 | |
960 // destroy | |
961 bwt_destroy(bwt[0]); | |
962 bwt_destroy(bwt[1]); | |
963 | |
964 free(rest_seqs); | |
965 free(pacseq); | |
966 | |
967 bwa_seq_close(ks); | |
968 if (ntbns) bns_destroy(ntbns); | |
969 bns_destroy(bns); | |
970 fclose(fp_sa); | |
971 free(aln); | |
972 } | |
973 | |
974 int bwa_sai2sam_se(int argc, char *argv[]) | |
975 { | |
976 int c, n_occ = 3; | |
977 int do_full_sam = 0; | |
978 while ((c = getopt(argc, argv, "hsn:f:")) >= 0) { | |
979 switch (c) { | |
980 case 'h': break; | |
981 case 's': do_full_sam = 1; break; | |
982 case 'n': n_occ = atoi(optarg); break; | |
983 case 'f': freopen(optarg, "w", stdout); break; | |
984 default: return 1; | |
985 } | |
986 } | |
987 | |
988 if (optind + 3 > argc) { | |
989 fprintf(stderr, "Usage: bwa samse [-n max_occ [-s] ] [-f out.sam] <prefix> <in.sai> <in.fq>\n"); | |
990 return 1; | |
991 } | |
992 if ( do_full_sam ) bwa_print_all_hits(argv[optind], argv[optind+1], argv[optind+2], n_occ); | |
993 else bwa_sai2sam_se_core(argv[optind], argv[optind+1], argv[optind+2], n_occ); | |
994 return 0; | |
995 } |