annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/bwase.c @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #include <unistd.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 #include <math.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 #include <time.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 #include "stdaln.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 #include "bwase.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 #include "bwtaln.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 #include "bntseq.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 #include "utils.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12 #include "kstring.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 static int g_log_n[256];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 int i, cnt, best;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 if (n_aln == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 s->type = BWA_TYPE_NO_MATCH;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 s->c1 = s->c2 = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 if (set_main) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 best = aln[0].score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 for (i = cnt = 0; i < n_aln; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 const bwt_aln1_t *p = aln + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 if (p->score > best) break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 if (drand48() * (p->l - p->k + 1) > (double)cnt) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 s->n_mm = p->n_mm; s->n_gapo = p->n_gapo; s->n_gape = p->n_gape; s->strand = p->a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 s->score = p->score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 s->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 cnt += p->l - p->k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 s->c1 = cnt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 for (; i < n_aln; ++i) cnt += aln[i].l - aln[i].k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 s->c2 = cnt - s->c1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 s->type = s->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 if (n_multi) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 int k, rest, n_occ, z = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 for (k = n_occ = 0; k < n_aln; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 const bwt_aln1_t *q = aln + k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 n_occ += q->l - q->k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 if (s->multi) free(s->multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 if (n_occ > n_multi + 1) { // if there are too many hits, generate none of them
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 s->multi = 0; s->n_multi = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 /* The following code is more flexible than what is required
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 * here. In principle, due to the requirement above, we can
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 * simply output all hits, but the following samples "rest"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 * number of random hits. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 rest = n_occ > n_multi + 1? n_multi + 1 : n_occ; // find one additional for ->sa
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 s->multi = calloc(rest, rest * sizeof(bwt_multi1_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 for (k = 0; k < n_aln; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 const bwt_aln1_t *q = aln + k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 if (q->l - q->k + 1 <= rest) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 bwtint_t l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 for (l = q->k; l <= q->l; ++l) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 s->multi[z].pos = l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 s->multi[z].gap = q->n_gapo + q->n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 s->multi[z].mm = q->n_mm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 s->multi[z++].strand = q->a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 rest -= q->l - q->k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 } else { // Random sampling (http://code.activestate.com/recipes/272884/). In fact, we never come here.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 int j, i, k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 for (j = rest, i = q->l - q->k + 1, k = 0; j > 0; --j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 double p = 1.0, x = drand48();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 while (x < p) p -= p * j / (i--);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 s->multi[z].pos = q->l - i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 s->multi[z].gap = q->n_gapo + q->n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 s->multi[z].mm = q->n_mm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 s->multi[z++].strand = q->a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 rest = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 s->n_multi = z;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 for (k = z = 0; k < s->n_multi; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 if (s->multi[k].pos != s->sa)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 s->multi[z++] = s->multi[k];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 s->n_multi = z < n_multi? z : n_multi;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 bwa_aln2seq_core(n_aln, aln, s, 1, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 /* *aln points to alignments found for the current sequence, n_aln is the
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 size of the array pointed to by *aln. Array *s of size n_seq is a collection
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 of SAM records that must be replicas, i.e. initialized with the same current sequence.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 This method updates sequence records in *s with placements recorded in *aln,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 until all n_seq replicas are updated. For each separate alignment record in array *aln,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 each placement corresponding to this record will be assigned to a separate record in *s
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 if there are enough elements remaining in *s, otherwise a random subset of the placements will
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 be assigned to the remaining elements in *s. The total number of best placements and total
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 number of non-best placements will be computed from the whole array *aln (regardless of whether it
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 fits completely into *s or not) and assigned to each updated record in *s.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 void bwa_aln2seq_all(int n_aln, const bwt_aln1_t *aln, int n_seq, bwa_seq_t *s)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 int i, cnt1, cnt2, j, best, N;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 if (n_aln == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 /* there is no match found for *s */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 s->type = BWA_TYPE_NO_MATCH;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 s->c1 = s->c2 = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 N = n_seq; // remember the size of the array
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 best = aln[0].score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 cnt1 = 0; // total number of already processed alignments (i.e. distinct placements, NOT alignment records) with best score
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 cnt2 = 0; // total number of already processed alignments with inferior score(s)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 for (i = 0; i < n_aln && n_seq > 0 ; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 const bwt_aln1_t *p_aln = aln + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 int N_aligns = p_aln->l-p_aln->k +1 ; // number of placements (alignments) in the current alignment record p_aln
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 if (N_aligns <= n_seq) { /* we have space to save all the alignments stored in 'p_aln' */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 for ( j = 0 ; j < N_aligns ; j++ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 bwa_seq_t * seq = s + cnt1+ cnt2+j ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 seq->n_mm = p_aln->n_mm; seq->n_gapo = p_aln->n_gapo; seq->n_gape = p_aln->n_gape; seq->strand = p_aln->a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 seq->score = p_aln->score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 seq->sa = p_aln->k + j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 n_seq -= N_aligns; // we have n_seq slots remaining to store more alignments
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 } else { // See also: http://code.activestate.com/recipes/272884/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 // we have to truncate, so let's select few remaining alignments randomly:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 int xj, xi, xk;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 for (xj = n_seq, xi = N_aligns, xk = 0; xj > 0; --xj, ++xk) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 double p = 1.0, x = drand48();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 while (x < p) p -= p * xj / (xi--);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 bwa_seq_t * seq = s+cnt1+cnt2+xk ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 seq->n_mm = p_aln->n_mm; seq->n_gapo = p_aln->n_gapo; seq->n_gape = p_aln->n_gape; seq->strand = p_aln->a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148 seq->score = p_aln->score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 seq->sa = p_aln->l - xi;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 n_seq = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 // cnt1 + cnt2 is the total count of hits processed so far:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 if ( p_aln->score == best ) cnt1 += N_aligns; // we found N_aligns more placements with best score
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156 else cnt2 += N_aligns; // N_aligns more placements with inferior score
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 // we filled all available slots in the array *s, but there can be more alignments
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 // left; we need to count them:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 for (; i < n_aln; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 if ( aln[i].score == best ) cnt1 += aln[i].l-aln[i].k+1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 else cnt2 += aln[i].l-aln[i].k+1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 // now cnt1 is the total number of found alignments (placements) with best score
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 // and cnt2 is the total number of found placements with worse score
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 /* set counts and flags for all hits: */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 for (i = 0; i < N ; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 bwa_seq_t * seq = s+i ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 seq->c1 = cnt1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174 seq->c2 = cnt2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 seq->type = seq->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 int bwa_approx_mapQ(const bwa_seq_t *p, int mm)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 int n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 if (p->c1 == 0) return 23;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185 if (p->c1 > 1) return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 if (p->n_mm == mm) return 25;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 if (p->c2 == 0) return 37;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 n = (p->c2 >= 255)? 255 : p->c2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 return (23 < g_log_n[n])? 0 : 23 - g_log_n[n];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 /**
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 * Derive the actual position in the read from the given suffix array
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 * coordinates. Note that the position will be approximate based on
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 * whether indels appear in the read and whether calculations are
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 * performed from the start or end of the read.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 void bwa_cal_pac_pos_core(const bwt_t *forward_bwt, const bwt_t *reverse_bwt, int n_seqs, bwa_seq_t *s, const int max_mm, const float fnr)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 int max_diff;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 bwa_seq_t *seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 for ( i = 0 ; i < n_seqs ; i++ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 seq = s + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 if (seq->type != BWA_TYPE_UNIQUE && seq->type != BWA_TYPE_REPEAT) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 max_diff = fnr > 0.0? bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr) : max_mm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 if (seq->strand) { // reverse strand only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 seq->pos = bwt_sa(forward_bwt, seq->sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 } else { // forward strand only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 /* NB: For gapped alignment, p->pos may not be correct, which
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 * will be fixed in refine_gapped_core(). This line also
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 * determines the way "x" is calculated in
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 * refine_gapped_core() when (ext < 0 && is_end == 0). */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 seq->pos = reverse_bwt->seq_len - (bwt_sa(reverse_bwt, seq->sa) + seq->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 void bwa_cal_pac_pos(const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 int i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 char str[1024];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 bwt_t *bwt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 // load forward SA
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229 strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 if (seqs[i].strand) bwa_cal_pac_pos_core(bwt, 0, 1, &seqs[i], max_mm, fnr);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 for (j = 0; j < seqs[i].n_multi; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 bwt_multi1_t *p = seqs[i].multi + j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 if (p->strand) p->pos = bwt_sa(bwt, p->pos);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 // load reverse BWT and SA
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 strcpy(str, prefix); strcat(str, ".rbwt"); bwt = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 if (!seqs[i].strand) bwa_cal_pac_pos_core(0, bwt, 1, &seqs[i], max_mm, fnr);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 for (j = 0; j < seqs[i].n_multi; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 bwt_multi1_t *p = seqs[i].multi + j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 if (!p->strand) p->pos = bwt->seq_len - (bwt_sa(bwt, p->pos) + seqs[i].len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 bwt_destroy(bwt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
251
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
252 /* is_end_correct == 1 if (*pos+len) gives the correct coordinate on
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
253 * forward strand. This happens when p->pos is calculated by
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
254 * bwa_cal_pac_pos(). is_end_correct==0 if (*pos) gives the correct
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
255 * coordinate. This happens only for color-converted alignment. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
256 static bwa_cigar_t *refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, bwtint_t *_pos,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
257 int ext, int *n_cigar, int is_end_correct)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
258 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
259 bwa_cigar_t *cigar = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
260 ubyte_t *ref_seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
261 int l = 0, path_len, ref_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
262 AlnParam ap = aln_param_bwa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
263 path_t *path;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
264 int64_t k, __pos = *_pos > l_pac? (int64_t)((int32_t)*_pos) : *_pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
265
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
266 ref_len = len + abs(ext);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
267 if (ext > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
268 ref_seq = (ubyte_t*)calloc(ref_len, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
269 for (k = __pos; k < __pos + ref_len && k < l_pac; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
270 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
271 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
272 int64_t x = __pos + (is_end_correct? len : ref_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
273 ref_seq = (ubyte_t*)calloc(ref_len, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
274 for (l = 0, k = x - ref_len > 0? x - ref_len : 0; k < x && k < l_pac; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
275 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
276 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
277 path = (path_t*)calloc(l+len, sizeof(path_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
278
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
279 aln_global_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
280 cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
281
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
282 if (ext < 0 && is_end_correct) { // fix coordinate for reads mapped on the forward strand
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
283 for (l = k = 0; k < *n_cigar; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
284 if (__cigar_op(cigar[k]) == FROM_D) l -= __cigar_len(cigar[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
285 else if (__cigar_op(cigar[k]) == FROM_I) l += __cigar_len(cigar[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
286 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
287 __pos += l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
288 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
289
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
290 if (__cigar_op(cigar[0]) == FROM_D) { // deletion at the 5'-end
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
291 __pos += __cigar_len(cigar[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
292 for (k = 0; k < *n_cigar - 1; ++k) cigar[k] = cigar[k+1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
293 --(*n_cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
294 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
295 if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
296
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
297 // change "I" at either end of the read to S. just in case. This should rarely happen...
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
298 if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
299 if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
300
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
301 *_pos = (bwtint_t)__pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
302 free(ref_seq); free(path);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
303 return cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
304 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
305
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
306 char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
307 bwtint_t l_pac, ubyte_t *pacseq, kstring_t *str, int *_nm)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
308 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
309 bwtint_t x, y;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
310 int z, u, c, nm = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
311 str->l = 0; // reset
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
312 x = pos; y = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
313 if (cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
314 int k, l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
315 for (k = u = 0; k < n_cigar; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
316 l = __cigar_len(cigar[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
317 if (__cigar_op(cigar[k]) == FROM_M) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
318 for (z = 0; z < l && x+z < l_pac; ++z) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
319 c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
320 if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
321 ksprintf(str, "%d", u);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
322 kputc("ACGTN"[c], str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
323 ++nm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
324 u = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
325 } else ++u;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
326 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
327 x += l; y += l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
328 /* } else if (cigar[k]>>14 == FROM_I || cigar[k]>>14 == 3) { */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
329 } else if (__cigar_op(cigar[k]) == FROM_I || __cigar_op(cigar[k]) == FROM_S) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
330 y += l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
331 if (__cigar_op(cigar[k]) == FROM_I) nm += l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
332 } else if (__cigar_op(cigar[k]) == FROM_D) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
333 ksprintf(str, "%d", u);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
334 kputc('^', str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
335 for (z = 0; z < l && x+z < l_pac; ++z)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
336 kputc("ACGT"[pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3], str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
337 u = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
338 x += l; nm += l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
339 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
340 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
341 } else { // no gaps
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
342 for (z = u = 0; z < (bwtint_t)len; ++z) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
343 c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
344 if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
345 ksprintf(str, "%d", u);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
346 kputc("ACGTN"[c], str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
347 ++nm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
348 u = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
349 } else ++u;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
350 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
351 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
352 ksprintf(str, "%d", u);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
353 *_nm = nm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
354 return strdup(str->s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
355 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
356
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
357 void bwa_correct_trimmed(bwa_seq_t *s)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
358 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
359 if (s->len == s->full_len) return;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
360 if (s->strand == 0) { // forward
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
361 if (s->cigar && __cigar_op(s->cigar[s->n_cigar-1]) == FROM_S) { // the last is S
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
362 s->cigar[s->n_cigar-1] += s->full_len - s->len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
363 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
364 if (s->cigar == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
365 s->n_cigar = 2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
366 s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
367 s->cigar[0] = __cigar_create(0, s->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
368 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
369 ++s->n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
370 s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
371 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
372 s->cigar[s->n_cigar-1] = __cigar_create(3, (s->full_len - s->len));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
373 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
374 } else { // reverse
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
375 if (s->cigar && __cigar_op(s->cigar[0]) == FROM_S) { // the first is S
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
376 s->cigar[0] += s->full_len - s->len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
377 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
378 if (s->cigar == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
379 s->n_cigar = 2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
380 s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
381 s->cigar[1] = __cigar_create(0, s->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
382 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
383 ++s->n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
384 s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
385 memmove(s->cigar + 1, s->cigar, (s->n_cigar-1) * sizeof(bwa_cigar_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
386 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
387 s->cigar[0] = __cigar_create(3, (s->full_len - s->len));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
388 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
389 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
390 s->len = s->full_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
391 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
392
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
393 void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
394 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
395 ubyte_t *pacseq, *ntpac = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
396 int i, j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
397 kstring_t *str;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
398
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
399 if (ntbns) { // in color space
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
400 ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
401 rewind(ntbns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
402 fread(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
403 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
404
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
405 if (!_pacseq) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
406 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
407 rewind(bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
408 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
409 } else pacseq = _pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
410 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
411 bwa_seq_t *s = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
412 for (j = 0; j < s->n_multi; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
413 bwt_multi1_t *q = s->multi + j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
414 int n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
415 if (q->gap == 0) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
416 q->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, &q->pos,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
417 (q->strand? 1 : -1) * q->gap, &n_cigar, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
418 q->n_cigar = n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
419 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
420 if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
421 s->cigar = refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
422 (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
423 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
424
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
425 if (ntbns) { // in color space
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
426 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
427 bwa_seq_t *s = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
428 bwa_cs2nt_core(s, bns->l_pac, ntpac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
429 for (j = 0; j < s->n_multi; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
430 bwt_multi1_t *q = s->multi + j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
431 int n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
432 if (q->gap == 0) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
433 free(q->cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
434 q->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, q->strand? s->rseq : s->seq, &q->pos,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
435 (q->strand? 1 : -1) * q->gap, &n_cigar, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
436 q->n_cigar = n_cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
437 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
438 if (s->type != BWA_TYPE_NO_MATCH && s->cigar) { // update cigar again
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
439 free(s->cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
440 s->cigar = refine_gapped_core(bns->l_pac, ntpac, s->len, s->strand? s->rseq : s->seq, &s->pos,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
441 (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
442 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
443 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
444 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
445
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
446 // generate MD tag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
447 str = (kstring_t*)calloc(1, sizeof(kstring_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
448 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
449 bwa_seq_t *s = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
450 if (s->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
451 int nm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
452 s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
453 bns->l_pac, ntbns? ntpac : pacseq, str, &nm);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
454 s->nm = nm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
455 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
456 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
457 free(str->s); free(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
458
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
459 // correct for trimmed reads
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
460 if (!ntbns) // trimming is only enabled for Illumina reads
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
461 for (i = 0; i < n_seqs; ++i) bwa_correct_trimmed(seqs + i);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
462
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
463 if (!_pacseq) free(pacseq);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
464 free(ntpac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
465 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
466
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
467 int64_t pos_end(const bwa_seq_t *p)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
468 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
469 if (p->cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
470 int j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
471 int64_t x = p->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
472 for (j = 0; j != p->n_cigar; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
473 int op = __cigar_op(p->cigar[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
474 if (op == 0 || op == 2) x += __cigar_len(p->cigar[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
475 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
476 return x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
477 } else return p->pos + p->len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
478 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
479
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
480 int64_t pos_end_multi(const bwt_multi1_t *p, int len) // analogy to pos_end()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
481 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
482 if (p->cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
483 int j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
484 int64_t x = p->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
485 for (j = 0; j != p->n_cigar; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
486 int op = __cigar_op(p->cigar[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
487 if (op == 0 || op == 2) x += __cigar_len(p->cigar[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
488 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
489 return x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
490 } else return p->pos + len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
491 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
492
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
493 static int64_t pos_5(const bwa_seq_t *p)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
494 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
495 if (p->type != BWA_TYPE_NO_MATCH)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
496 return p->strand? pos_end(p) : p->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
497 return -1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
498 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
499
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
500 /* Prints <bases>\t<quals> of the sequence *p into STDOUT;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
501 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
502 void bwa_print_seq_and_qual(bwa_seq_t *p) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
503 int j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
504 ubyte_t * char_ptr;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
505
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
506 if (p->strand == 0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
507 for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
508 else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
509 putchar('\t');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
510 if (p->qual) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
511 if (p->strand) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
512 // seq_reverse(p->len, p->qual, 0); // reverse quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
513 for ( char_ptr = p->qual + p->len - 1 ; char_ptr >= p->qual ; char_ptr-- ) putchar( *char_ptr );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
514 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
515 printf("%s", p->qual);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
516 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
517 } else printf("*");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
518
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
519 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
520
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
521
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
522
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
523 void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
524 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
525 int j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
526 if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
527 int seqid, nn, am = 0, flag = p->extra_flag;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
528 char XT;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
529
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
530 if (p->type == BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
531 p->pos = mate->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
532 p->strand = mate->strand;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
533 flag |= SAM_FSU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
534 j = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
535 } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
536
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
537 // get seqid
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
538 nn = bns_coor_pac2real(bns, p->pos, j, &seqid);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
539 if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
540 flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
541
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
542 // update flag and print it
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
543 if (p->strand) flag |= SAM_FSR;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
544 if (mate) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
545 if (mate->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
546 if (mate->strand) flag |= SAM_FMR;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
547 } else flag |= SAM_FMU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
548 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
549 printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
550 printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
551
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
552 // print CIGAR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
553 if (p->cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
554 for (j = 0; j != p->n_cigar; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
555 printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
556 } else if (p->type == BWA_TYPE_NO_MATCH) printf("*");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
557 else printf("%dM", p->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
558
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
559 // print mate coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
560 if (mate && mate->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
561 int m_seqid, m_is_N;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
562 long long isize;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
563 am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
564 // redundant calculation here, but should not matter too much
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
565 m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
566 printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
567 isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
568 if (p->type == BWA_TYPE_NO_MATCH) isize = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
569 printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
570 } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
571 else printf("\t*\t0\t0\t");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
572
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
573 // print sequence and quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
574 bwa_print_seq_and_qual(p);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
575
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
576 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
577 if (p->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
578 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
579 // calculate XT tag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
580 XT = "NURM"[p->type];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
581 if (nn > 10) XT = 'N';
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
582 // print tags
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
583 printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
584 if (nn) printf("\tXN:i:%d", nn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
585 if (mate) printf("\tSM:i:%d\tAM:i:%d", p->seQ, am);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
586 if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
587 printf("\tX0:i:%d", p->c1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
588 if (p->c1 <= max_top2) printf("\tX1:i:%d", p->c2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
589 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
590 printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
591 if (p->md) printf("\tMD:Z:%s", p->md);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
592 // print multiple hits
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
593 if (p->n_multi) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
594 printf("\tXA:Z:");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
595 for (i = 0; i < p->n_multi; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
596 bwt_multi1_t *q = p->multi + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
597 int k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
598 j = pos_end_multi(q, p->len) - q->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
599 nn = bns_coor_pac2real(bns, q->pos, j, &seqid);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
600 printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
601 (int)(q->pos - bns->anns[seqid].offset + 1));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
602 if (q->cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
603 for (k = 0; k < q->n_cigar; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
604 printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
605 } else printf("%dM", p->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
606 printf(",%d;", q->gap + q->mm);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
607 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
608 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
609 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
610 putchar('\n');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
611 } else { // this read has no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
612 ubyte_t *s = p->strand? p->rseq : p->seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
613 int flag = p->extra_flag | SAM_FSU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
614 if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
615 printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
616 for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
617 putchar('\t');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
618 if (p->qual) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
619 if (p->strand) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
620 // seq_reverse(p->len, p->qual, 0); // reverse quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
621 for ( s = p->qual + p->len - 1 ; s >= p->qual ; s-- ) putchar( *s );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
622
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
623 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
624 printf("%s", p->qual);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
625 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
626 } else printf("*");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
627 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
628 putchar('\n');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
629 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
630 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
631
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
632 /* UNUSED
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
633 void bwa_print_partial_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
634 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
635 int j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
636 if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
637 int seqid, nn, am = 0, flag = p->extra_flag;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
638 char XT;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
639
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
640 if (p->type == BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
641 p->pos = mate->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
642 p->strand = mate->strand;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
643 flag |= SAM_FSU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
644 j = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
645 } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
646
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
647 // get seqid
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
648 nn = bns_coor_pac2real(bns, p->pos, j, &seqid);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
649 if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
650 flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
651
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
652 // update flag and print it
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
653 if (p->strand) flag |= SAM_FSR;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
654 if (mate) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
655 if (mate->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
656 if (mate->strand) flag |= SAM_FMR;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
657 } else flag |= SAM_FMU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
658 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
659 printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
660 printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
661
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
662 // print CIGAR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
663 if (p->cigar) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
664 for (j = 0; j != p->n_cigar; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
665 printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
666 } else if (p->type == BWA_TYPE_NO_MATCH) printf("*");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
667 else printf("%dM", p->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
668
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
669 // print mate coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
670 if (mate && mate->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
671 int m_seqid, m_is_N;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
672 long long isize;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
673 am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
674 // redundant calculation here, but should not matter too much
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
675 m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
676 printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
677 isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
678 if (p->type == BWA_TYPE_NO_MATCH) isize = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
679 printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
680 } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
681 else printf("\t*\t0\t0\t");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
682
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
683
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
684 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
685 if (p->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
686 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
687 // calculate XT tag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
688 XT = "NURM"[p->type];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
689 if (nn > 10) XT = 'N';
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
690 // print tags
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
691 printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
692 if (nn) printf("\tXN:i:%d", nn);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
693 if (mate) printf("\tSM:i:%d\tAM:i:%d", p->seQ, am);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
694 printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
695 if (p->md) printf("\tMD:Z:%s", p->md);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
696 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
697 putchar('\n');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
698 } else { // this read has no match
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
699 ubyte_t *s = p->strand? p->rseq : p->seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
700 int flag = p->extra_flag | SAM_FSU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
701 if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
702 printf("%d\t*\t0\t0\t*\t*\t0\t0\t", flag);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
703 if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
704 putchar('\n');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
705 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
706 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
707
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
708 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
709
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
710 bntseq_t *bwa_open_nt(const char *prefix)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
711 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
712 bntseq_t *ntbns;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
713 char *str;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
714 str = (char*)calloc(strlen(prefix) + 10, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
715 strcat(strcpy(str, prefix), ".nt");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
716 ntbns = bns_restore(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
717 free(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
718 return ntbns;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
719 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
720
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
721 void bwa_print_sam_SQ(const bntseq_t *bns)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
722 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
723 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
724 for (i = 0; i < bns->n_seqs; ++i)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
725 printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
726 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
727
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
728 void bwase_initialize()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
729 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
730 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
731 for (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
732 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
733
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
734 void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
735 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
736 int i, n_seqs, tot_seqs = 0, m_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
737 bwt_aln1_t *aln = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
738 bwa_seq_t *seqs;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
739 bwa_seqio_t *ks;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
740 clock_t t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
741 bntseq_t *bns, *ntbns = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
742 FILE *fp_sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
743 gap_opt_t opt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
744
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
745 // initialization
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
746 bwase_initialize();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
747 bns = bns_restore(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
748 srand48(bns->seed);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
749 ks = bwa_seq_open(fn_fa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
750 fp_sa = xopen(fn_sa, "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
751
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
752 // core loop
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
753 m_aln = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
754 fread(&opt, sizeof(gap_opt_t), 1, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
755 if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
756 ntbns = bwa_open_nt(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
757 bwa_print_sam_SQ(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
758 while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
759 tot_seqs += n_seqs;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
760 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
761
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
762 // read alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
763 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
764 bwa_seq_t *p = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
765 int n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
766 fread(&n_aln, 4, 1, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
767 if (n_aln > m_aln) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
768 m_aln = n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
769 aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
770 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
771 fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
772 bwa_aln2seq_core(n_aln, aln, p, 1, n_occ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
773 // seq_reverse(p->len, p->seq, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
774 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
775
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
776 fprintf(stderr, "[bwa_aln_core] convert to sequence coordinate... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
777 bwa_cal_pac_pos(prefix, n_seqs, seqs, opt.max_diff, opt.fnr); // forward bwt will be destroyed here
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
778 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
779
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
780 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
781 bwa_seq_t *p = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
782 seq_reverse(p->len, p->seq, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
783 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
784
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
785 fprintf(stderr, "[bwa_aln_core] refine gapped alignments... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
786 bwa_refine_gapped(bns, n_seqs, seqs, 0, ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
787 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
788
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
789 fprintf(stderr, "[bwa_aln_core] print alignments... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
790 for (i = 0; i < n_seqs; ++i)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
791 bwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
792 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
793
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
794 bwa_free_read_seq(n_seqs, seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
795 fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
796 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
797
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
798 // destroy
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
799 bwa_seq_close(ks);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
800 if (ntbns) bns_destroy(ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
801 bns_destroy(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
802 fclose(fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
803 free(aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
804 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
805
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
806 void bwa_print_all_hits(const char *prefix, const char *fn_sa, const char *fn_fa, int max_extra_occ)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
807 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
808 int i, n_seqs, tot_seqs = 0, m_aln, m_rest;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
809 bwt_aln1_t *aln = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
810 bwa_seq_t *seqs;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
811 bwa_seqio_t *ks;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
812 clock_t t,t_convert, t_refine, t_write;;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
813 bntseq_t *bns, *ntbns = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
814 FILE *fp_sa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
815 gap_opt_t opt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
816
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
817 //****** below modified (added) for multiple hit printout:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
818
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
819 bwa_seq_t * rest_seqs = 0; // this array will keep (shallow) replicas of the current sequence;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
820 // each of the replicas will be updated with its own alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
821 // selected from all the (multiple) alignmens available for the current seq.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
822
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
823 bwt_t *bwt[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
824 char str[1024];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
825 ubyte_t *pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
826
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
827 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
828 fprintf(stderr, "[bwa_aln_core] Data structures initialized: ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
829
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
830
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
831 strcpy(str, prefix); strcat(str, ".bwt"); bwt[0] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
832 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
833
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
834 // load reverse BWT and SA
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
835 strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
836 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
837 //***************
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
838
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
839 // initialization
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
840 bwase_initialize();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
841 bns = bns_restore(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
842 srand48(bns->seed);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
843 ks = bwa_seq_open(fn_fa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
844 fp_sa = xopen(fn_sa, "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
845
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
846 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
847 rewind(bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
848 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
849
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
850
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
851 // core loop
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
852 m_aln = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
853 m_rest = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
854
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
855 fread(&opt, sizeof(gap_opt_t), 1, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
856 if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
857 ntbns = bwa_open_nt(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
858 bwa_print_sam_SQ(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
859
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
860 fprintf(stderr, "%.2f sec\n", (float)(clock()-t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
861
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
862 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
863
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
864 max_extra_occ++; // now this variable holds TOTAL number of alignments we want to print (1+requested extra).
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
865
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
866 while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
867 tot_seqs += n_seqs;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
868 t_convert = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
869 t_refine = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
870 t_write = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
871
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
872 fprintf(stderr, "[bwa_aln_core] %d sequences loaded: ",n_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
873 fprintf(stderr, "%.2f sec\n", (float)(clock()-t) / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
874
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
875
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
876 // read alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
877 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
878
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
879 bwa_seq_t *p = seqs + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
880 int n_aln, n_occ, k, rest;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
881 fread(&n_aln, 4, 1, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
882 if (n_aln > m_aln) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
883 m_aln = n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
884 aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
885 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
886
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
887 fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
888 for ( k = n_occ = 0 ; k < n_aln; ++k ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
889 const bwt_aln1_t *q = aln + k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
890 n_occ += q->l - q->k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
891 } /* n_occ is now keeping total number of available alignments to the reference
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
892 (i.e. placements, NOT bwa records, each of which can describe few placements)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
893 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
894
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
895 // we are going to keep and print 'rest' alignments:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
896 rest = ((n_occ > max_extra_occ)? max_extra_occ : n_occ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
897
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
898 if ( rest == 0 ) rest++; /* we need at least one record, even if it is going to say "UNMAPPED" */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
899
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
900 if ( rest > m_rest ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
901 // reallocate rest_seqs array (only if needed) to ensure it can keep 'rest' records
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
902 m_rest = rest;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
903 rest_seqs = (bwa_seq_t*)realloc(rest_seqs,sizeof(bwa_seq_t)*m_rest);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
904 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
905 // initialize 'rest' replicas of the current sequence record
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
906 for ( k = 0 ; k < rest ; k++ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
907 rest_seqs[k] = *p; /* clone current sequence p; IMPORTANT: it's a shallow copy */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
908 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
909
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
910 bwa_aln2seq_all(n_aln, aln, rest,rest_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
911 // now each of the replicas carries its own bwa alignment selected from all alignments
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
912 // available for the current sequence *p.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
913
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
914 /* compute positions of the alignments on the ref: */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
915 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
916
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
917 bwa_cal_pac_pos_core(bwt[0],bwt[1], rest, rest_seqs, opt.max_diff, opt.fnr );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
918 t_convert += ( clock() - t );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
919 *p = rest_seqs[0]; // copy first selected alignment back into p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
920
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
921 seq_reverse(p->len, p->seq,0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
922
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
923 /* compute positions of the alignments on the ref: */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
924 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
925
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
926 bwa_cal_pac_pos_core(bwt[0],bwt[1], rest, rest_seqs, opt.max_diff, opt.fnr );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
927 t_convert += ( clock() - t );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
928
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
929 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
930
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
931 bwa_refine_gapped(bns,rest,rest_seqs,pacseq,ntbns); // refine all gapped aligns in our replicas;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
932 // side effect: cigars will be allocated for each replica
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
933 t_refine += ( clock() - t );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
934
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
935 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
936 // for ( k = 0 ; k < n_seqs ; k++ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
937 for ( k = 0 ; k < rest ; k++ ) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
938
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
939 bwa_print_sam1(bns, rest_seqs + k, 0, opt.mode, opt.max_top2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
940 // cigar was allocated for us in every replica as a side effect, free it now:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
941 free ( (rest_seqs+k)->cigar );
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
942 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
943 t_write+= ( clock()-t);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
944
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
945 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
946
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
947 bwa_free_read_seq(n_seqs, seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
948
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
949 fprintf(stderr, "[bwa_aln_core] convert %d sequences to sequence coordinate: ",n_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
950 fprintf(stderr, "%.2f sec\n", (float)t_convert / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
951 fprintf(stderr, "[bwa_aln_core] refine gapped alignments for %d sequences: ", n_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
952 fprintf(stderr, "%.2f sec\n", (float)t_refine / CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
953 fprintf(stderr, "[bwa_aln_core] print alignments for %d sequences: ", n_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
954 fprintf(stderr, "%.2f sec\n", (float)t_write/ CLOCKS_PER_SEC);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
955 fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
956
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
957 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
958 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
959
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
960 // destroy
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
961 bwt_destroy(bwt[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
962 bwt_destroy(bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
963
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
964 free(rest_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
965 free(pacseq);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
966
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
967 bwa_seq_close(ks);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
968 if (ntbns) bns_destroy(ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
969 bns_destroy(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
970 fclose(fp_sa);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
971 free(aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
972 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
973
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
974 int bwa_sai2sam_se(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
975 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
976 int c, n_occ = 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
977 int do_full_sam = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
978 while ((c = getopt(argc, argv, "hsn:f:")) >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
979 switch (c) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
980 case 'h': break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
981 case 's': do_full_sam = 1; break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
982 case 'n': n_occ = atoi(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
983 case 'f': freopen(optarg, "w", stdout); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
984 default: return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
985 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
986 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
987
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
988 if (optind + 3 > argc) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
989 fprintf(stderr, "Usage: bwa samse [-n max_occ [-s] ] [-f out.sam] <prefix> <in.sai> <in.fq>\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
990 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
991 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
992 if ( do_full_sam ) bwa_print_all_hits(argv[optind], argv[optind+1], argv[optind+2], n_occ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
993 else bwa_sai2sam_se_core(argv[optind], argv[optind+1], argv[optind+2], n_occ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
994 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
995 }