0
|
1 #include <assert.h>
|
|
2 #include "bam_tview.h"
|
|
3
|
|
4 int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples)
|
|
5 {
|
|
6 assert(tv!=NULL);
|
|
7 assert(fn!=NULL);
|
|
8 tv->mrow = 24; tv->mcol = 80;
|
|
9 tv->color_for = TV_COLOR_MAPQ;
|
|
10 tv->is_dot = 1;
|
|
11
|
|
12 tv->fp = bam_open(fn, "r");
|
|
13 if(tv->fp==0)
|
|
14 {
|
|
15 fprintf(stderr,"bam_open %s. %s\n", fn,fn_fa);
|
|
16 exit(EXIT_FAILURE);
|
|
17 }
|
|
18 bgzf_set_cache_size(tv->fp, 8 * 1024 *1024);
|
|
19 assert(tv->fp);
|
|
20
|
|
21 tv->header = bam_header_read(tv->fp);
|
|
22 if(tv->header==0)
|
|
23 {
|
|
24 fprintf(stderr,"Cannot read '%s'.\n", fn);
|
|
25 exit(EXIT_FAILURE);
|
|
26 }
|
|
27 tv->idx = bam_index_load(fn);
|
|
28 if (tv->idx == 0)
|
|
29 {
|
|
30 fprintf(stderr,"Cannot read index for '%s'.\n", fn);
|
|
31 exit(EXIT_FAILURE);
|
|
32 }
|
|
33 tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
|
|
34 if (fn_fa) tv->fai = fai_load(fn_fa);
|
|
35 tv->bca = bcf_call_init(0.83, 13);
|
|
36 tv->ins = 1;
|
|
37
|
|
38 if ( samples )
|
|
39 {
|
|
40 if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text);
|
|
41 void *iter = tv->header->dict;
|
|
42 const char *key, *val;
|
|
43 int n = 0;
|
|
44 tv->rg_hash = kh_init(kh_rg);
|
|
45 while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) )
|
|
46 {
|
|
47 if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) )
|
|
48 {
|
|
49 khiter_t k = kh_get(kh_rg, tv->rg_hash, key);
|
|
50 if ( k != kh_end(tv->rg_hash) ) continue;
|
|
51 int ret;
|
|
52 k = kh_put(kh_rg, tv->rg_hash, key, &ret);
|
|
53 kh_value(tv->rg_hash, k) = val;
|
|
54 n++;
|
|
55 }
|
|
56 }
|
|
57 if ( !n )
|
|
58 {
|
|
59 fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples);
|
|
60 exit(EXIT_FAILURE);
|
|
61 }
|
|
62 }
|
|
63
|
|
64 return 0;
|
|
65 }
|
|
66
|
|
67
|
|
68 void base_tv_destroy(tview_t* tv)
|
|
69 {
|
|
70 bam_lplbuf_destroy(tv->lplbuf);
|
|
71 bcf_call_destroy(tv->bca);
|
|
72 bam_index_destroy(tv->idx);
|
|
73 if (tv->fai) fai_destroy(tv->fai);
|
|
74 free(tv->ref);
|
|
75 bam_header_destroy(tv->header);
|
|
76 bam_close(tv->fp);
|
|
77 }
|
|
78
|
|
79
|
|
80 int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
|
|
81 {
|
|
82 extern unsigned char bam_nt16_table[256];
|
|
83 tview_t *tv = (tview_t*)data;
|
|
84 int i, j, c, rb, attr, max_ins = 0;
|
|
85 uint32_t call = 0;
|
|
86 if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen
|
|
87 // print referece
|
|
88 rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
|
|
89 for (i = tv->last_pos + 1; i < pos; ++i) {
|
|
90 if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1);
|
|
91 c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
|
|
92 tv->my_mvaddch(tv,1, tv->ccol++, c);
|
|
93 }
|
|
94 if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
|
|
95 { // call consensus
|
|
96 bcf_callret1_t bcr;
|
|
97 int qsum[4], a1, a2, tmp;
|
|
98 double p[3], prior = 30;
|
|
99 bcf_call_glfgen(n, pl, bam_nt16_table[rb], tv->bca, &bcr);
|
|
100 for (i = 0; i < 4; ++i) qsum[i] = bcr.qsum[i]<<2 | i;
|
|
101 for (i = 1; i < 4; ++i) // insertion sort
|
|
102 for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j)
|
|
103 tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
|
|
104 a1 = qsum[0]&3; a2 = qsum[1]&3;
|
|
105 p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2];
|
|
106 if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3;
|
|
107 if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3;
|
|
108 if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499);
|
|
109 else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
|
|
110 else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
|
|
111 }
|
|
112 attr = tv->my_underline(tv);
|
|
113 c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
|
|
114 i = (call&0xffff)/10+1;
|
|
115 if (i > 4) i = 4;
|
|
116 attr |= tv->my_colorpair(tv,i);
|
|
117 if (c == toupper(rb)) c = '.';
|
|
118 tv->my_attron(tv,attr);
|
|
119 tv->my_mvaddch(tv,2, tv->ccol, c);
|
|
120 tv->my_attroff(tv,attr);
|
|
121 if(tv->ins) {
|
|
122 // calculate maximum insert
|
|
123 for (i = 0; i < n; ++i) {
|
|
124 const bam_pileup1_t *p = pl + i;
|
|
125 if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;
|
|
126 }
|
|
127 }
|
|
128 // core loop
|
|
129 for (j = 0; j <= max_ins; ++j) {
|
|
130 for (i = 0; i < n; ++i) {
|
|
131 const bam_pileup1_t *p = pl + i;
|
|
132 int row = TV_MIN_ALNROW + p->level - tv->row_shift;
|
|
133 if (j == 0) {
|
|
134 if (!p->is_del) {
|
|
135 if (tv->base_for == TV_BASE_COLOR_SPACE &&
|
|
136 (c = bam_aux_getCSi(p->b, p->qpos))) {
|
|
137 // assume that if we found one color, we will be able to get the color error
|
|
138 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.';
|
|
139 } else {
|
|
140 if (tv->show_name) {
|
|
141 char *name = bam1_qname(p->b);
|
|
142 c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos];
|
|
143 } else {
|
|
144 c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
|
|
145 if (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
|
|
146 }
|
|
147 }
|
|
148 } else c = p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*';
|
|
149 } else { // padding
|
|
150 if (j > p->indel) c = '*';
|
|
151 else { // insertion
|
|
152 if (tv->base_for == TV_BASE_NUCL) {
|
|
153 if (tv->show_name) {
|
|
154 char *name = bam1_qname(p->b);
|
|
155 c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j];
|
|
156 } else {
|
|
157 c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
|
|
158 if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
|
|
159 }
|
|
160 } else {
|
|
161 c = bam_aux_getCSi(p->b, p->qpos + j);
|
|
162 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? ',' : '.';
|
|
163 }
|
|
164 }
|
|
165 }
|
|
166 if (row > TV_MIN_ALNROW && row < tv->mrow) {
|
|
167 int x;
|
|
168 attr = 0;
|
|
169 if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
|
|
170 || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv);
|
|
171 if (tv->color_for == TV_COLOR_BASEQ) {
|
|
172 x = bam1_qual(p->b)[p->qpos]/10 + 1;
|
|
173 if (x > 4) x = 4;
|
|
174 attr |= tv->my_colorpair(tv,x);
|
|
175 } else if (tv->color_for == TV_COLOR_MAPQ) {
|
|
176 x = p->b->core.qual/10 + 1;
|
|
177 if (x > 4) x = 4;
|
|
178 attr |= tv->my_colorpair(tv,x);
|
|
179 } else if (tv->color_for == TV_COLOR_NUCL) {
|
|
180 x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5;
|
|
181 attr |= tv->my_colorpair(tv,x);
|
|
182 } else if(tv->color_for == TV_COLOR_COL) {
|
|
183 x = 0;
|
|
184 switch(bam_aux_getCSi(p->b, p->qpos)) {
|
|
185 case '0': x = 0; break;
|
|
186 case '1': x = 1; break;
|
|
187 case '2': x = 2; break;
|
|
188 case '3': x = 3; break;
|
|
189 case '4': x = 4; break;
|
|
190 default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break;
|
|
191 }
|
|
192 x+=5;
|
|
193 attr |= tv->my_colorpair(tv,x);
|
|
194 } else if(tv->color_for == TV_COLOR_COLQ) {
|
|
195 x = bam_aux_getCQi(p->b, p->qpos);
|
|
196 if(0 == x) x = bam1_qual(p->b)[p->qpos];
|
|
197 x = x/10 + 1;
|
|
198 if (x > 4) x = 4;
|
|
199 attr |= tv->my_colorpair(tv,x);
|
|
200 }
|
|
201 tv->my_attron(tv,attr);
|
|
202 tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c));
|
|
203 tv->my_attroff(tv,attr);
|
|
204 }
|
|
205 }
|
|
206 c = j? '*' : rb;
|
|
207 if (c == '*') {
|
|
208 attr = tv->my_colorpair(tv,8);
|
|
209 tv->my_attron(tv,attr);
|
|
210 tv->my_mvaddch(tv,1, tv->ccol++, c);
|
|
211 tv->my_attroff(tv,attr);
|
|
212 } else tv->my_mvaddch(tv,1, tv->ccol++, c);
|
|
213 }
|
|
214 tv->last_pos = pos;
|
|
215 return 0;
|
|
216 }
|
|
217
|
|
218
|
|
219
|
|
220
|
|
221 int tv_fetch_func(const bam1_t *b, void *data)
|
|
222 {
|
|
223 tview_t *tv = (tview_t*)data;
|
|
224 if ( tv->rg_hash )
|
|
225 {
|
|
226 const uint8_t *rg = bam_aux_get(b, "RG");
|
|
227 if ( !rg ) return 0;
|
|
228 khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1));
|
|
229 if ( k == kh_end(tv->rg_hash) ) return 0;
|
|
230 }
|
|
231 if (tv->no_skip) {
|
|
232 uint32_t *cigar = bam1_cigar(b); // this is cheating...
|
|
233 int i;
|
|
234 for (i = 0; i <b->core.n_cigar; ++i) {
|
|
235 if ((cigar[i]&0xf) == BAM_CREF_SKIP)
|
|
236 cigar[i] = cigar[i]>>4<<4 | BAM_CDEL;
|
|
237 }
|
|
238 }
|
|
239 bam_lplbuf_push(b, tv->lplbuf);
|
|
240 return 0;
|
|
241 }
|
|
242
|
|
243 int base_draw_aln(tview_t *tv, int tid, int pos)
|
|
244 {
|
|
245 assert(tv!=NULL);
|
|
246 // reset
|
|
247 tv->my_clear(tv);
|
|
248 tv->curr_tid = tid; tv->left_pos = pos;
|
|
249 tv->last_pos = tv->left_pos - 1;
|
|
250 tv->ccol = 0;
|
|
251 // print ref and consensus
|
|
252 if (tv->fai) {
|
|
253 char *str;
|
|
254 if (tv->ref) free(tv->ref);
|
|
255 assert(tv->curr_tid>=0);
|
|
256
|
|
257 str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
|
|
258 assert(str!=NULL);
|
|
259 sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
|
|
260 tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
|
|
261 free(str);
|
|
262 }
|
|
263 // draw aln
|
|
264 bam_lplbuf_reset(tv->lplbuf);
|
|
265 bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func);
|
|
266 bam_lplbuf_push(0, tv->lplbuf);
|
|
267
|
|
268 while (tv->ccol < tv->mcol) {
|
|
269 int pos = tv->last_pos + 1;
|
|
270 if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
|
|
271 tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
|
|
272 ++tv->last_pos;
|
|
273 }
|
|
274 return 0;
|
|
275 }
|
|
276
|
|
277
|
|
278
|
|
279
|
|
280 static void error(const char *format, ...)
|
|
281 {
|
|
282 if ( !format )
|
|
283 {
|
|
284 fprintf(stderr, "\n");
|
|
285 fprintf(stderr, "Usage: bamtk tview [options] <aln.bam> [ref.fasta]\n");
|
|
286 fprintf(stderr, "Options:\n");
|
|
287 fprintf(stderr, " -d display output as (H)tml or (C)urses or (T)ext \n");
|
|
288 fprintf(stderr, " -p chr:pos go directly to this position\n");
|
|
289 fprintf(stderr, " -s STR display only reads from this sample or group\n");
|
|
290 fprintf(stderr, "\n\n");
|
|
291 }
|
|
292 else
|
|
293 {
|
|
294 va_list ap;
|
|
295 va_start(ap, format);
|
|
296 vfprintf(stderr, format, ap);
|
|
297 va_end(ap);
|
|
298 }
|
|
299 exit(-1);
|
|
300 }
|
|
301
|
|
302 enum dipsay_mode {display_ncurses,display_html,display_text};
|
|
303 extern tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples);
|
|
304 extern tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples);
|
|
305 extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples);
|
|
306
|
|
307 int bam_tview_main(int argc, char *argv[])
|
|
308 {
|
|
309 int view_mode=display_ncurses;
|
|
310 tview_t* tv=NULL;
|
|
311 char *samples=NULL, *position=NULL;
|
|
312 int c;
|
|
313 while ((c = getopt(argc, argv, "s:p:d:")) >= 0) {
|
|
314 switch (c) {
|
|
315 case 's': samples=optarg; break;
|
|
316 case 'p': position=optarg; break;
|
|
317 case 'd':
|
|
318 {
|
|
319 switch(optarg[0])
|
|
320 {
|
|
321 case 'H': case 'h': view_mode=display_html;break;
|
|
322 case 'T': case 't': view_mode=display_text;break;
|
|
323 case 'C': case 'c': view_mode=display_ncurses;break;
|
|
324 default: view_mode=display_ncurses;break;
|
|
325 }
|
|
326 break;
|
|
327 }
|
|
328 default: error(NULL);
|
|
329 }
|
|
330 }
|
|
331 if (argc==optind) error(NULL);
|
|
332
|
|
333 switch(view_mode)
|
|
334 {
|
|
335 case display_ncurses:
|
|
336 {
|
|
337 tv = curses_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
|
|
338 break;
|
|
339 }
|
|
340 case display_text:
|
|
341 {
|
|
342 tv = text_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
|
|
343 break;
|
|
344 }
|
|
345 case display_html:
|
|
346 {
|
|
347 tv = html_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
|
|
348 break;
|
|
349 }
|
|
350 }
|
|
351 if(tv==NULL)
|
|
352 {
|
|
353 error("cannot create view");
|
|
354 return EXIT_FAILURE;
|
|
355 }
|
|
356
|
|
357 if ( position )
|
|
358 {
|
|
359 int _tid = -1, _beg, _end;
|
|
360 bam_parse_region(tv->header, position, &_tid, &_beg, &_end);
|
|
361 if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; }
|
|
362 }
|
|
363 tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
|
|
364 tv->my_loop(tv);
|
|
365 tv->my_destroy(tv);
|
|
366
|
|
367 return EXIT_SUCCESS;
|
|
368 }
|