Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/bam_tview.c @ 0:903fc43d6227 draft default tip
Uploaded
| author | lsong10 |
|---|---|
| date | Fri, 26 Mar 2021 16:52:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:903fc43d6227 |
|---|---|
| 1 #include <assert.h> | |
| 2 #include "bam_tview.h" | |
| 3 | |
| 4 int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples) | |
| 5 { | |
| 6 assert(tv!=NULL); | |
| 7 assert(fn!=NULL); | |
| 8 tv->mrow = 24; tv->mcol = 80; | |
| 9 tv->color_for = TV_COLOR_MAPQ; | |
| 10 tv->is_dot = 1; | |
| 11 | |
| 12 tv->fp = bam_open(fn, "r"); | |
| 13 if(tv->fp==0) | |
| 14 { | |
| 15 fprintf(stderr,"bam_open %s. %s\n", fn,fn_fa); | |
| 16 exit(EXIT_FAILURE); | |
| 17 } | |
| 18 bgzf_set_cache_size(tv->fp, 8 * 1024 *1024); | |
| 19 assert(tv->fp); | |
| 20 | |
| 21 tv->header = bam_header_read(tv->fp); | |
| 22 if(tv->header==0) | |
| 23 { | |
| 24 fprintf(stderr,"Cannot read '%s'.\n", fn); | |
| 25 exit(EXIT_FAILURE); | |
| 26 } | |
| 27 tv->idx = bam_index_load(fn); | |
| 28 if (tv->idx == 0) | |
| 29 { | |
| 30 fprintf(stderr,"Cannot read index for '%s'.\n", fn); | |
| 31 exit(EXIT_FAILURE); | |
| 32 } | |
| 33 tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv); | |
| 34 if (fn_fa) tv->fai = fai_load(fn_fa); | |
| 35 tv->bca = bcf_call_init(0.83, 13); | |
| 36 tv->ins = 1; | |
| 37 | |
| 38 if ( samples ) | |
| 39 { | |
| 40 if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text); | |
| 41 void *iter = tv->header->dict; | |
| 42 const char *key, *val; | |
| 43 int n = 0; | |
| 44 tv->rg_hash = kh_init(kh_rg); | |
| 45 while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) ) | |
| 46 { | |
| 47 if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) ) | |
| 48 { | |
| 49 khiter_t k = kh_get(kh_rg, tv->rg_hash, key); | |
| 50 if ( k != kh_end(tv->rg_hash) ) continue; | |
| 51 int ret; | |
| 52 k = kh_put(kh_rg, tv->rg_hash, key, &ret); | |
| 53 kh_value(tv->rg_hash, k) = val; | |
| 54 n++; | |
| 55 } | |
| 56 } | |
| 57 if ( !n ) | |
| 58 { | |
| 59 fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples); | |
| 60 exit(EXIT_FAILURE); | |
| 61 } | |
| 62 } | |
| 63 | |
| 64 return 0; | |
| 65 } | |
| 66 | |
| 67 | |
| 68 void base_tv_destroy(tview_t* tv) | |
| 69 { | |
| 70 bam_lplbuf_destroy(tv->lplbuf); | |
| 71 bcf_call_destroy(tv->bca); | |
| 72 bam_index_destroy(tv->idx); | |
| 73 if (tv->fai) fai_destroy(tv->fai); | |
| 74 free(tv->ref); | |
| 75 bam_header_destroy(tv->header); | |
| 76 bam_close(tv->fp); | |
| 77 } | |
| 78 | |
| 79 | |
| 80 int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) | |
| 81 { | |
| 82 extern unsigned char bam_nt16_table[256]; | |
| 83 tview_t *tv = (tview_t*)data; | |
| 84 int i, j, c, rb, attr, max_ins = 0; | |
| 85 uint32_t call = 0; | |
| 86 if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen | |
| 87 // print referece | |
| 88 rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'; | |
| 89 for (i = tv->last_pos + 1; i < pos; ++i) { | |
| 90 if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1); | |
| 91 c = tv->ref? tv->ref[i - tv->left_pos] : 'N'; | |
| 92 tv->my_mvaddch(tv,1, tv->ccol++, c); | |
| 93 } | |
| 94 if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); | |
| 95 { // call consensus | |
| 96 bcf_callret1_t bcr; | |
| 97 int qsum[4], a1, a2, tmp; | |
| 98 double p[3], prior = 30; | |
| 99 bcf_call_glfgen(n, pl, bam_nt16_table[rb], tv->bca, &bcr); | |
| 100 for (i = 0; i < 4; ++i) qsum[i] = bcr.qsum[i]<<2 | i; | |
| 101 for (i = 1; i < 4; ++i) // insertion sort | |
| 102 for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j) | |
| 103 tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp; | |
| 104 a1 = qsum[0]&3; a2 = qsum[1]&3; | |
| 105 p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2]; | |
| 106 if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3; | |
| 107 if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3; | |
| 108 if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499); | |
| 109 else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499); | |
| 110 else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499); | |
| 111 } | |
| 112 attr = tv->my_underline(tv); | |
| 113 c = ",ACMGRSVTWYHKDBN"[call>>16&0xf]; | |
| 114 i = (call&0xffff)/10+1; | |
| 115 if (i > 4) i = 4; | |
| 116 attr |= tv->my_colorpair(tv,i); | |
| 117 if (c == toupper(rb)) c = '.'; | |
| 118 tv->my_attron(tv,attr); | |
| 119 tv->my_mvaddch(tv,2, tv->ccol, c); | |
| 120 tv->my_attroff(tv,attr); | |
| 121 if(tv->ins) { | |
| 122 // calculate maximum insert | |
| 123 for (i = 0; i < n; ++i) { | |
| 124 const bam_pileup1_t *p = pl + i; | |
| 125 if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel; | |
| 126 } | |
| 127 } | |
| 128 // core loop | |
| 129 for (j = 0; j <= max_ins; ++j) { | |
| 130 for (i = 0; i < n; ++i) { | |
| 131 const bam_pileup1_t *p = pl + i; | |
| 132 int row = TV_MIN_ALNROW + p->level - tv->row_shift; | |
| 133 if (j == 0) { | |
| 134 if (!p->is_del) { | |
| 135 if (tv->base_for == TV_BASE_COLOR_SPACE && | |
| 136 (c = bam_aux_getCSi(p->b, p->qpos))) { | |
| 137 // assume that if we found one color, we will be able to get the color error | |
| 138 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.'; | |
| 139 } else { | |
| 140 if (tv->show_name) { | |
| 141 char *name = bam1_qname(p->b); | |
| 142 c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos]; | |
| 143 } else { | |
| 144 c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; | |
| 145 if (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; | |
| 146 } | |
| 147 } | |
| 148 } else c = p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*'; | |
| 149 } else { // padding | |
| 150 if (j > p->indel) c = '*'; | |
| 151 else { // insertion | |
| 152 if (tv->base_for == TV_BASE_NUCL) { | |
| 153 if (tv->show_name) { | |
| 154 char *name = bam1_qname(p->b); | |
| 155 c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j]; | |
| 156 } else { | |
| 157 c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; | |
| 158 if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; | |
| 159 } | |
| 160 } else { | |
| 161 c = bam_aux_getCSi(p->b, p->qpos + j); | |
| 162 if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? ',' : '.'; | |
| 163 } | |
| 164 } | |
| 165 } | |
| 166 if (row > TV_MIN_ALNROW && row < tv->mrow) { | |
| 167 int x; | |
| 168 attr = 0; | |
| 169 if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR)) | |
| 170 || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv); | |
| 171 if (tv->color_for == TV_COLOR_BASEQ) { | |
| 172 x = bam1_qual(p->b)[p->qpos]/10 + 1; | |
| 173 if (x > 4) x = 4; | |
| 174 attr |= tv->my_colorpair(tv,x); | |
| 175 } else if (tv->color_for == TV_COLOR_MAPQ) { | |
| 176 x = p->b->core.qual/10 + 1; | |
| 177 if (x > 4) x = 4; | |
| 178 attr |= tv->my_colorpair(tv,x); | |
| 179 } else if (tv->color_for == TV_COLOR_NUCL) { | |
| 180 x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5; | |
| 181 attr |= tv->my_colorpair(tv,x); | |
| 182 } else if(tv->color_for == TV_COLOR_COL) { | |
| 183 x = 0; | |
| 184 switch(bam_aux_getCSi(p->b, p->qpos)) { | |
| 185 case '0': x = 0; break; | |
| 186 case '1': x = 1; break; | |
| 187 case '2': x = 2; break; | |
| 188 case '3': x = 3; break; | |
| 189 case '4': x = 4; break; | |
| 190 default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break; | |
| 191 } | |
| 192 x+=5; | |
| 193 attr |= tv->my_colorpair(tv,x); | |
| 194 } else if(tv->color_for == TV_COLOR_COLQ) { | |
| 195 x = bam_aux_getCQi(p->b, p->qpos); | |
| 196 if(0 == x) x = bam1_qual(p->b)[p->qpos]; | |
| 197 x = x/10 + 1; | |
| 198 if (x > 4) x = 4; | |
| 199 attr |= tv->my_colorpair(tv,x); | |
| 200 } | |
| 201 tv->my_attron(tv,attr); | |
| 202 tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c)); | |
| 203 tv->my_attroff(tv,attr); | |
| 204 } | |
| 205 } | |
| 206 c = j? '*' : rb; | |
| 207 if (c == '*') { | |
| 208 attr = tv->my_colorpair(tv,8); | |
| 209 tv->my_attron(tv,attr); | |
| 210 tv->my_mvaddch(tv,1, tv->ccol++, c); | |
| 211 tv->my_attroff(tv,attr); | |
| 212 } else tv->my_mvaddch(tv,1, tv->ccol++, c); | |
| 213 } | |
| 214 tv->last_pos = pos; | |
| 215 return 0; | |
| 216 } | |
| 217 | |
| 218 | |
| 219 | |
| 220 | |
| 221 int tv_fetch_func(const bam1_t *b, void *data) | |
| 222 { | |
| 223 tview_t *tv = (tview_t*)data; | |
| 224 if ( tv->rg_hash ) | |
| 225 { | |
| 226 const uint8_t *rg = bam_aux_get(b, "RG"); | |
| 227 if ( !rg ) return 0; | |
| 228 khiter_t k = kh_get(kh_rg, tv->rg_hash, (const char*)(rg + 1)); | |
| 229 if ( k == kh_end(tv->rg_hash) ) return 0; | |
| 230 } | |
| 231 if (tv->no_skip) { | |
| 232 uint32_t *cigar = bam1_cigar(b); // this is cheating... | |
| 233 int i; | |
| 234 for (i = 0; i <b->core.n_cigar; ++i) { | |
| 235 if ((cigar[i]&0xf) == BAM_CREF_SKIP) | |
| 236 cigar[i] = cigar[i]>>4<<4 | BAM_CDEL; | |
| 237 } | |
| 238 } | |
| 239 bam_lplbuf_push(b, tv->lplbuf); | |
| 240 return 0; | |
| 241 } | |
| 242 | |
| 243 int base_draw_aln(tview_t *tv, int tid, int pos) | |
| 244 { | |
| 245 assert(tv!=NULL); | |
| 246 // reset | |
| 247 tv->my_clear(tv); | |
| 248 tv->curr_tid = tid; tv->left_pos = pos; | |
| 249 tv->last_pos = tv->left_pos - 1; | |
| 250 tv->ccol = 0; | |
| 251 // print ref and consensus | |
| 252 if (tv->fai) { | |
| 253 char *str; | |
| 254 if (tv->ref) free(tv->ref); | |
| 255 assert(tv->curr_tid>=0); | |
| 256 | |
| 257 str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1); | |
| 258 assert(str!=NULL); | |
| 259 sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol); | |
| 260 tv->ref = fai_fetch(tv->fai, str, &tv->l_ref); | |
| 261 free(str); | |
| 262 } | |
| 263 // draw aln | |
| 264 bam_lplbuf_reset(tv->lplbuf); | |
| 265 bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func); | |
| 266 bam_lplbuf_push(0, tv->lplbuf); | |
| 267 | |
| 268 while (tv->ccol < tv->mcol) { | |
| 269 int pos = tv->last_pos + 1; | |
| 270 if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); | |
| 271 tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'); | |
| 272 ++tv->last_pos; | |
| 273 } | |
| 274 return 0; | |
| 275 } | |
| 276 | |
| 277 | |
| 278 | |
| 279 | |
| 280 static void error(const char *format, ...) | |
| 281 { | |
| 282 if ( !format ) | |
| 283 { | |
| 284 fprintf(stderr, "\n"); | |
| 285 fprintf(stderr, "Usage: bamtk tview [options] <aln.bam> [ref.fasta]\n"); | |
| 286 fprintf(stderr, "Options:\n"); | |
| 287 fprintf(stderr, " -d display output as (H)tml or (C)urses or (T)ext \n"); | |
| 288 fprintf(stderr, " -p chr:pos go directly to this position\n"); | |
| 289 fprintf(stderr, " -s STR display only reads from this sample or group\n"); | |
| 290 fprintf(stderr, "\n\n"); | |
| 291 } | |
| 292 else | |
| 293 { | |
| 294 va_list ap; | |
| 295 va_start(ap, format); | |
| 296 vfprintf(stderr, format, ap); | |
| 297 va_end(ap); | |
| 298 } | |
| 299 exit(-1); | |
| 300 } | |
| 301 | |
| 302 enum dipsay_mode {display_ncurses,display_html,display_text}; | |
| 303 extern tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples); | |
| 304 extern tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples); | |
| 305 extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples); | |
| 306 | |
| 307 int bam_tview_main(int argc, char *argv[]) | |
| 308 { | |
| 309 int view_mode=display_ncurses; | |
| 310 tview_t* tv=NULL; | |
| 311 char *samples=NULL, *position=NULL; | |
| 312 int c; | |
| 313 while ((c = getopt(argc, argv, "s:p:d:")) >= 0) { | |
| 314 switch (c) { | |
| 315 case 's': samples=optarg; break; | |
| 316 case 'p': position=optarg; break; | |
| 317 case 'd': | |
| 318 { | |
| 319 switch(optarg[0]) | |
| 320 { | |
| 321 case 'H': case 'h': view_mode=display_html;break; | |
| 322 case 'T': case 't': view_mode=display_text;break; | |
| 323 case 'C': case 'c': view_mode=display_ncurses;break; | |
| 324 default: view_mode=display_ncurses;break; | |
| 325 } | |
| 326 break; | |
| 327 } | |
| 328 default: error(NULL); | |
| 329 } | |
| 330 } | |
| 331 if (argc==optind) error(NULL); | |
| 332 | |
| 333 switch(view_mode) | |
| 334 { | |
| 335 case display_ncurses: | |
| 336 { | |
| 337 tv = curses_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); | |
| 338 break; | |
| 339 } | |
| 340 case display_text: | |
| 341 { | |
| 342 tv = text_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); | |
| 343 break; | |
| 344 } | |
| 345 case display_html: | |
| 346 { | |
| 347 tv = html_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); | |
| 348 break; | |
| 349 } | |
| 350 } | |
| 351 if(tv==NULL) | |
| 352 { | |
| 353 error("cannot create view"); | |
| 354 return EXIT_FAILURE; | |
| 355 } | |
| 356 | |
| 357 if ( position ) | |
| 358 { | |
| 359 int _tid = -1, _beg, _end; | |
| 360 bam_parse_region(tv->header, position, &_tid, &_beg, &_end); | |
| 361 if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; } | |
| 362 } | |
| 363 tv->my_drawaln(tv, tv->curr_tid, tv->left_pos); | |
| 364 tv->my_loop(tv); | |
| 365 tv->my_destroy(tv); | |
| 366 | |
| 367 return EXIT_SUCCESS; | |
| 368 } |
