Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/examples/chk_indel.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 /* To compile, copy this file to the samtools source code directory and compile with: | |
2 gcc -g -O2 -Wall chk_indel_rg.c -o chk_indel_rg -Wall -I. -L. -lbam -lz | |
3 */ | |
4 | |
5 #include <string.h> | |
6 #include "bam.h" | |
7 | |
8 typedef struct { | |
9 long cnt[4]; // short:ins, short:del, long:ins, long:del | |
10 } rgcnt_t; | |
11 | |
12 #include "khash.h" | |
13 KHASH_MAP_INIT_STR(rgcnt, rgcnt_t) | |
14 | |
15 #define MAX_LEN 127 | |
16 #define Q_THRES 10 | |
17 #define L_THRES 6 // short: <=L_THRES; otherwise long | |
18 | |
19 int main(int argc, char *argv[]) | |
20 { | |
21 bamFile fp; | |
22 bam1_t *b; | |
23 int i, x; | |
24 khash_t(rgcnt) *h; | |
25 khint_t k; | |
26 | |
27 if (argc == 1) { | |
28 fprintf(stderr, "Usage: chk_indel_rg <in.bam>\n\n"); | |
29 fprintf(stderr, "Output: filename, RG, #ins-in-short-homopolymer, #del-in-short, #ins-in-long, #del-in-long\n"); | |
30 return 1; | |
31 } | |
32 | |
33 h = kh_init(rgcnt); | |
34 fp = bam_open(argv[1], "r"); | |
35 bam_header_destroy(bam_header_read(fp)); // we do not need the header | |
36 b = bam_init1(); | |
37 | |
38 while (bam_read1(fp, b) >= 0) { | |
39 if (b->core.n_cigar >= 3 && b->core.qual >= Q_THRES) { | |
40 const uint8_t *seq; | |
41 const uint32_t *cigar = bam1_cigar(b); | |
42 char *rg; | |
43 for (i = 0; i < b->core.n_cigar; ++i) // check if there are 1bp indels | |
44 if (bam_cigar_oplen(cigar[i]) == 1 && (bam_cigar_op(cigar[i]) == BAM_CDEL || bam_cigar_op(cigar[i]) == BAM_CINS)) | |
45 break; | |
46 if (i == b->core.n_cigar) continue; // no 1bp ins or del | |
47 if ((rg = (char*)bam_aux_get(b, "RG")) == 0) continue; // no RG tag | |
48 seq = bam1_seq(b); | |
49 for (i = x = 0; i < b->core.n_cigar; ++i) { | |
50 int op = bam_cigar_op(cigar[i]); | |
51 if (bam_cigar_oplen(cigar[i]) == 1 && (op == BAM_CDEL || op == BAM_CINS)) { | |
52 int c, j, hrun, which; | |
53 c = bam1_seqi(seq, x); | |
54 for (j = x + 1, hrun = 0; j < b->core.l_qseq; ++j, ++hrun) // calculate the hompolymer run length | |
55 if (bam1_seqi(seq, j) != c) break; | |
56 k = kh_get(rgcnt, h, rg + 1); | |
57 if (k == kh_end(h)) { // absent | |
58 char *key = strdup(rg + 1); | |
59 k = kh_put(rgcnt, h, key, &c); | |
60 memset(&kh_val(h, k), 0, sizeof(rgcnt_t)); | |
61 } | |
62 which = (hrun <= L_THRES? 0 : 1)<<1 | (op == BAM_CINS? 0 : 1); | |
63 ++kh_val(h, k).cnt[which]; | |
64 } | |
65 if (bam_cigar_type(op)&1) ++x; | |
66 } | |
67 } | |
68 } | |
69 | |
70 for (k = 0; k != kh_end(h); ++k) { | |
71 if (!kh_exist(h, k)) continue; | |
72 printf("%s\t%s", argv[1], kh_key(h, k)); | |
73 for (i = 0; i < 4; ++i) | |
74 printf("\t%ld", kh_val(h, k).cnt[i]); | |
75 putchar('\n'); | |
76 free((char*)kh_key(h, k)); | |
77 } | |
78 | |
79 bam_destroy1(b); | |
80 bam_close(fp); | |
81 kh_destroy(rgcnt, h); | |
82 return 0; | |
83 } |