annotate PsiCLASS-1.0.2/samtools-0.1.19/examples/chk_indel.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /* To compile, copy this file to the samtools source code directory and compile with:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 gcc -g -O2 -Wall chk_indel_rg.c -o chk_indel_rg -Wall -I. -L. -lbam -lz
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include "bam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 long cnt[4]; // short:ins, short:del, long:ins, long:del
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 } rgcnt_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #include "khash.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 KHASH_MAP_INIT_STR(rgcnt, rgcnt_t)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 #define MAX_LEN 127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 #define Q_THRES 10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 #define L_THRES 6 // short: <=L_THRES; otherwise long
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 int main(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 bamFile fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 bam1_t *b;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 int i, x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 khash_t(rgcnt) *h;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 if (argc == 1) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 fprintf(stderr, "Usage: chk_indel_rg <in.bam>\n\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 fprintf(stderr, "Output: filename, RG, #ins-in-short-homopolymer, #del-in-short, #ins-in-long, #del-in-long\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 h = kh_init(rgcnt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 fp = bam_open(argv[1], "r");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 bam_header_destroy(bam_header_read(fp)); // we do not need the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 b = bam_init1();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 while (bam_read1(fp, b) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 if (b->core.n_cigar >= 3 && b->core.qual >= Q_THRES) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 const uint8_t *seq;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 const uint32_t *cigar = bam1_cigar(b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 char *rg;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 for (i = 0; i < b->core.n_cigar; ++i) // check if there are 1bp indels
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 if (bam_cigar_oplen(cigar[i]) == 1 && (bam_cigar_op(cigar[i]) == BAM_CDEL || bam_cigar_op(cigar[i]) == BAM_CINS))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 if (i == b->core.n_cigar) continue; // no 1bp ins or del
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 if ((rg = (char*)bam_aux_get(b, "RG")) == 0) continue; // no RG tag
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 seq = bam1_seq(b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 for (i = x = 0; i < b->core.n_cigar; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 int op = bam_cigar_op(cigar[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 if (bam_cigar_oplen(cigar[i]) == 1 && (op == BAM_CDEL || op == BAM_CINS)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 int c, j, hrun, which;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 c = bam1_seqi(seq, x);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 for (j = x + 1, hrun = 0; j < b->core.l_qseq; ++j, ++hrun) // calculate the hompolymer run length
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 if (bam1_seqi(seq, j) != c) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 k = kh_get(rgcnt, h, rg + 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 if (k == kh_end(h)) { // absent
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 char *key = strdup(rg + 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 k = kh_put(rgcnt, h, key, &c);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 memset(&kh_val(h, k), 0, sizeof(rgcnt_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 which = (hrun <= L_THRES? 0 : 1)<<1 | (op == BAM_CINS? 0 : 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 ++kh_val(h, k).cnt[which];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 if (bam_cigar_type(op)&1) ++x;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 for (k = 0; k != kh_end(h); ++k) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 if (!kh_exist(h, k)) continue;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 printf("%s\t%s", argv[1], kh_key(h, k));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 for (i = 0; i < 4; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 printf("\t%ld", kh_val(h, k).cnt[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 putchar('\n');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 free((char*)kh_key(h, k));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 bam_destroy1(b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 bam_close(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 kh_destroy(rgcnt, h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 }