0
|
1 /* This program demonstrates how to generate pileup from multiple BAMs
|
|
2 * simutaneously, to achieve random access and to use the BED interface.
|
|
3 * To compile this program separately, you may:
|
|
4 *
|
|
5 * gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz
|
|
6 */
|
|
7 #include <stdlib.h>
|
|
8 #include <string.h>
|
|
9 #include <stdio.h>
|
|
10 #include <unistd.h>
|
|
11 #include "bam.h"
|
|
12
|
|
13 typedef struct { // auxiliary data structure
|
|
14 bamFile fp; // the file handler
|
|
15 bam_iter_t iter; // NULL if a region not specified
|
|
16 int min_mapQ; // mapQ filter
|
|
17 } aux_t;
|
|
18
|
|
19 void *bed_read(const char *fn); // read a BED or position list file
|
|
20 void bed_destroy(void *_h); // destroy the BED data structure
|
|
21 int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps
|
|
22
|
|
23 // This function reads a BAM alignment from one BAM file.
|
|
24 static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup
|
|
25 {
|
|
26 aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
|
|
27 int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b);
|
|
28 if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
|
|
29 return ret;
|
|
30 }
|
|
31
|
|
32 #ifdef _MAIN_BAM2DEPTH
|
|
33 int main(int argc, char *argv[])
|
|
34 #else
|
|
35 int main_depth(int argc, char *argv[])
|
|
36 #endif
|
|
37 {
|
|
38 int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0;
|
|
39 const bam_pileup1_t **plp;
|
|
40 char *reg = 0; // specified region
|
|
41 void *bed = 0; // BED data structure
|
|
42 bam_header_t *h = 0; // BAM header of the 1st input
|
|
43 aux_t **data;
|
|
44 bam_mplp_t mplp;
|
|
45
|
|
46 // parse the command line
|
|
47 while ((n = getopt(argc, argv, "r:b:q:Q:")) >= 0) {
|
|
48 switch (n) {
|
|
49 case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header
|
|
50 case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now
|
|
51 case 'q': baseQ = atoi(optarg); break; // base quality threshold
|
|
52 case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold
|
|
53 }
|
|
54 }
|
|
55 if (optind == argc) {
|
|
56 fprintf(stderr, "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] <in1.bam> [...]\n");
|
|
57 return 1;
|
|
58 }
|
|
59
|
|
60 // initialize the auxiliary data structures
|
|
61 n = argc - optind; // the number of BAMs on the command line
|
|
62 data = calloc(n, sizeof(void*)); // data[i] for the i-th input
|
|
63 beg = 0; end = 1<<30; tid = -1; // set the default region
|
|
64 for (i = 0; i < n; ++i) {
|
|
65 bam_header_t *htmp;
|
|
66 data[i] = calloc(1, sizeof(aux_t));
|
|
67 data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM
|
|
68 data[i]->min_mapQ = mapQ; // set the mapQ filter
|
|
69 htmp = bam_header_read(data[i]->fp); // read the BAM header
|
|
70 if (i == 0) {
|
|
71 h = htmp; // keep the header of the 1st BAM
|
|
72 if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region
|
|
73 } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header
|
|
74 if (tid >= 0) { // if a region is specified and parsed successfully
|
|
75 bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index
|
|
76 data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator
|
|
77 bam_index_destroy(idx); // the index is not needed any more; phase out of the memory
|
|
78 }
|
|
79 }
|
|
80
|
|
81 // the core multi-pileup loop
|
|
82 mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
|
|
83 n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
|
|
84 plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp)
|
|
85 while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position
|
|
86 if (pos < beg || pos >= end) continue; // out of range; skip
|
|
87 if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
|
|
88 fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
|
|
89 for (i = 0; i < n; ++i) { // base level filters have to go here
|
|
90 int j, m = 0;
|
|
91 for (j = 0; j < n_plp[i]; ++j) {
|
|
92 const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
|
|
93 if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
|
|
94 else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
|
|
95 }
|
|
96 printf("\t%d", n_plp[i] - m); // this the depth to output
|
|
97 }
|
|
98 putchar('\n');
|
|
99 }
|
|
100 free(n_plp); free(plp);
|
|
101 bam_mplp_destroy(mplp);
|
|
102
|
|
103 bam_header_destroy(h);
|
|
104 for (i = 0; i < n; ++i) {
|
|
105 bam_close(data[i]->fp);
|
|
106 if (data[i]->iter) bam_iter_destroy(data[i]->iter);
|
|
107 free(data[i]);
|
|
108 }
|
|
109 free(data); free(reg);
|
|
110 if (bed) bed_destroy(bed);
|
|
111 return 0;
|
|
112 }
|