annotate PsiCLASS-1.0.2/samtools-0.1.19/bam2depth.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /* This program demonstrates how to generate pileup from multiple BAMs
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 * simutaneously, to achieve random access and to use the BED interface.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 * To compile this program separately, you may:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 * gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 #include <unistd.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 #include "bam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 typedef struct { // auxiliary data structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 bamFile fp; // the file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 bam_iter_t iter; // NULL if a region not specified
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 int min_mapQ, min_len; // mapQ filter; length filter
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 } aux_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 void *bed_read(const char *fn); // read a BED or position list file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 void bed_destroy(void *_h); // destroy the BED data structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 // This function reads a BAM alignment from one BAM file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 if (!(b->core.flag&BAM_FUNMAP)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 else if (aux->min_len && bam_cigar2qlen(&b->core, bam1_cigar(b)) < aux->min_len) b->core.flag |= BAM_FUNMAP;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 return ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 int read_file_list(const char *file_list,int *n,char **argv[]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 #ifdef _MAIN_BAM2DEPTH
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 int main(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 int main_depth(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 const bam_pileup1_t **plp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 char *reg = 0; // specified region
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 void *bed = 0; // BED data structure
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 char *file_list = NULL, **fn = NULL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 bam_header_t *h = 0; // BAM header of the 1st input
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 aux_t **data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 bam_mplp_t mplp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 // parse the command line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 switch (n) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 case 'l': min_len = atoi(optarg); break; // minimum query length
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 case 'q': baseQ = atoi(optarg); break; // base quality threshold
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 case 'f': file_list = optarg; break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 if (optind == argc && !file_list) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 fprintf(stderr, "\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 fprintf(stderr, "Options:\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 fprintf(stderr, " -b <bed> list of positions or regions\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 fprintf(stderr, " -f <list> list of input BAM filenames, one per line [null]\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 fprintf(stderr, " -l <int> minQLen\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 fprintf(stderr, " -q <int> base quality threshold\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 fprintf(stderr, " -Q <int> mapping quality threshold\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 fprintf(stderr, " -r <chr:from-to> region\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 fprintf(stderr, "\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 // initialize the auxiliary data structures
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 if (file_list)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 n = nfiles;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 argv = fn;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 optind = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 n = argc - optind; // the number of BAMs on the command line
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 data = calloc(n, sizeof(void*)); // data[i] for the i-th input
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 beg = 0; end = 1<<30; tid = -1; // set the default region
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 bam_header_t *htmp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 data[i] = calloc(1, sizeof(aux_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 data[i]->min_mapQ = mapQ; // set the mapQ filter
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 data[i]->min_len = min_len; // set the qlen filter
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 htmp = bam_header_read(data[i]->fp); // read the BAM header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if (i == 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 h = htmp; // keep the header of the 1st BAM
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 if (tid >= 0) { // if a region is specified and parsed successfully
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 bam_index_destroy(idx); // the index is not needed any more; phase out of the memory
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 // the core multi-pileup loop
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 if (pos < beg || pos >= end) continue; // out of range; skip
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 for (i = 0; i < n; ++i) { // base level filters have to go here
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 int j, m = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 for (j = 0; j < n_plp[i]; ++j) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 printf("\t%d", n_plp[i] - m); // this the depth to output
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 putchar('\n');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 free(n_plp); free(plp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 bam_mplp_destroy(mplp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 bam_header_destroy(h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 for (i = 0; i < n; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 bam_close(data[i]->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 if (data[i]->iter) bam_iter_destroy(data[i]->iter);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 free(data[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 free(data); free(reg);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 if (bed) bed_destroy(bed);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 if ( file_list )
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 for (i=0; i<n; i++) free(fn[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 free(fn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 }