0
|
1 #include <string.h>
|
|
2 #include <unistd.h>
|
|
3 #include "faidx.h"
|
|
4 #include "sam.h"
|
|
5
|
|
/* Flag bit in the samfile_t `type` field: set by samopen() when the mode string contains 'b' (binary stream in fp->x.bam). */
6 #define TYPE_BAM 1
|
|
/* Flag bit in the samfile_t `type` field: set by samopen() when the mode string contains 'r' (handle opened for reading). */
7 #define TYPE_READ 2
|
|
8
|
|
9 bam_header_t *bam_header_dup(const bam_header_t *h0)
|
|
10 {
|
|
11 bam_header_t *h;
|
|
12 int i;
|
|
13 h = bam_header_init();
|
|
14 *h = *h0;
|
|
15 h->hash = h->dict = h->rg2lib = 0;
|
|
16 h->text = (char*)calloc(h->l_text + 1, 1);
|
|
17 memcpy(h->text, h0->text, h->l_text);
|
|
18 h->target_len = (uint32_t*)calloc(h->n_targets, 4);
|
|
19 h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
|
|
20 for (i = 0; i < h->n_targets; ++i) {
|
|
21 h->target_len[i] = h0->target_len[i];
|
|
22 h->target_name[i] = strdup(h0->target_name[i]);
|
|
23 }
|
|
24 return h;
|
|
25 }
|
|
26 static void append_header_text(bam_header_t *header, char* text, int len)
|
|
27 {
|
|
28 int x = header->l_text + 1;
|
|
29 int y = header->l_text + len + 1; // 1 byte null
|
|
30 if (text == 0) return;
|
|
31 kroundup32(x);
|
|
32 kroundup32(y);
|
|
33 if (x < y) header->text = (char*)realloc(header->text, y);
|
|
34 strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
|
|
35 header->l_text += len;
|
|
36 header->text[header->l_text] = 0;
|
|
37 }
|
|
38
|
|
39 int samthreads(samfile_t *fp, int n_threads, int n_sub_blks)
|
|
40 {
|
|
41 if (!(fp->type&1) || (fp->type&2)) return -1;
|
|
42 bgzf_mt(fp->x.bam, n_threads, n_sub_blks);
|
|
43 return 0;
|
|
44 }
|
|
45
|
|
46 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
|
|
47 {
|
|
48 samfile_t *fp;
|
|
49 fp = (samfile_t*)calloc(1, sizeof(samfile_t));
|
|
50 if (strchr(mode, 'r')) { // read
|
|
51 fp->type |= TYPE_READ;
|
|
52 if (strchr(mode, 'b')) { // binary
|
|
53 fp->type |= TYPE_BAM;
|
|
54 fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
|
|
55 if (fp->x.bam == 0) goto open_err_ret;
|
|
56 fp->header = bam_header_read(fp->x.bam);
|
|
57 } else { // text
|
|
58 fp->x.tamr = sam_open(fn);
|
|
59 if (fp->x.tamr == 0) goto open_err_ret;
|
|
60 fp->header = sam_header_read(fp->x.tamr);
|
|
61 if (fp->header->n_targets == 0) { // no @SQ fields
|
|
62 if (aux) { // check if aux is present
|
|
63 bam_header_t *textheader = fp->header;
|
|
64 fp->header = sam_header_read2((const char*)aux);
|
|
65 if (fp->header == 0) goto open_err_ret;
|
|
66 append_header_text(fp->header, textheader->text, textheader->l_text);
|
|
67 bam_header_destroy(textheader);
|
|
68 }
|
|
69 if (fp->header->n_targets == 0 && bam_verbose >= 1)
|
|
70 fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
|
|
71 } else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
|
|
72 }
|
|
73 } else if (strchr(mode, 'w')) { // write
|
|
74 fp->header = bam_header_dup((const bam_header_t*)aux);
|
|
75 if (strchr(mode, 'b')) { // binary
|
|
76 char bmode[3];
|
|
77 int i, compress_level = -1;
|
|
78 for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
|
|
79 if (mode[i]) compress_level = mode[i] - '0';
|
|
80 if (strchr(mode, 'u')) compress_level = 0;
|
|
81 bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
|
|
82 fp->type |= TYPE_BAM;
|
|
83 fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
|
|
84 if (fp->x.bam == 0) goto open_err_ret;
|
|
85 bam_header_write(fp->x.bam, fp->header);
|
|
86 } else { // text
|
|
87 // open file
|
|
88 fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
|
|
89 if (fp->x.tamw == 0) goto open_err_ret;
|
|
90 if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
|
|
91 else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
|
|
92 else fp->type |= BAM_OFDEC<<2;
|
|
93 // write header
|
|
94 if (strchr(mode, 'h')) {
|
|
95 int i;
|
|
96 bam_header_t *alt;
|
|
97 // parse the header text
|
|
98 alt = bam_header_init();
|
|
99 alt->l_text = fp->header->l_text; alt->text = fp->header->text;
|
|
100 sam_header_parse(alt);
|
|
101 alt->l_text = 0; alt->text = 0;
|
|
102 // check if there are @SQ lines in the header
|
|
103 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL
|
|
104 if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
|
|
105 if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
|
|
106 fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
|
|
107 } else { // then dump ->target_{name,len}
|
|
108 for (i = 0; i < fp->header->n_targets; ++i)
|
|
109 fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
|
|
110 }
|
|
111 bam_header_destroy(alt);
|
|
112 }
|
|
113 }
|
|
114 }
|
|
115 return fp;
|
|
116
|
|
117 open_err_ret:
|
|
118 free(fp);
|
|
119 return 0;
|
|
120 }
|
|
121
|
|
122 void samclose(samfile_t *fp)
|
|
123 {
|
|
124 if (fp == 0) return;
|
|
125 if (fp->header) bam_header_destroy(fp->header);
|
|
126 if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
|
|
127 else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
|
|
128 else fclose(fp->x.tamw);
|
|
129 free(fp);
|
|
130 }
|
|
131
|
|
132 int samread(samfile_t *fp, bam1_t *b)
|
|
133 {
|
|
134 if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
|
|
135 if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
|
|
136 else return sam_read1(fp->x.tamr, fp->header, b);
|
|
137 }
|
|
138
|
|
139 int samwrite(samfile_t *fp, const bam1_t *b)
|
|
140 {
|
|
141 if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
|
|
142 if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
|
|
143 else {
|
|
144 char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
|
|
145 int l = strlen(s);
|
|
146 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
|
|
147 free(s);
|
|
148 return l + 1;
|
|
149 }
|
|
150 }
|
|
151
|
|
152 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
|
|
153 {
|
|
154 bam_plbuf_t *buf;
|
|
155 int ret;
|
|
156 bam1_t *b;
|
|
157 b = bam_init1();
|
|
158 buf = bam_plbuf_init(func, func_data);
|
|
159 bam_plbuf_set_mask(buf, mask);
|
|
160 while ((ret = samread(fp, b)) >= 0)
|
|
161 bam_plbuf_push(b, buf);
|
|
162 bam_plbuf_push(0, buf);
|
|
163 bam_plbuf_destroy(buf);
|
|
164 bam_destroy1(b);
|
|
165 return 0;
|
|
166 }
|
|
167
|
|
168 char *samfaipath(const char *fn_ref)
|
|
169 {
|
|
170 char *fn_list = 0;
|
|
171 if (fn_ref == 0) return 0;
|
|
172 fn_list = calloc(strlen(fn_ref) + 5, 1);
|
|
173 strcat(strcpy(fn_list, fn_ref), ".fai");
|
|
174 if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
|
|
175 if (access(fn_ref, R_OK) == -1) {
|
|
176 fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
|
|
177 } else {
|
|
178 if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n");
|
|
179 if (fai_build(fn_ref) == -1) {
|
|
180 fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
|
|
181 free(fn_list); fn_list = 0;
|
|
182 }
|
|
183 }
|
|
184 }
|
|
185 return fn_list;
|
|
186 }
|