comparison PsiCLASS-1.0.2/samtools-0.1.19/bgzf.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:903fc43d6227
1 /* The MIT License
2
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
4 2011 Attractive Chaos <attractor@live.co.uk>
5
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <assert.h>
30 #include <pthread.h>
31 #include <sys/types.h>
32 #include "bgzf.h"
33
34 #ifdef _USE_KNETFILE
35 #include "knetfile.h"
36 typedef knetFile *_bgzf_file_t;
37 #define _bgzf_open(fn, mode) knet_open(fn, mode)
38 #define _bgzf_dopen(fp, mode) knet_dopen(fp, mode)
39 #define _bgzf_close(fp) knet_close(fp)
40 #define _bgzf_fileno(fp) ((fp)->fd)
41 #define _bgzf_tell(fp) knet_tell(fp)
42 #define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence)
43 #define _bgzf_read(fp, buf, len) knet_read(fp, buf, len)
44 #define _bgzf_write(fp, buf, len) knet_write(fp, buf, len)
45 #else // ~defined(_USE_KNETFILE)
46 #if defined(_WIN32) || defined(_MSC_VER)
47 #define ftello(fp) ftell(fp)
48 #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
49 #else // ~defined(_WIN32)
50 extern off_t ftello(FILE *stream);
51 extern int fseeko(FILE *stream, off_t offset, int whence);
52 #endif // ~defined(_WIN32)
53 typedef FILE *_bgzf_file_t;
54 #define _bgzf_open(fn, mode) fopen(fn, mode)
55 #define _bgzf_dopen(fp, mode) fdopen(fp, mode)
56 #define _bgzf_close(fp) fclose(fp)
57 #define _bgzf_fileno(fp) fileno(fp)
58 #define _bgzf_tell(fp) ftello(fp)
59 #define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence)
60 #define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp)
61 #define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp)
62 #endif // ~define(_USE_KNETFILE)
63
64 #define BLOCK_HEADER_LENGTH 18
65 #define BLOCK_FOOTER_LENGTH 8
66
67
68 /* BGZF/GZIP header (speciallized from RFC 1952; little endian):
69 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
70 | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN|
71 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
72 */
73 static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0";
74
75 #ifdef BGZF_CACHE
76 typedef struct {
77 int size;
78 uint8_t *block;
79 int64_t end_offset;
80 } cache_t;
81 #include "khash.h"
82 KHASH_MAP_INIT_INT64(cache, cache_t)
83 #endif
84
85 static inline void packInt16(uint8_t *buffer, uint16_t value)
86 {
87 buffer[0] = value;
88 buffer[1] = value >> 8;
89 }
90
91 static inline int unpackInt16(const uint8_t *buffer)
92 {
93 return buffer[0] | buffer[1] << 8;
94 }
95
96 static inline void packInt32(uint8_t *buffer, uint32_t value)
97 {
98 buffer[0] = value;
99 buffer[1] = value >> 8;
100 buffer[2] = value >> 16;
101 buffer[3] = value >> 24;
102 }
103
104 static BGZF *bgzf_read_init()
105 {
106 BGZF *fp;
107 fp = calloc(1, sizeof(BGZF));
108 fp->is_write = 0;
109 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
110 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
111 #ifdef BGZF_CACHE
112 fp->cache = kh_init(cache);
113 #endif
114 return fp;
115 }
116
117 static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level
118 {
119 BGZF *fp;
120 fp = calloc(1, sizeof(BGZF));
121 fp->is_write = 1;
122 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
123 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
124 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
125 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
126 return fp;
127 }
128 // get the compress level from the mode string
129 static int mode2level(const char *__restrict mode)
130 {
131 int i, compress_level = -1;
132 for (i = 0; mode[i]; ++i)
133 if (mode[i] >= '0' && mode[i] <= '9') break;
134 if (mode[i]) compress_level = (int)mode[i] - '0';
135 if (strchr(mode, 'u')) compress_level = 0;
136 return compress_level;
137 }
138
139 BGZF *bgzf_open(const char *path, const char *mode)
140 {
141 BGZF *fp = 0;
142 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
143 if (strchr(mode, 'r') || strchr(mode, 'R')) {
144 _bgzf_file_t fpr;
145 if ((fpr = _bgzf_open(path, "r")) == 0) return 0;
146 fp = bgzf_read_init();
147 fp->fp = fpr;
148 } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
149 FILE *fpw;
150 if ((fpw = fopen(path, "w")) == 0) return 0;
151 fp = bgzf_write_init(mode2level(mode));
152 fp->fp = fpw;
153 }
154 return fp;
155 }
156
157 BGZF *bgzf_dopen(int fd, const char *mode)
158 {
159 BGZF *fp = 0;
160 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
161 if (strchr(mode, 'r') || strchr(mode, 'R')) {
162 _bgzf_file_t fpr;
163 if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0;
164 fp = bgzf_read_init();
165 fp->fp = fpr;
166 } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
167 FILE *fpw;
168 if ((fpw = fdopen(fd, "w")) == 0) return 0;
169 fp = bgzf_write_init(mode2level(mode));
170 fp->fp = fpw;
171 }
172 return fp;
173 }
174
175 static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level)
176 {
177 uint32_t crc;
178 z_stream zs;
179 uint8_t *dst = (uint8_t*)_dst;
180
181 // compress the body
182 zs.zalloc = NULL; zs.zfree = NULL;
183 zs.next_in = src;
184 zs.avail_in = slen;
185 zs.next_out = dst + BLOCK_HEADER_LENGTH;
186 zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
187 if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer
188 if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1;
189 if (deflateEnd(&zs) != Z_OK) return -1;
190 *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
191 // write the header
192 memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
193 packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes
194 // write the footer
195 crc = crc32(crc32(0L, NULL, 0L), src, slen);
196 packInt32((uint8_t*)&dst[*dlen - 8], crc);
197 packInt32((uint8_t*)&dst[*dlen - 4], slen);
198 return 0;
199 }
200
201 // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
202 static int deflate_block(BGZF *fp, int block_length)
203 {
204 int comp_size = BGZF_MAX_BLOCK_SIZE;
205 if (bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level) != 0) {
206 fp->errcode |= BGZF_ERR_ZLIB;
207 return -1;
208 }
209 fp->block_offset = 0;
210 return comp_size;
211 }
212
213 // Inflate the block in fp->compressed_block into fp->uncompressed_block
214 static int inflate_block(BGZF* fp, int block_length)
215 {
216 z_stream zs;
217 zs.zalloc = NULL;
218 zs.zfree = NULL;
219 zs.next_in = fp->compressed_block + 18;
220 zs.avail_in = block_length - 16;
221 zs.next_out = fp->uncompressed_block;
222 zs.avail_out = BGZF_MAX_BLOCK_SIZE;
223
224 if (inflateInit2(&zs, -15) != Z_OK) {
225 fp->errcode |= BGZF_ERR_ZLIB;
226 return -1;
227 }
228 if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
229 inflateEnd(&zs);
230 fp->errcode |= BGZF_ERR_ZLIB;
231 return -1;
232 }
233 if (inflateEnd(&zs) != Z_OK) {
234 fp->errcode |= BGZF_ERR_ZLIB;
235 return -1;
236 }
237 return zs.total_out;
238 }
239
240 static int check_header(const uint8_t *header)
241 {
242 return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0
243 && unpackInt16((uint8_t*)&header[10]) == 6
244 && header[12] == 'B' && header[13] == 'C'
245 && unpackInt16((uint8_t*)&header[14]) == 2);
246 }
247
248 #ifdef BGZF_CACHE
249 static void free_cache(BGZF *fp)
250 {
251 khint_t k;
252 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
253 if (fp->is_write) return;
254 for (k = kh_begin(h); k < kh_end(h); ++k)
255 if (kh_exist(h, k)) free(kh_val(h, k).block);
256 kh_destroy(cache, h);
257 }
258
259 static int load_block_from_cache(BGZF *fp, int64_t block_address)
260 {
261 khint_t k;
262 cache_t *p;
263 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
264 k = kh_get(cache, h, block_address);
265 if (k == kh_end(h)) return 0;
266 p = &kh_val(h, k);
267 if (fp->block_length != 0) fp->block_offset = 0;
268 fp->block_address = block_address;
269 fp->block_length = p->size;
270 memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE);
271 _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET);
272 return p->size;
273 }
274
275 static void cache_block(BGZF *fp, int size)
276 {
277 int ret;
278 khint_t k;
279 cache_t *p;
280 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
281 if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return;
282 if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > fp->cache_size) {
283 /* A better way would be to remove the oldest block in the
284 * cache, but here we remove a random one for simplicity. This
285 * should not have a big impact on performance. */
286 for (k = kh_begin(h); k < kh_end(h); ++k)
287 if (kh_exist(h, k)) break;
288 if (k < kh_end(h)) {
289 free(kh_val(h, k).block);
290 kh_del(cache, h, k);
291 }
292 }
293 k = kh_put(cache, h, fp->block_address, &ret);
294 if (ret == 0) return; // if this happens, a bug!
295 p = &kh_val(h, k);
296 p->size = fp->block_length;
297 p->end_offset = fp->block_address + size;
298 p->block = malloc(BGZF_MAX_BLOCK_SIZE);
299 memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE);
300 }
301 #else
302 static void free_cache(BGZF *fp) {}
303 static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
304 static void cache_block(BGZF *fp, int size) {}
305 #endif
306
307 int bgzf_read_block(BGZF *fp)
308 {
309 uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
310 int count, size = 0, block_length, remaining;
311 int64_t block_address;
312 block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
313 if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0;
314 count = _bgzf_read(fp->fp, header, sizeof(header));
315 if (count == 0) { // no data read
316 fp->block_length = 0;
317 return 0;
318 }
319 if (count != sizeof(header) || !check_header(header)) {
320 fp->errcode |= BGZF_ERR_HEADER;
321 return -1;
322 }
323 size = count;
324 block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
325 compressed_block = (uint8_t*)fp->compressed_block;
326 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
327 remaining = block_length - BLOCK_HEADER_LENGTH;
328 count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
329 if (count != remaining) {
330 fp->errcode |= BGZF_ERR_IO;
331 return -1;
332 }
333 size += count;
334 if ((count = inflate_block(fp, block_length)) < 0) return -1;
335 if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
336 fp->block_address = block_address;
337 fp->block_length = count;
338 cache_block(fp, size);
339 return 0;
340 }
341
342 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length)
343 {
344 ssize_t bytes_read = 0;
345 uint8_t *output = data;
346 if (length <= 0) return 0;
347 assert(fp->is_write == 0);
348 while (bytes_read < length) {
349 int copy_length, available = fp->block_length - fp->block_offset;
350 uint8_t *buffer;
351 if (available <= 0) {
352 if (bgzf_read_block(fp) != 0) return -1;
353 available = fp->block_length - fp->block_offset;
354 if (available <= 0) break;
355 }
356 copy_length = length - bytes_read < available? length - bytes_read : available;
357 buffer = fp->uncompressed_block;
358 memcpy(output, buffer + fp->block_offset, copy_length);
359 fp->block_offset += copy_length;
360 output += copy_length;
361 bytes_read += copy_length;
362 }
363 if (fp->block_offset == fp->block_length) {
364 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
365 fp->block_offset = fp->block_length = 0;
366 }
367 return bytes_read;
368 }
369
370 /***** BEGIN: multi-threading *****/
371
372 typedef struct {
373 BGZF *fp;
374 struct mtaux_t *mt;
375 void *buf;
376 int i, errcode, toproc;
377 } worker_t;
378
379 typedef struct mtaux_t {
380 int n_threads, n_blks, curr, done;
381 volatile int proc_cnt;
382 void **blk;
383 int *len;
384 worker_t *w;
385 pthread_t *tid;
386 pthread_mutex_t lock;
387 pthread_cond_t cv;
388 } mtaux_t;
389
390 static int worker_aux(worker_t *w)
391 {
392 int i, tmp, stop = 0;
393 // wait for condition: to process or all done
394 pthread_mutex_lock(&w->mt->lock);
395 while (!w->toproc && !w->mt->done)
396 pthread_cond_wait(&w->mt->cv, &w->mt->lock);
397 if (w->mt->done) stop = 1;
398 w->toproc = 0;
399 pthread_mutex_unlock(&w->mt->lock);
400 if (stop) return 1; // to quit the thread
401 w->errcode = 0;
402 for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) {
403 int clen = BGZF_MAX_BLOCK_SIZE;
404 if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0)
405 w->errcode |= BGZF_ERR_ZLIB;
406 memcpy(w->mt->blk[i], w->buf, clen);
407 w->mt->len[i] = clen;
408 }
409 tmp = __sync_fetch_and_add(&w->mt->proc_cnt, 1);
410 return 0;
411 }
412
413 static void *mt_worker(void *data)
414 {
415 while (worker_aux(data) == 0);
416 return 0;
417 }
418
419 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
420 {
421 int i;
422 mtaux_t *mt;
423 pthread_attr_t attr;
424 if (!fp->is_write || fp->mt || n_threads <= 1) return -1;
425 mt = calloc(1, sizeof(mtaux_t));
426 mt->n_threads = n_threads;
427 mt->n_blks = n_threads * n_sub_blks;
428 mt->len = calloc(mt->n_blks, sizeof(int));
429 mt->blk = calloc(mt->n_blks, sizeof(void*));
430 for (i = 0; i < mt->n_blks; ++i)
431 mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE);
432 mt->tid = calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master
433 mt->w = calloc(mt->n_threads, sizeof(worker_t));
434 for (i = 0; i < mt->n_threads; ++i) {
435 mt->w[i].i = i;
436 mt->w[i].mt = mt;
437 mt->w[i].fp = fp;
438 mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE);
439 }
440 pthread_attr_init(&attr);
441 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
442 pthread_mutex_init(&mt->lock, 0);
443 pthread_cond_init(&mt->cv, 0);
444 for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread
445 pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]);
446 fp->mt = mt;
447 return 0;
448 }
449
450 static void mt_destroy(mtaux_t *mt)
451 {
452 int i;
453 // signal all workers to quit
454 pthread_mutex_lock(&mt->lock);
455 mt->done = 1; mt->proc_cnt = 0;
456 pthread_cond_broadcast(&mt->cv);
457 pthread_mutex_unlock(&mt->lock);
458 for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // worker 0 is effectively launched by the master thread
459 // free other data allocated on heap
460 for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]);
461 for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf);
462 free(mt->blk); free(mt->len); free(mt->w); free(mt->tid);
463 pthread_cond_destroy(&mt->cv);
464 pthread_mutex_destroy(&mt->lock);
465 free(mt);
466 }
467
468 static void mt_queue(BGZF *fp)
469 {
470 mtaux_t *mt = (mtaux_t*)fp->mt;
471 assert(mt->curr < mt->n_blks); // guaranteed by the caller
472 memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset);
473 mt->len[mt->curr] = fp->block_offset;
474 fp->block_offset = 0;
475 ++mt->curr;
476 }
477
478 static int mt_flush(BGZF *fp)
479 {
480 int i;
481 mtaux_t *mt = (mtaux_t*)fp->mt;
482 if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail
483 // signal all the workers to compress
484 pthread_mutex_lock(&mt->lock);
485 for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1;
486 mt->proc_cnt = 0;
487 pthread_cond_broadcast(&mt->cv);
488 pthread_mutex_unlock(&mt->lock);
489 // worker 0 is doing things here
490 worker_aux(&mt->w[0]);
491 // wait for all the threads to complete
492 while (mt->proc_cnt < mt->n_threads);
493 // dump data to disk
494 for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode;
495 for (i = 0; i < mt->curr; ++i)
496 if (fwrite(mt->blk[i], 1, mt->len[i], fp->fp) != mt->len[i])
497 fp->errcode |= BGZF_ERR_IO;
498 mt->curr = 0;
499 return 0;
500 }
501
502 static int mt_lazy_flush(BGZF *fp)
503 {
504 mtaux_t *mt = (mtaux_t*)fp->mt;
505 if (fp->block_offset) mt_queue(fp);
506 if (mt->curr == mt->n_blks)
507 return mt_flush(fp);
508 return -1;
509 }
510
511 static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length)
512 {
513 const uint8_t *input = data;
514 ssize_t rest = length;
515 while (rest) {
516 int copy_length = BGZF_BLOCK_SIZE - fp->block_offset < rest? BGZF_BLOCK_SIZE - fp->block_offset : rest;
517 memcpy(fp->uncompressed_block + fp->block_offset, input, copy_length);
518 fp->block_offset += copy_length; input += copy_length; rest -= copy_length;
519 if (fp->block_offset == BGZF_BLOCK_SIZE) mt_lazy_flush(fp);
520 }
521 return length - rest;
522 }
523
524 /***** END: multi-threading *****/
525
526 int bgzf_flush(BGZF *fp)
527 {
528 if (!fp->is_write) return 0;
529 if (fp->mt) return mt_flush(fp);
530 while (fp->block_offset > 0) {
531 int block_length;
532 block_length = deflate_block(fp, fp->block_offset);
533 if (block_length < 0) return -1;
534 if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) {
535 fp->errcode |= BGZF_ERR_IO; // possibly truncated file
536 return -1;
537 }
538 fp->block_address += block_length;
539 }
540 return 0;
541 }
542
543 int bgzf_flush_try(BGZF *fp, ssize_t size)
544 {
545 if (fp->block_offset + size > BGZF_BLOCK_SIZE) {
546 if (fp->mt) return mt_lazy_flush(fp);
547 else return bgzf_flush(fp);
548 }
549 return -1;
550 }
551
552 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length)
553 {
554 const uint8_t *input = data;
555 int block_length = BGZF_BLOCK_SIZE, bytes_written = 0;
556 assert(fp->is_write);
557 if (fp->mt) return mt_write(fp, data, length);
558 while (bytes_written < length) {
559 uint8_t* buffer = fp->uncompressed_block;
560 int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written;
561 memcpy(buffer + fp->block_offset, input, copy_length);
562 fp->block_offset += copy_length;
563 input += copy_length;
564 bytes_written += copy_length;
565 if (fp->block_offset == block_length && bgzf_flush(fp)) break;
566 }
567 return bytes_written;
568 }
569
570 int bgzf_close(BGZF* fp)
571 {
572 int ret, count, block_length;
573 if (fp == 0) return -1;
574 if (fp->is_write) {
575 if (bgzf_flush(fp) != 0) return -1;
576 fp->compress_level = -1;
577 block_length = deflate_block(fp, 0); // write an empty block
578 count = fwrite(fp->compressed_block, 1, block_length, fp->fp);
579 if (fflush(fp->fp) != 0) {
580 fp->errcode |= BGZF_ERR_IO;
581 return -1;
582 }
583 if (fp->mt) mt_destroy(fp->mt);
584 }
585 ret = fp->is_write? fclose(fp->fp) : _bgzf_close(fp->fp);
586 if (ret != 0) return -1;
587 free(fp->uncompressed_block);
588 free(fp->compressed_block);
589 free_cache(fp);
590 free(fp);
591 return 0;
592 }
593
594 void bgzf_set_cache_size(BGZF *fp, int cache_size)
595 {
596 if (fp) fp->cache_size = cache_size;
597 }
598
599 int bgzf_check_EOF(BGZF *fp)
600 {
601 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
602 uint8_t buf[28];
603 off_t offset;
604 offset = _bgzf_tell((_bgzf_file_t)fp->fp);
605 if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0;
606 _bgzf_read(fp->fp, buf, 28);
607 _bgzf_seek(fp->fp, offset, SEEK_SET);
608 return (memcmp(magic, buf, 28) == 0)? 1 : 0;
609 }
610
611 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
612 {
613 int block_offset;
614 int64_t block_address;
615
616 if (fp->is_write || where != SEEK_SET) {
617 fp->errcode |= BGZF_ERR_MISUSE;
618 return -1;
619 }
620 block_offset = pos & 0xFFFF;
621 block_address = pos >> 16;
622 if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) {
623 fp->errcode |= BGZF_ERR_IO;
624 return -1;
625 }
626 fp->block_length = 0; // indicates current block has not been loaded
627 fp->block_address = block_address;
628 fp->block_offset = block_offset;
629 return 0;
630 }
631
632 int bgzf_is_bgzf(const char *fn)
633 {
634 uint8_t buf[16];
635 int n;
636 _bgzf_file_t fp;
637 if ((fp = _bgzf_open(fn, "r")) == 0) return 0;
638 n = _bgzf_read(fp, buf, 16);
639 _bgzf_close(fp);
640 if (n != 16) return 0;
641 return memcmp(g_magic, buf, 16) == 0? 1 : 0;
642 }
643
644 int bgzf_getc(BGZF *fp)
645 {
646 int c;
647 if (fp->block_offset >= fp->block_length) {
648 if (bgzf_read_block(fp) != 0) return -2; /* error */
649 if (fp->block_length == 0) return -1; /* end-of-file */
650 }
651 c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
652 if (fp->block_offset == fp->block_length) {
653 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
654 fp->block_offset = 0;
655 fp->block_length = 0;
656 }
657 return c;
658 }
659
660 #ifndef kroundup32
661 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
662 #endif
663
664 int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
665 {
666 int l, state = 0;
667 unsigned char *buf = (unsigned char*)fp->uncompressed_block;
668 str->l = 0;
669 do {
670 if (fp->block_offset >= fp->block_length) {
671 if (bgzf_read_block(fp) != 0) { state = -2; break; }
672 if (fp->block_length == 0) { state = -1; break; }
673 }
674 for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l);
675 if (l < fp->block_length) state = 1;
676 l -= fp->block_offset;
677 if (str->l + l + 1 >= str->m) {
678 str->m = str->l + l + 2;
679 kroundup32(str->m);
680 str->s = (char*)realloc(str->s, str->m);
681 }
682 memcpy(str->s + str->l, buf + fp->block_offset, l);
683 str->l += l;
684 fp->block_offset += l + 1;
685 if (fp->block_offset >= fp->block_length) {
686 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
687 fp->block_offset = 0;
688 fp->block_length = 0;
689 }
690 } while (state == 0);
691 if (str->l == 0 && state < 0) return state;
692 str->s[str->l] = 0;
693 return str->l;
694 }