0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
|
4
|
|
5 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6 of this software and associated documentation files (the "Software"), to deal
|
|
7 in the Software without restriction, including without limitation the rights
|
|
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9 copies of the Software, and to permit persons to whom the Software is
|
|
10 furnished to do so, subject to the following conditions:
|
|
11
|
|
12 The above copyright notice and this permission notice shall be included in
|
|
13 all copies or substantial portions of the Software.
|
|
14
|
|
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21 THE SOFTWARE.
|
|
22 */
|
|
23
|
|
24 /*
|
|
25 2009-06-29 by lh3: cache recent uncompressed blocks.
|
|
26 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
|
|
27 2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
|
|
28
|
|
29 #include <stdio.h>
|
|
30 #include <stdlib.h>
|
|
31 #include <string.h>
|
|
32 #include <unistd.h>
|
|
33 #include <fcntl.h>
|
|
34 #include <sys/types.h>
|
|
35 #include <sys/stat.h>
|
|
36 #include "bgzf.h"
|
|
37
|
|
38 #include "khash.h"
|
|
39 typedef struct {
|
|
40 int size;
|
|
41 uint8_t *block;
|
|
42 int64_t end_offset;
|
|
43 } cache_t;
|
|
44 KHASH_MAP_INIT_INT64(cache, cache_t)
|
|
45
|
|
46 #if defined(_WIN32) || defined(_MSC_VER)
|
|
47 #define ftello(fp) ftell(fp)
|
|
48 #define fseeko(fp, offset, whence) fseek(fp, offset, whence)
|
|
49 #else
|
|
50 extern off_t ftello(FILE *stream);
|
|
51 extern int fseeko(FILE *stream, off_t offset, int whence);
|
|
52 #endif
|
|
53
|
|
54 typedef int8_t bgzf_byte_t;
|
|
55
|
|
56 static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
|
|
57 static const int MAX_BLOCK_SIZE = 64 * 1024;
|
|
58
|
|
59 static const int BLOCK_HEADER_LENGTH = 18;
|
|
60 static const int BLOCK_FOOTER_LENGTH = 8;
|
|
61
|
|
62 static const int GZIP_ID1 = 31;
|
|
63 static const int GZIP_ID2 = 139;
|
|
64 static const int CM_DEFLATE = 8;
|
|
65 static const int FLG_FEXTRA = 4;
|
|
66 static const int OS_UNKNOWN = 255;
|
|
67 static const int BGZF_ID1 = 66; // 'B'
|
|
68 static const int BGZF_ID2 = 67; // 'C'
|
|
69 static const int BGZF_LEN = 2;
|
|
70 static const int BGZF_XLEN = 6; // BGZF_LEN+4
|
|
71
|
|
72 static const int GZIP_WINDOW_BITS = -15; // no zlib header
|
|
73 static const int Z_DEFAULT_MEM_LEVEL = 8;
|
|
74
|
|
75
|
|
76 inline
|
|
77 void
|
|
78 packInt16(uint8_t* buffer, uint16_t value)
|
|
79 {
|
|
80 buffer[0] = value;
|
|
81 buffer[1] = value >> 8;
|
|
82 }
|
|
83
|
|
84 inline
|
|
85 int
|
|
86 unpackInt16(const uint8_t* buffer)
|
|
87 {
|
|
88 return (buffer[0] | (buffer[1] << 8));
|
|
89 }
|
|
90
|
|
91 inline
|
|
92 void
|
|
93 packInt32(uint8_t* buffer, uint32_t value)
|
|
94 {
|
|
95 buffer[0] = value;
|
|
96 buffer[1] = value >> 8;
|
|
97 buffer[2] = value >> 16;
|
|
98 buffer[3] = value >> 24;
|
|
99 }
|
|
100
|
|
101 static inline
|
|
102 int
|
|
103 bgzf_min(int x, int y)
|
|
104 {
|
|
105 return (x < y) ? x : y;
|
|
106 }
|
|
107
|
|
108 static
|
|
109 void
|
|
110 report_error(BGZF* fp, const char* message) {
|
|
111 fp->error = message;
|
|
112 }
|
|
113
|
|
114 static BGZF *bgzf_read_init()
|
|
115 {
|
|
116 BGZF *fp;
|
|
117 fp = calloc(1, sizeof(BGZF));
|
|
118 fp->uncompressed_block_size = MAX_BLOCK_SIZE;
|
|
119 fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
|
|
120 fp->compressed_block_size = MAX_BLOCK_SIZE;
|
|
121 fp->compressed_block = malloc(MAX_BLOCK_SIZE);
|
|
122 fp->cache_size = 0;
|
|
123 fp->cache = kh_init(cache);
|
|
124 return fp;
|
|
125 }
|
|
126
|
|
127 static
|
|
128 BGZF*
|
|
129 open_read(int fd)
|
|
130 {
|
|
131 #ifdef _USE_KNETFILE
|
|
132 knetFile *file = knet_dopen(fd, "r");
|
|
133 #else
|
|
134 FILE* file = fdopen(fd, "r");
|
|
135 #endif
|
|
136 BGZF* fp;
|
|
137 if (file == 0) return 0;
|
|
138 fp = bgzf_read_init();
|
|
139 fp->file_descriptor = fd;
|
|
140 fp->open_mode = 'r';
|
|
141 #ifdef _USE_KNETFILE
|
|
142 fp->x.fpr = file;
|
|
143 #else
|
|
144 fp->file = file;
|
|
145 #endif
|
|
146 return fp;
|
|
147 }
|
|
148
|
|
149 static
|
|
150 BGZF*
|
|
151 open_write(int fd, bool is_uncompressed)
|
|
152 {
|
|
153 FILE* file = fdopen(fd, "w");
|
|
154 BGZF* fp;
|
|
155 if (file == 0) return 0;
|
|
156 fp = malloc(sizeof(BGZF));
|
|
157 fp->file_descriptor = fd;
|
|
158 fp->open_mode = 'w';
|
|
159 fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
|
|
160 #ifdef _USE_KNETFILE
|
|
161 fp->x.fpw = file;
|
|
162 #else
|
|
163 fp->file = file;
|
|
164 #endif
|
|
165 fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
|
|
166 fp->uncompressed_block = NULL;
|
|
167 fp->compressed_block_size = MAX_BLOCK_SIZE;
|
|
168 fp->compressed_block = malloc(MAX_BLOCK_SIZE);
|
|
169 fp->block_address = 0;
|
|
170 fp->block_offset = 0;
|
|
171 fp->block_length = 0;
|
|
172 fp->error = NULL;
|
|
173 return fp;
|
|
174 }
|
|
175
|
|
176 BGZF*
|
|
177 bgzf_open(const char* __restrict path, const char* __restrict mode)
|
|
178 {
|
|
179 BGZF* fp = NULL;
|
|
180 if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */
|
|
181 #ifdef _USE_KNETFILE
|
|
182 knetFile *file = knet_open(path, mode);
|
|
183 if (file == 0) return 0;
|
|
184 fp = bgzf_read_init();
|
|
185 fp->file_descriptor = -1;
|
|
186 fp->open_mode = 'r';
|
|
187 fp->x.fpr = file;
|
|
188 #else
|
|
189 int fd, oflag = O_RDONLY;
|
|
190 #ifdef _WIN32
|
|
191 oflag |= O_BINARY;
|
|
192 #endif
|
|
193 fd = open(path, oflag);
|
|
194 if (fd == -1) return 0;
|
|
195 fp = open_read(fd);
|
|
196 #endif
|
|
197 } else if (mode[0] == 'w' || mode[0] == 'W') {
|
|
198 int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC;
|
|
199 #ifdef _WIN32
|
|
200 oflag |= O_BINARY;
|
|
201 #endif
|
|
202 fd = open(path, oflag, 0644);
|
|
203 if (fd == -1) return 0;
|
|
204 fp = open_write(fd, strstr(mode, "u")? 1 : 0);
|
|
205 }
|
|
206 if (fp != NULL) {
|
|
207 fp->owned_file = 1;
|
|
208 }
|
|
209 return fp;
|
|
210 }
|
|
211
|
|
212 BGZF*
|
|
213 bgzf_fdopen(int fd, const char * __restrict mode)
|
|
214 {
|
|
215 if (fd == -1) return 0;
|
|
216 if (mode[0] == 'r' || mode[0] == 'R') {
|
|
217 return open_read(fd);
|
|
218 } else if (mode[0] == 'w' || mode[0] == 'W') {
|
|
219 return open_write(fd, strstr(mode, "u")? 1 : 0);
|
|
220 } else {
|
|
221 return NULL;
|
|
222 }
|
|
223 }
|
|
224
|
|
225 static
|
|
226 int
|
|
227 deflate_block(BGZF* fp, int block_length)
|
|
228 {
|
|
229 // Deflate the block in fp->uncompressed_block into fp->compressed_block.
|
|
230 // Also adds an extra field that stores the compressed block length.
|
|
231
|
|
232 bgzf_byte_t* buffer = fp->compressed_block;
|
|
233 int buffer_size = fp->compressed_block_size;
|
|
234
|
|
235 // Init gzip header
|
|
236 buffer[0] = GZIP_ID1;
|
|
237 buffer[1] = GZIP_ID2;
|
|
238 buffer[2] = CM_DEFLATE;
|
|
239 buffer[3] = FLG_FEXTRA;
|
|
240 buffer[4] = 0; // mtime
|
|
241 buffer[5] = 0;
|
|
242 buffer[6] = 0;
|
|
243 buffer[7] = 0;
|
|
244 buffer[8] = 0;
|
|
245 buffer[9] = OS_UNKNOWN;
|
|
246 buffer[10] = BGZF_XLEN;
|
|
247 buffer[11] = 0;
|
|
248 buffer[12] = BGZF_ID1;
|
|
249 buffer[13] = BGZF_ID2;
|
|
250 buffer[14] = BGZF_LEN;
|
|
251 buffer[15] = 0;
|
|
252 buffer[16] = 0; // placeholder for block length
|
|
253 buffer[17] = 0;
|
|
254
|
|
255 // loop to retry for blocks that do not compress enough
|
|
256 int input_length = block_length;
|
|
257 int compressed_length = 0;
|
|
258 while (1) {
|
|
259 int compress_level = fp->is_uncompressed? 0 : Z_DEFAULT_COMPRESSION;
|
|
260 z_stream zs;
|
|
261 zs.zalloc = NULL;
|
|
262 zs.zfree = NULL;
|
|
263 zs.next_in = fp->uncompressed_block;
|
|
264 zs.avail_in = input_length;
|
|
265 zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
|
|
266 zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
|
|
267
|
|
268 int status = deflateInit2(&zs, compress_level, Z_DEFLATED,
|
|
269 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
|
|
270 if (status != Z_OK) {
|
|
271 report_error(fp, "deflate init failed");
|
|
272 return -1;
|
|
273 }
|
|
274 status = deflate(&zs, Z_FINISH);
|
|
275 if (status != Z_STREAM_END) {
|
|
276 deflateEnd(&zs);
|
|
277 if (status == Z_OK) {
|
|
278 // Not enough space in buffer.
|
|
279 // Can happen in the rare case the input doesn't compress enough.
|
|
280 // Reduce the amount of input until it fits.
|
|
281 input_length -= 1024;
|
|
282 if (input_length <= 0) {
|
|
283 // should never happen
|
|
284 report_error(fp, "input reduction failed");
|
|
285 return -1;
|
|
286 }
|
|
287 continue;
|
|
288 }
|
|
289 report_error(fp, "deflate failed");
|
|
290 return -1;
|
|
291 }
|
|
292 status = deflateEnd(&zs);
|
|
293 if (status != Z_OK) {
|
|
294 report_error(fp, "deflate end failed");
|
|
295 return -1;
|
|
296 }
|
|
297 compressed_length = zs.total_out;
|
|
298 compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
|
|
299 if (compressed_length > MAX_BLOCK_SIZE) {
|
|
300 // should never happen
|
|
301 report_error(fp, "deflate overflow");
|
|
302 return -1;
|
|
303 }
|
|
304 break;
|
|
305 }
|
|
306
|
|
307 packInt16((uint8_t*)&buffer[16], compressed_length-1);
|
|
308 uint32_t crc = crc32(0L, NULL, 0L);
|
|
309 crc = crc32(crc, fp->uncompressed_block, input_length);
|
|
310 packInt32((uint8_t*)&buffer[compressed_length-8], crc);
|
|
311 packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
|
|
312
|
|
313 int remaining = block_length - input_length;
|
|
314 if (remaining > 0) {
|
|
315 if (remaining > input_length) {
|
|
316 // should never happen (check so we can use memcpy)
|
|
317 report_error(fp, "remainder too large");
|
|
318 return -1;
|
|
319 }
|
|
320 memcpy(fp->uncompressed_block,
|
|
321 fp->uncompressed_block + input_length,
|
|
322 remaining);
|
|
323 }
|
|
324 fp->block_offset = remaining;
|
|
325 return compressed_length;
|
|
326 }
|
|
327
|
|
328 static
|
|
329 int
|
|
330 inflate_block(BGZF* fp, int block_length)
|
|
331 {
|
|
332 // Inflate the block in fp->compressed_block into fp->uncompressed_block
|
|
333
|
|
334 z_stream zs;
|
|
335 zs.zalloc = NULL;
|
|
336 zs.zfree = NULL;
|
|
337 zs.next_in = fp->compressed_block + 18;
|
|
338 zs.avail_in = block_length - 16;
|
|
339 zs.next_out = fp->uncompressed_block;
|
|
340 zs.avail_out = fp->uncompressed_block_size;
|
|
341
|
|
342 int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
|
|
343 if (status != Z_OK) {
|
|
344 report_error(fp, "inflate init failed");
|
|
345 return -1;
|
|
346 }
|
|
347 status = inflate(&zs, Z_FINISH);
|
|
348 if (status != Z_STREAM_END) {
|
|
349 inflateEnd(&zs);
|
|
350 report_error(fp, "inflate failed");
|
|
351 return -1;
|
|
352 }
|
|
353 status = inflateEnd(&zs);
|
|
354 if (status != Z_OK) {
|
|
355 report_error(fp, "inflate failed");
|
|
356 return -1;
|
|
357 }
|
|
358 return zs.total_out;
|
|
359 }
|
|
360
|
|
361 static
|
|
362 int
|
|
363 check_header(const bgzf_byte_t* header)
|
|
364 {
|
|
365 return (header[0] == GZIP_ID1 &&
|
|
366 header[1] == (bgzf_byte_t) GZIP_ID2 &&
|
|
367 header[2] == Z_DEFLATED &&
|
|
368 (header[3] & FLG_FEXTRA) != 0 &&
|
|
369 unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
|
|
370 header[12] == BGZF_ID1 &&
|
|
371 header[13] == BGZF_ID2 &&
|
|
372 unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
|
|
373 }
|
|
374
|
|
375 static void free_cache(BGZF *fp)
|
|
376 {
|
|
377 khint_t k;
|
|
378 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
379 if (fp->open_mode != 'r') return;
|
|
380 for (k = kh_begin(h); k < kh_end(h); ++k)
|
|
381 if (kh_exist(h, k)) free(kh_val(h, k).block);
|
|
382 kh_destroy(cache, h);
|
|
383 }
|
|
384
|
|
385 static int load_block_from_cache(BGZF *fp, int64_t block_address)
|
|
386 {
|
|
387 khint_t k;
|
|
388 cache_t *p;
|
|
389 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
390 k = kh_get(cache, h, block_address);
|
|
391 if (k == kh_end(h)) return 0;
|
|
392 p = &kh_val(h, k);
|
|
393 if (fp->block_length != 0) fp->block_offset = 0;
|
|
394 fp->block_address = block_address;
|
|
395 fp->block_length = p->size;
|
|
396 memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
|
|
397 #ifdef _USE_KNETFILE
|
|
398 knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
|
|
399 #else
|
|
400 fseeko(fp->file, p->end_offset, SEEK_SET);
|
|
401 #endif
|
|
402 return p->size;
|
|
403 }
|
|
404
|
|
405 static void cache_block(BGZF *fp, int size)
|
|
406 {
|
|
407 int ret;
|
|
408 khint_t k;
|
|
409 cache_t *p;
|
|
410 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
411 if (MAX_BLOCK_SIZE >= fp->cache_size) return;
|
|
412 if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
|
|
413 /* A better way would be to remove the oldest block in the
|
|
414 * cache, but here we remove a random one for simplicity. This
|
|
415 * should not have a big impact on performance. */
|
|
416 for (k = kh_begin(h); k < kh_end(h); ++k)
|
|
417 if (kh_exist(h, k)) break;
|
|
418 if (k < kh_end(h)) {
|
|
419 free(kh_val(h, k).block);
|
|
420 kh_del(cache, h, k);
|
|
421 }
|
|
422 }
|
|
423 k = kh_put(cache, h, fp->block_address, &ret);
|
|
424 if (ret == 0) return; // if this happens, a bug!
|
|
425 p = &kh_val(h, k);
|
|
426 p->size = fp->block_length;
|
|
427 p->end_offset = fp->block_address + size;
|
|
428 p->block = malloc(MAX_BLOCK_SIZE);
|
|
429 memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
|
|
430 }
|
|
431
|
|
432 static
|
|
433 int
|
|
434 read_block(BGZF* fp)
|
|
435 {
|
|
436 bgzf_byte_t header[BLOCK_HEADER_LENGTH];
|
|
437 int size = 0;
|
|
438 #ifdef _USE_KNETFILE
|
|
439 int64_t block_address = knet_tell(fp->x.fpr);
|
|
440 if (load_block_from_cache(fp, block_address)) return 0;
|
|
441 int count = knet_read(fp->x.fpr, header, sizeof(header));
|
|
442 #else
|
|
443 int64_t block_address = ftello(fp->file);
|
|
444 if (load_block_from_cache(fp, block_address)) return 0;
|
|
445 int count = fread(header, 1, sizeof(header), fp->file);
|
|
446 #endif
|
|
447 if (count == 0) {
|
|
448 fp->block_length = 0;
|
|
449 return 0;
|
|
450 }
|
|
451 size = count;
|
|
452 if (count != sizeof(header)) {
|
|
453 report_error(fp, "read failed");
|
|
454 return -1;
|
|
455 }
|
|
456 if (!check_header(header)) {
|
|
457 report_error(fp, "invalid block header");
|
|
458 return -1;
|
|
459 }
|
|
460 int block_length = unpackInt16((uint8_t*)&header[16]) + 1;
|
|
461 bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
|
|
462 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
|
|
463 int remaining = block_length - BLOCK_HEADER_LENGTH;
|
|
464 #ifdef _USE_KNETFILE
|
|
465 count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
|
|
466 #else
|
|
467 count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
|
|
468 #endif
|
|
469 if (count != remaining) {
|
|
470 report_error(fp, "read failed");
|
|
471 return -1;
|
|
472 }
|
|
473 size += count;
|
|
474 count = inflate_block(fp, block_length);
|
|
475 if (count < 0) {
|
|
476 return -1;
|
|
477 }
|
|
478 if (fp->block_length != 0) {
|
|
479 // Do not reset offset if this read follows a seek.
|
|
480 fp->block_offset = 0;
|
|
481 }
|
|
482 fp->block_address = block_address;
|
|
483 fp->block_length = count;
|
|
484 cache_block(fp, size);
|
|
485 return 0;
|
|
486 }
|
|
487
|
|
488 int
|
|
489 bgzf_read(BGZF* fp, void* data, int length)
|
|
490 {
|
|
491 if (length <= 0) {
|
|
492 return 0;
|
|
493 }
|
|
494 if (fp->open_mode != 'r') {
|
|
495 report_error(fp, "file not open for reading");
|
|
496 return -1;
|
|
497 }
|
|
498
|
|
499 int bytes_read = 0;
|
|
500 bgzf_byte_t* output = data;
|
|
501 while (bytes_read < length) {
|
|
502 int available = fp->block_length - fp->block_offset;
|
|
503 if (available <= 0) {
|
|
504 if (read_block(fp) != 0) {
|
|
505 return -1;
|
|
506 }
|
|
507 available = fp->block_length - fp->block_offset;
|
|
508 if (available <= 0) {
|
|
509 break;
|
|
510 }
|
|
511 }
|
|
512 int copy_length = bgzf_min(length-bytes_read, available);
|
|
513 bgzf_byte_t* buffer = fp->uncompressed_block;
|
|
514 memcpy(output, buffer + fp->block_offset, copy_length);
|
|
515 fp->block_offset += copy_length;
|
|
516 output += copy_length;
|
|
517 bytes_read += copy_length;
|
|
518 }
|
|
519 if (fp->block_offset == fp->block_length) {
|
|
520 #ifdef _USE_KNETFILE
|
|
521 fp->block_address = knet_tell(fp->x.fpr);
|
|
522 #else
|
|
523 fp->block_address = ftello(fp->file);
|
|
524 #endif
|
|
525 fp->block_offset = 0;
|
|
526 fp->block_length = 0;
|
|
527 }
|
|
528 return bytes_read;
|
|
529 }
|
|
530
|
|
531 static
|
|
532 int
|
|
533 flush_block(BGZF* fp)
|
|
534 {
|
|
535 while (fp->block_offset > 0) {
|
|
536 int block_length = deflate_block(fp, fp->block_offset);
|
|
537 if (block_length < 0) {
|
|
538 return -1;
|
|
539 }
|
|
540 #ifdef _USE_KNETFILE
|
|
541 int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
|
|
542 #else
|
|
543 int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
|
|
544 #endif
|
|
545 if (count != block_length) {
|
|
546 report_error(fp, "write failed");
|
|
547 return -1;
|
|
548 }
|
|
549 fp->block_address += block_length;
|
|
550 }
|
|
551 return 0;
|
|
552 }
|
|
553
|
|
554 int
|
|
555 bgzf_write(BGZF* fp, const void* data, int length)
|
|
556 {
|
|
557 if (fp->open_mode != 'w') {
|
|
558 report_error(fp, "file not open for writing");
|
|
559 return -1;
|
|
560 }
|
|
561
|
|
562 if (fp->uncompressed_block == NULL) {
|
|
563 fp->uncompressed_block = malloc(fp->uncompressed_block_size);
|
|
564 }
|
|
565
|
|
566 const bgzf_byte_t* input = data;
|
|
567 int block_length = fp->uncompressed_block_size;
|
|
568 int bytes_written = 0;
|
|
569 while (bytes_written < length) {
|
|
570 int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
|
|
571 bgzf_byte_t* buffer = fp->uncompressed_block;
|
|
572 memcpy(buffer + fp->block_offset, input, copy_length);
|
|
573 fp->block_offset += copy_length;
|
|
574 input += copy_length;
|
|
575 bytes_written += copy_length;
|
|
576 if (fp->block_offset == block_length) {
|
|
577 if (flush_block(fp) != 0) {
|
|
578 break;
|
|
579 }
|
|
580 }
|
|
581 }
|
|
582 return bytes_written;
|
|
583 }
|
|
584
|
|
585 int
|
|
586 bgzf_close(BGZF* fp)
|
|
587 {
|
|
588 if (fp->open_mode == 'w') {
|
|
589 if (flush_block(fp) != 0) {
|
|
590 return -1;
|
|
591 }
|
|
592 { // add an empty block
|
|
593 int count, block_length = deflate_block(fp, 0);
|
|
594 #ifdef _USE_KNETFILE
|
|
595 count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
|
|
596 #else
|
|
597 count = fwrite(fp->compressed_block, 1, block_length, fp->file);
|
|
598 #endif
|
|
599 }
|
|
600 #ifdef _USE_KNETFILE
|
|
601 if (fflush(fp->x.fpw) != 0) {
|
|
602 #else
|
|
603 if (fflush(fp->file) != 0) {
|
|
604 #endif
|
|
605 report_error(fp, "flush failed");
|
|
606 return -1;
|
|
607 }
|
|
608 }
|
|
609 if (fp->owned_file) {
|
|
610 #ifdef _USE_KNETFILE
|
|
611 int ret;
|
|
612 if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
|
|
613 else ret = knet_close(fp->x.fpr);
|
|
614 if (ret != 0) return -1;
|
|
615 #else
|
|
616 if (fclose(fp->file) != 0) {
|
|
617 return -1;
|
|
618 }
|
|
619 #endif
|
|
620 }
|
|
621 free(fp->uncompressed_block);
|
|
622 free(fp->compressed_block);
|
|
623 free_cache(fp);
|
|
624 free(fp);
|
|
625 return 0;
|
|
626 }
|
|
627
|
|
628 int64_t
|
|
629 bgzf_tell(BGZF* fp)
|
|
630 {
|
|
631 return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF));
|
|
632 }
|
|
633
|
|
634 void bgzf_set_cache_size(BGZF *fp, int cache_size)
|
|
635 {
|
|
636 if (fp) fp->cache_size = cache_size;
|
|
637 }
|
|
638
|
|
639 int bgzf_check_EOF(BGZF *fp)
|
|
640 {
|
|
641 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
|
|
642 uint8_t buf[28];
|
|
643 off_t offset;
|
|
644 #ifdef _USE_KNETFILE
|
|
645 offset = knet_tell(fp->x.fpr);
|
|
646 if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
|
|
647 knet_read(fp->x.fpr, buf, 28);
|
|
648 knet_seek(fp->x.fpr, offset, SEEK_SET);
|
|
649 #else
|
|
650 offset = ftello(fp->file);
|
|
651 if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
|
|
652 fread(buf, 1, 28, fp->file);
|
|
653 fseeko(fp->file, offset, SEEK_SET);
|
|
654 #endif
|
|
655 return (memcmp(magic, buf, 28) == 0)? 1 : 0;
|
|
656 }
|
|
657
|
|
658 int64_t
|
|
659 bgzf_seek(BGZF* fp, int64_t pos, int where)
|
|
660 {
|
|
661 if (fp->open_mode != 'r') {
|
|
662 report_error(fp, "file not open for read");
|
|
663 return -1;
|
|
664 }
|
|
665 if (where != SEEK_SET) {
|
|
666 report_error(fp, "unimplemented seek option");
|
|
667 return -1;
|
|
668 }
|
|
669 int block_offset = pos & 0xFFFF;
|
|
670 int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
|
|
671 #ifdef _USE_KNETFILE
|
|
672 if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
|
|
673 #else
|
|
674 if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
|
|
675 #endif
|
|
676 report_error(fp, "seek failed");
|
|
677 return -1;
|
|
678 }
|
|
679 fp->block_length = 0; // indicates current block is not loaded
|
|
680 fp->block_address = block_address;
|
|
681 fp->block_offset = block_offset;
|
|
682 return 0;
|
|
683 }
|