Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/bgzf.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology | |
4 2011 Attractive Chaos <attractor@live.co.uk> | |
5 | |
6 Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 of this software and associated documentation files (the "Software"), to deal | |
8 in the Software without restriction, including without limitation the rights | |
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 copies of the Software, and to permit persons to whom the Software is | |
11 furnished to do so, subject to the following conditions: | |
12 | |
13 The above copyright notice and this permission notice shall be included in | |
14 all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 THE SOFTWARE. | |
23 */ | |
24 | |
25 #include <stdio.h> | |
26 #include <stdlib.h> | |
27 #include <string.h> | |
28 #include <unistd.h> | |
29 #include <assert.h> | |
30 #include <pthread.h> | |
31 #include <sys/types.h> | |
32 #include "bgzf.h" | |
33 | |
34 #ifdef _USE_KNETFILE | |
35 #include "knetfile.h" | |
36 typedef knetFile *_bgzf_file_t; | |
37 #define _bgzf_open(fn, mode) knet_open(fn, mode) | |
38 #define _bgzf_dopen(fp, mode) knet_dopen(fp, mode) | |
39 #define _bgzf_close(fp) knet_close(fp) | |
40 #define _bgzf_fileno(fp) ((fp)->fd) | |
41 #define _bgzf_tell(fp) knet_tell(fp) | |
42 #define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence) | |
43 #define _bgzf_read(fp, buf, len) knet_read(fp, buf, len) | |
44 #define _bgzf_write(fp, buf, len) knet_write(fp, buf, len) | |
45 #else // ~defined(_USE_KNETFILE) | |
46 #if defined(_WIN32) || defined(_MSC_VER) | |
47 #define ftello(fp) ftell(fp) | |
48 #define fseeko(fp, offset, whence) fseek(fp, offset, whence) | |
49 #else // ~defined(_WIN32) | |
50 extern off_t ftello(FILE *stream); | |
51 extern int fseeko(FILE *stream, off_t offset, int whence); | |
52 #endif // ~defined(_WIN32) | |
53 typedef FILE *_bgzf_file_t; | |
54 #define _bgzf_open(fn, mode) fopen(fn, mode) | |
55 #define _bgzf_dopen(fp, mode) fdopen(fp, mode) | |
56 #define _bgzf_close(fp) fclose(fp) | |
57 #define _bgzf_fileno(fp) fileno(fp) | |
58 #define _bgzf_tell(fp) ftello(fp) | |
59 #define _bgzf_seek(fp, offset, whence) fseeko(fp, offset, whence) | |
60 #define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp) | |
61 #define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp) | |
62 #endif // ~define(_USE_KNETFILE) | |
63 | |
64 #define BLOCK_HEADER_LENGTH 18 | |
65 #define BLOCK_FOOTER_LENGTH 8 | |
66 | |
67 | |
68 /* BGZF/GZIP header (speciallized from RFC 1952; little endian): | |
69 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
70 | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| | |
71 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | |
72 */ | |
73 static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; | |
74 | |
75 #ifdef BGZF_CACHE | |
76 typedef struct { | |
77 int size; | |
78 uint8_t *block; | |
79 int64_t end_offset; | |
80 } cache_t; | |
81 #include "khash.h" | |
82 KHASH_MAP_INIT_INT64(cache, cache_t) | |
83 #endif | |
84 | |
85 static inline void packInt16(uint8_t *buffer, uint16_t value) | |
86 { | |
87 buffer[0] = value; | |
88 buffer[1] = value >> 8; | |
89 } | |
90 | |
91 static inline int unpackInt16(const uint8_t *buffer) | |
92 { | |
93 return buffer[0] | buffer[1] << 8; | |
94 } | |
95 | |
96 static inline void packInt32(uint8_t *buffer, uint32_t value) | |
97 { | |
98 buffer[0] = value; | |
99 buffer[1] = value >> 8; | |
100 buffer[2] = value >> 16; | |
101 buffer[3] = value >> 24; | |
102 } | |
103 | |
104 static BGZF *bgzf_read_init() | |
105 { | |
106 BGZF *fp; | |
107 fp = calloc(1, sizeof(BGZF)); | |
108 fp->is_write = 0; | |
109 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); | |
110 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); | |
111 #ifdef BGZF_CACHE | |
112 fp->cache = kh_init(cache); | |
113 #endif | |
114 return fp; | |
115 } | |
116 | |
117 static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level | |
118 { | |
119 BGZF *fp; | |
120 fp = calloc(1, sizeof(BGZF)); | |
121 fp->is_write = 1; | |
122 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); | |
123 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); | |
124 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 | |
125 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; | |
126 return fp; | |
127 } | |
128 // get the compress level from the mode string | |
129 static int mode2level(const char *__restrict mode) | |
130 { | |
131 int i, compress_level = -1; | |
132 for (i = 0; mode[i]; ++i) | |
133 if (mode[i] >= '0' && mode[i] <= '9') break; | |
134 if (mode[i]) compress_level = (int)mode[i] - '0'; | |
135 if (strchr(mode, 'u')) compress_level = 0; | |
136 return compress_level; | |
137 } | |
138 | |
139 BGZF *bgzf_open(const char *path, const char *mode) | |
140 { | |
141 BGZF *fp = 0; | |
142 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); | |
143 if (strchr(mode, 'r') || strchr(mode, 'R')) { | |
144 _bgzf_file_t fpr; | |
145 if ((fpr = _bgzf_open(path, "r")) == 0) return 0; | |
146 fp = bgzf_read_init(); | |
147 fp->fp = fpr; | |
148 } else if (strchr(mode, 'w') || strchr(mode, 'W')) { | |
149 FILE *fpw; | |
150 if ((fpw = fopen(path, "w")) == 0) return 0; | |
151 fp = bgzf_write_init(mode2level(mode)); | |
152 fp->fp = fpw; | |
153 } | |
154 return fp; | |
155 } | |
156 | |
157 BGZF *bgzf_dopen(int fd, const char *mode) | |
158 { | |
159 BGZF *fp = 0; | |
160 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); | |
161 if (strchr(mode, 'r') || strchr(mode, 'R')) { | |
162 _bgzf_file_t fpr; | |
163 if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0; | |
164 fp = bgzf_read_init(); | |
165 fp->fp = fpr; | |
166 } else if (strchr(mode, 'w') || strchr(mode, 'W')) { | |
167 FILE *fpw; | |
168 if ((fpw = fdopen(fd, "w")) == 0) return 0; | |
169 fp = bgzf_write_init(mode2level(mode)); | |
170 fp->fp = fpw; | |
171 } | |
172 return fp; | |
173 } | |
174 | |
175 static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level) | |
176 { | |
177 uint32_t crc; | |
178 z_stream zs; | |
179 uint8_t *dst = (uint8_t*)_dst; | |
180 | |
181 // compress the body | |
182 zs.zalloc = NULL; zs.zfree = NULL; | |
183 zs.next_in = src; | |
184 zs.avail_in = slen; | |
185 zs.next_out = dst + BLOCK_HEADER_LENGTH; | |
186 zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; | |
187 if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer | |
188 if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1; | |
189 if (deflateEnd(&zs) != Z_OK) return -1; | |
190 *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; | |
191 // write the header | |
192 memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block | |
193 packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes | |
194 // write the footer | |
195 crc = crc32(crc32(0L, NULL, 0L), src, slen); | |
196 packInt32((uint8_t*)&dst[*dlen - 8], crc); | |
197 packInt32((uint8_t*)&dst[*dlen - 4], slen); | |
198 return 0; | |
199 } | |
200 | |
201 // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. | |
202 static int deflate_block(BGZF *fp, int block_length) | |
203 { | |
204 int comp_size = BGZF_MAX_BLOCK_SIZE; | |
205 if (bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level) != 0) { | |
206 fp->errcode |= BGZF_ERR_ZLIB; | |
207 return -1; | |
208 } | |
209 fp->block_offset = 0; | |
210 return comp_size; | |
211 } | |
212 | |
213 // Inflate the block in fp->compressed_block into fp->uncompressed_block | |
214 static int inflate_block(BGZF* fp, int block_length) | |
215 { | |
216 z_stream zs; | |
217 zs.zalloc = NULL; | |
218 zs.zfree = NULL; | |
219 zs.next_in = fp->compressed_block + 18; | |
220 zs.avail_in = block_length - 16; | |
221 zs.next_out = fp->uncompressed_block; | |
222 zs.avail_out = BGZF_MAX_BLOCK_SIZE; | |
223 | |
224 if (inflateInit2(&zs, -15) != Z_OK) { | |
225 fp->errcode |= BGZF_ERR_ZLIB; | |
226 return -1; | |
227 } | |
228 if (inflate(&zs, Z_FINISH) != Z_STREAM_END) { | |
229 inflateEnd(&zs); | |
230 fp->errcode |= BGZF_ERR_ZLIB; | |
231 return -1; | |
232 } | |
233 if (inflateEnd(&zs) != Z_OK) { | |
234 fp->errcode |= BGZF_ERR_ZLIB; | |
235 return -1; | |
236 } | |
237 return zs.total_out; | |
238 } | |
239 | |
240 static int check_header(const uint8_t *header) | |
241 { | |
242 return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0 | |
243 && unpackInt16((uint8_t*)&header[10]) == 6 | |
244 && header[12] == 'B' && header[13] == 'C' | |
245 && unpackInt16((uint8_t*)&header[14]) == 2); | |
246 } | |
247 | |
248 #ifdef BGZF_CACHE | |
249 static void free_cache(BGZF *fp) | |
250 { | |
251 khint_t k; | |
252 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
253 if (fp->is_write) return; | |
254 for (k = kh_begin(h); k < kh_end(h); ++k) | |
255 if (kh_exist(h, k)) free(kh_val(h, k).block); | |
256 kh_destroy(cache, h); | |
257 } | |
258 | |
259 static int load_block_from_cache(BGZF *fp, int64_t block_address) | |
260 { | |
261 khint_t k; | |
262 cache_t *p; | |
263 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
264 k = kh_get(cache, h, block_address); | |
265 if (k == kh_end(h)) return 0; | |
266 p = &kh_val(h, k); | |
267 if (fp->block_length != 0) fp->block_offset = 0; | |
268 fp->block_address = block_address; | |
269 fp->block_length = p->size; | |
270 memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE); | |
271 _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET); | |
272 return p->size; | |
273 } | |
274 | |
275 static void cache_block(BGZF *fp, int size) | |
276 { | |
277 int ret; | |
278 khint_t k; | |
279 cache_t *p; | |
280 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
281 if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return; | |
282 if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > fp->cache_size) { | |
283 /* A better way would be to remove the oldest block in the | |
284 * cache, but here we remove a random one for simplicity. This | |
285 * should not have a big impact on performance. */ | |
286 for (k = kh_begin(h); k < kh_end(h); ++k) | |
287 if (kh_exist(h, k)) break; | |
288 if (k < kh_end(h)) { | |
289 free(kh_val(h, k).block); | |
290 kh_del(cache, h, k); | |
291 } | |
292 } | |
293 k = kh_put(cache, h, fp->block_address, &ret); | |
294 if (ret == 0) return; // if this happens, a bug! | |
295 p = &kh_val(h, k); | |
296 p->size = fp->block_length; | |
297 p->end_offset = fp->block_address + size; | |
298 p->block = malloc(BGZF_MAX_BLOCK_SIZE); | |
299 memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE); | |
300 } | |
301 #else | |
302 static void free_cache(BGZF *fp) {} | |
303 static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} | |
304 static void cache_block(BGZF *fp, int size) {} | |
305 #endif | |
306 | |
307 int bgzf_read_block(BGZF *fp) | |
308 { | |
309 uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; | |
310 int count, size = 0, block_length, remaining; | |
311 int64_t block_address; | |
312 block_address = _bgzf_tell((_bgzf_file_t)fp->fp); | |
313 if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; | |
314 count = _bgzf_read(fp->fp, header, sizeof(header)); | |
315 if (count == 0) { // no data read | |
316 fp->block_length = 0; | |
317 return 0; | |
318 } | |
319 if (count != sizeof(header) || !check_header(header)) { | |
320 fp->errcode |= BGZF_ERR_HEADER; | |
321 return -1; | |
322 } | |
323 size = count; | |
324 block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" | |
325 compressed_block = (uint8_t*)fp->compressed_block; | |
326 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); | |
327 remaining = block_length - BLOCK_HEADER_LENGTH; | |
328 count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); | |
329 if (count != remaining) { | |
330 fp->errcode |= BGZF_ERR_IO; | |
331 return -1; | |
332 } | |
333 size += count; | |
334 if ((count = inflate_block(fp, block_length)) < 0) return -1; | |
335 if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. | |
336 fp->block_address = block_address; | |
337 fp->block_length = count; | |
338 cache_block(fp, size); | |
339 return 0; | |
340 } | |
341 | |
342 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length) | |
343 { | |
344 ssize_t bytes_read = 0; | |
345 uint8_t *output = data; | |
346 if (length <= 0) return 0; | |
347 assert(fp->is_write == 0); | |
348 while (bytes_read < length) { | |
349 int copy_length, available = fp->block_length - fp->block_offset; | |
350 uint8_t *buffer; | |
351 if (available <= 0) { | |
352 if (bgzf_read_block(fp) != 0) return -1; | |
353 available = fp->block_length - fp->block_offset; | |
354 if (available <= 0) break; | |
355 } | |
356 copy_length = length - bytes_read < available? length - bytes_read : available; | |
357 buffer = fp->uncompressed_block; | |
358 memcpy(output, buffer + fp->block_offset, copy_length); | |
359 fp->block_offset += copy_length; | |
360 output += copy_length; | |
361 bytes_read += copy_length; | |
362 } | |
363 if (fp->block_offset == fp->block_length) { | |
364 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); | |
365 fp->block_offset = fp->block_length = 0; | |
366 } | |
367 return bytes_read; | |
368 } | |
369 | |
370 /***** BEGIN: multi-threading *****/ | |
371 | |
372 typedef struct { | |
373 BGZF *fp; | |
374 struct mtaux_t *mt; | |
375 void *buf; | |
376 int i, errcode, toproc; | |
377 } worker_t; | |
378 | |
379 typedef struct mtaux_t { | |
380 int n_threads, n_blks, curr, done; | |
381 volatile int proc_cnt; | |
382 void **blk; | |
383 int *len; | |
384 worker_t *w; | |
385 pthread_t *tid; | |
386 pthread_mutex_t lock; | |
387 pthread_cond_t cv; | |
388 } mtaux_t; | |
389 | |
390 static int worker_aux(worker_t *w) | |
391 { | |
392 int i, tmp, stop = 0; | |
393 // wait for condition: to process or all done | |
394 pthread_mutex_lock(&w->mt->lock); | |
395 while (!w->toproc && !w->mt->done) | |
396 pthread_cond_wait(&w->mt->cv, &w->mt->lock); | |
397 if (w->mt->done) stop = 1; | |
398 w->toproc = 0; | |
399 pthread_mutex_unlock(&w->mt->lock); | |
400 if (stop) return 1; // to quit the thread | |
401 w->errcode = 0; | |
402 for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) { | |
403 int clen = BGZF_MAX_BLOCK_SIZE; | |
404 if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0) | |
405 w->errcode |= BGZF_ERR_ZLIB; | |
406 memcpy(w->mt->blk[i], w->buf, clen); | |
407 w->mt->len[i] = clen; | |
408 } | |
409 tmp = __sync_fetch_and_add(&w->mt->proc_cnt, 1); | |
410 return 0; | |
411 } | |
412 | |
413 static void *mt_worker(void *data) | |
414 { | |
415 while (worker_aux(data) == 0); | |
416 return 0; | |
417 } | |
418 | |
419 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) | |
420 { | |
421 int i; | |
422 mtaux_t *mt; | |
423 pthread_attr_t attr; | |
424 if (!fp->is_write || fp->mt || n_threads <= 1) return -1; | |
425 mt = calloc(1, sizeof(mtaux_t)); | |
426 mt->n_threads = n_threads; | |
427 mt->n_blks = n_threads * n_sub_blks; | |
428 mt->len = calloc(mt->n_blks, sizeof(int)); | |
429 mt->blk = calloc(mt->n_blks, sizeof(void*)); | |
430 for (i = 0; i < mt->n_blks; ++i) | |
431 mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE); | |
432 mt->tid = calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master | |
433 mt->w = calloc(mt->n_threads, sizeof(worker_t)); | |
434 for (i = 0; i < mt->n_threads; ++i) { | |
435 mt->w[i].i = i; | |
436 mt->w[i].mt = mt; | |
437 mt->w[i].fp = fp; | |
438 mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE); | |
439 } | |
440 pthread_attr_init(&attr); | |
441 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); | |
442 pthread_mutex_init(&mt->lock, 0); | |
443 pthread_cond_init(&mt->cv, 0); | |
444 for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread | |
445 pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]); | |
446 fp->mt = mt; | |
447 return 0; | |
448 } | |
449 | |
450 static void mt_destroy(mtaux_t *mt) | |
451 { | |
452 int i; | |
453 // signal all workers to quit | |
454 pthread_mutex_lock(&mt->lock); | |
455 mt->done = 1; mt->proc_cnt = 0; | |
456 pthread_cond_broadcast(&mt->cv); | |
457 pthread_mutex_unlock(&mt->lock); | |
458 for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // worker 0 is effectively launched by the master thread | |
459 // free other data allocated on heap | |
460 for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]); | |
461 for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf); | |
462 free(mt->blk); free(mt->len); free(mt->w); free(mt->tid); | |
463 pthread_cond_destroy(&mt->cv); | |
464 pthread_mutex_destroy(&mt->lock); | |
465 free(mt); | |
466 } | |
467 | |
468 static void mt_queue(BGZF *fp) | |
469 { | |
470 mtaux_t *mt = (mtaux_t*)fp->mt; | |
471 assert(mt->curr < mt->n_blks); // guaranteed by the caller | |
472 memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset); | |
473 mt->len[mt->curr] = fp->block_offset; | |
474 fp->block_offset = 0; | |
475 ++mt->curr; | |
476 } | |
477 | |
478 static int mt_flush(BGZF *fp) | |
479 { | |
480 int i; | |
481 mtaux_t *mt = (mtaux_t*)fp->mt; | |
482 if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail | |
483 // signal all the workers to compress | |
484 pthread_mutex_lock(&mt->lock); | |
485 for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1; | |
486 mt->proc_cnt = 0; | |
487 pthread_cond_broadcast(&mt->cv); | |
488 pthread_mutex_unlock(&mt->lock); | |
489 // worker 0 is doing things here | |
490 worker_aux(&mt->w[0]); | |
491 // wait for all the threads to complete | |
492 while (mt->proc_cnt < mt->n_threads); | |
493 // dump data to disk | |
494 for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode; | |
495 for (i = 0; i < mt->curr; ++i) | |
496 if (fwrite(mt->blk[i], 1, mt->len[i], fp->fp) != mt->len[i]) | |
497 fp->errcode |= BGZF_ERR_IO; | |
498 mt->curr = 0; | |
499 return 0; | |
500 } | |
501 | |
502 static int mt_lazy_flush(BGZF *fp) | |
503 { | |
504 mtaux_t *mt = (mtaux_t*)fp->mt; | |
505 if (fp->block_offset) mt_queue(fp); | |
506 if (mt->curr == mt->n_blks) | |
507 return mt_flush(fp); | |
508 return -1; | |
509 } | |
510 | |
511 static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length) | |
512 { | |
513 const uint8_t *input = data; | |
514 ssize_t rest = length; | |
515 while (rest) { | |
516 int copy_length = BGZF_BLOCK_SIZE - fp->block_offset < rest? BGZF_BLOCK_SIZE - fp->block_offset : rest; | |
517 memcpy(fp->uncompressed_block + fp->block_offset, input, copy_length); | |
518 fp->block_offset += copy_length; input += copy_length; rest -= copy_length; | |
519 if (fp->block_offset == BGZF_BLOCK_SIZE) mt_lazy_flush(fp); | |
520 } | |
521 return length - rest; | |
522 } | |
523 | |
524 /***** END: multi-threading *****/ | |
525 | |
526 int bgzf_flush(BGZF *fp) | |
527 { | |
528 if (!fp->is_write) return 0; | |
529 if (fp->mt) return mt_flush(fp); | |
530 while (fp->block_offset > 0) { | |
531 int block_length; | |
532 block_length = deflate_block(fp, fp->block_offset); | |
533 if (block_length < 0) return -1; | |
534 if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) { | |
535 fp->errcode |= BGZF_ERR_IO; // possibly truncated file | |
536 return -1; | |
537 } | |
538 fp->block_address += block_length; | |
539 } | |
540 return 0; | |
541 } | |
542 | |
543 int bgzf_flush_try(BGZF *fp, ssize_t size) | |
544 { | |
545 if (fp->block_offset + size > BGZF_BLOCK_SIZE) { | |
546 if (fp->mt) return mt_lazy_flush(fp); | |
547 else return bgzf_flush(fp); | |
548 } | |
549 return -1; | |
550 } | |
551 | |
552 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length) | |
553 { | |
554 const uint8_t *input = data; | |
555 int block_length = BGZF_BLOCK_SIZE, bytes_written = 0; | |
556 assert(fp->is_write); | |
557 if (fp->mt) return mt_write(fp, data, length); | |
558 while (bytes_written < length) { | |
559 uint8_t* buffer = fp->uncompressed_block; | |
560 int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written; | |
561 memcpy(buffer + fp->block_offset, input, copy_length); | |
562 fp->block_offset += copy_length; | |
563 input += copy_length; | |
564 bytes_written += copy_length; | |
565 if (fp->block_offset == block_length && bgzf_flush(fp)) break; | |
566 } | |
567 return bytes_written; | |
568 } | |
569 | |
570 int bgzf_close(BGZF* fp) | |
571 { | |
572 int ret, count, block_length; | |
573 if (fp == 0) return -1; | |
574 if (fp->is_write) { | |
575 if (bgzf_flush(fp) != 0) return -1; | |
576 fp->compress_level = -1; | |
577 block_length = deflate_block(fp, 0); // write an empty block | |
578 count = fwrite(fp->compressed_block, 1, block_length, fp->fp); | |
579 if (fflush(fp->fp) != 0) { | |
580 fp->errcode |= BGZF_ERR_IO; | |
581 return -1; | |
582 } | |
583 if (fp->mt) mt_destroy(fp->mt); | |
584 } | |
585 ret = fp->is_write? fclose(fp->fp) : _bgzf_close(fp->fp); | |
586 if (ret != 0) return -1; | |
587 free(fp->uncompressed_block); | |
588 free(fp->compressed_block); | |
589 free_cache(fp); | |
590 free(fp); | |
591 return 0; | |
592 } | |
593 | |
594 void bgzf_set_cache_size(BGZF *fp, int cache_size) | |
595 { | |
596 if (fp) fp->cache_size = cache_size; | |
597 } | |
598 | |
599 int bgzf_check_EOF(BGZF *fp) | |
600 { | |
601 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; | |
602 uint8_t buf[28]; | |
603 off_t offset; | |
604 offset = _bgzf_tell((_bgzf_file_t)fp->fp); | |
605 if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0; | |
606 _bgzf_read(fp->fp, buf, 28); | |
607 _bgzf_seek(fp->fp, offset, SEEK_SET); | |
608 return (memcmp(magic, buf, 28) == 0)? 1 : 0; | |
609 } | |
610 | |
611 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) | |
612 { | |
613 int block_offset; | |
614 int64_t block_address; | |
615 | |
616 if (fp->is_write || where != SEEK_SET) { | |
617 fp->errcode |= BGZF_ERR_MISUSE; | |
618 return -1; | |
619 } | |
620 block_offset = pos & 0xFFFF; | |
621 block_address = pos >> 16; | |
622 if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) { | |
623 fp->errcode |= BGZF_ERR_IO; | |
624 return -1; | |
625 } | |
626 fp->block_length = 0; // indicates current block has not been loaded | |
627 fp->block_address = block_address; | |
628 fp->block_offset = block_offset; | |
629 return 0; | |
630 } | |
631 | |
632 int bgzf_is_bgzf(const char *fn) | |
633 { | |
634 uint8_t buf[16]; | |
635 int n; | |
636 _bgzf_file_t fp; | |
637 if ((fp = _bgzf_open(fn, "r")) == 0) return 0; | |
638 n = _bgzf_read(fp, buf, 16); | |
639 _bgzf_close(fp); | |
640 if (n != 16) return 0; | |
641 return memcmp(g_magic, buf, 16) == 0? 1 : 0; | |
642 } | |
643 | |
644 int bgzf_getc(BGZF *fp) | |
645 { | |
646 int c; | |
647 if (fp->block_offset >= fp->block_length) { | |
648 if (bgzf_read_block(fp) != 0) return -2; /* error */ | |
649 if (fp->block_length == 0) return -1; /* end-of-file */ | |
650 } | |
651 c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; | |
652 if (fp->block_offset == fp->block_length) { | |
653 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); | |
654 fp->block_offset = 0; | |
655 fp->block_length = 0; | |
656 } | |
657 return c; | |
658 } | |
659 | |
660 #ifndef kroundup32 | |
661 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) | |
662 #endif | |
663 | |
664 int bgzf_getline(BGZF *fp, int delim, kstring_t *str) | |
665 { | |
666 int l, state = 0; | |
667 unsigned char *buf = (unsigned char*)fp->uncompressed_block; | |
668 str->l = 0; | |
669 do { | |
670 if (fp->block_offset >= fp->block_length) { | |
671 if (bgzf_read_block(fp) != 0) { state = -2; break; } | |
672 if (fp->block_length == 0) { state = -1; break; } | |
673 } | |
674 for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l); | |
675 if (l < fp->block_length) state = 1; | |
676 l -= fp->block_offset; | |
677 if (str->l + l + 1 >= str->m) { | |
678 str->m = str->l + l + 2; | |
679 kroundup32(str->m); | |
680 str->s = (char*)realloc(str->s, str->m); | |
681 } | |
682 memcpy(str->s + str->l, buf + fp->block_offset, l); | |
683 str->l += l; | |
684 fp->block_offset += l + 1; | |
685 if (fp->block_offset >= fp->block_length) { | |
686 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); | |
687 fp->block_offset = 0; | |
688 fp->block_length = 0; | |
689 } | |
690 } while (state == 0); | |
691 if (str->l == 0 && state < 0) return state; | |
692 str->s[str->l] = 0; | |
693 return str->l; | |
694 } |