annotate PsiCLASS-1.0.2/samtools-0.1.19/bgzf.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /* The MIT License
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 2011 Attractive Chaos <attractor@live.co.uk>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 Permission is hereby granted, free of charge, to any person obtaining a copy
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 of this software and associated documentation files (the "Software"), to deal
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 in the Software without restriction, including without limitation the rights
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 copies of the Software, and to permit persons to whom the Software is
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 furnished to do so, subject to the following conditions:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 The above copyright notice and this permission notice shall be included in
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 all copies or substantial portions of the Software.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 THE SOFTWARE.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 #include <unistd.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 #include <assert.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 #include <pthread.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 #include <sys/types.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 #include "bgzf.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*
 * Low-level file access layer. All raw I/O in this file goes through the
 * _bgzf_* macros, which expand either to the knetfile API (when
 * _USE_KNETFILE is defined) or to plain stdio calls.
 */
#ifdef _USE_KNETFILE
#include "knetfile.h"
typedef knetFile *_bgzf_file_t;
#define _bgzf_open(fn, mode)            knet_open(fn, mode)
#define _bgzf_dopen(fp, mode)           knet_dopen(fp, mode)
#define _bgzf_close(fp)                 knet_close(fp)
#define _bgzf_fileno(fp)                ((fp)->fd)
#define _bgzf_tell(fp)                  knet_tell(fp)
#define _bgzf_seek(fp, offset, whence)  knet_seek(fp, offset, whence)
#define _bgzf_read(fp, buf, len)        knet_read(fp, buf, len)
#define _bgzf_write(fp, buf, len)       knet_write(fp, buf, len)
#else /* !defined(_USE_KNETFILE) */
#if defined(_WIN32) || defined(_MSC_VER)
/* Map the off_t-based calls onto their long-based stdio counterparts. */
#define ftello(fp)                      ftell(fp)
#define fseeko(fp, offset, whence)      fseek(fp, offset, whence)
#else /* !defined(_WIN32) */
/* Declared by hand so the calls below always have a prototype in scope. */
extern off_t ftello(FILE *stream);
extern int fseeko(FILE *stream, off_t offset, int whence);
#endif /* !defined(_WIN32) */
typedef FILE *_bgzf_file_t;
#define _bgzf_open(fn, mode)            fopen(fn, mode)
#define _bgzf_dopen(fp, mode)           fdopen(fp, mode)
#define _bgzf_close(fp)                 fclose(fp)
#define _bgzf_fileno(fp)                fileno(fp)
#define _bgzf_tell(fp)                  ftello(fp)
#define _bgzf_seek(fp, offset, whence)  fseeko(fp, offset, whence)
#define _bgzf_read(fp, buf, len)        fread(buf, 1, len, fp)
#define _bgzf_write(fp, buf, len)       fwrite(buf, 1, len, fp)
#endif /* !defined(_USE_KNETFILE) */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/* Byte counts of the fixed parts wrapped around each block's deflate data. */
#define BLOCK_HEADER_LENGTH 18
#define BLOCK_FOOTER_LENGTH 8


/* BGZF/GZIP header (specialized from RFC 1952; little endian):
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 | 31|139|  8|  4|              0|  0|255|      6| 66| 67|      2|BLK_LEN|
 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
*/
static const uint8_t g_magic[19] = {
	31, 139, 8, 4,  /* gzip magic, deflate method, flag byte with bit 2 set */
	0, 0, 0, 0,     /* four zero bytes */
	0, 255,         /* zero byte, then 255 */
	6, 0,           /* 16-bit value 6 */
	66, 67,         /* 'B', 'C' */
	2, 0,           /* 16-bit value 2 */
	0, 0,           /* BLK_LEN placeholder, filled in per block */
	0               /* trailing NUL kept so the array stays 19 bytes */
};
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 #ifdef BGZF_CACHE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 int size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 uint8_t *block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 int64_t end_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 } cache_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 #include "khash.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 KHASH_MAP_INIT_INT64(cache, cache_t)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/* Write `value` into buffer[0..1], least-significant byte first. */
static inline void packInt16(uint8_t *buffer, uint16_t value)
{
	int k;
	for (k = 0; k < 2; ++k)
		buffer[k] = value >> (8 * k);
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/* Read a little-endian 16-bit value from buffer[0..1]; result is 0..65535. */
static inline int unpackInt16(const uint8_t *buffer)
{
	const int low = buffer[0];
	const int high = buffer[1];
	return (high << 8) | low;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/* Write `value` into buffer[0..3], least-significant byte first. */
static inline void packInt32(uint8_t *buffer, uint32_t value)
{
	int k;
	for (k = 0; k < 4; ++k, value >>= 8)
		buffer[k] = value;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 static BGZF *bgzf_read_init()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 BGZF *fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 fp = calloc(1, sizeof(BGZF));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 fp->is_write = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 #ifdef BGZF_CACHE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 fp->cache = kh_init(cache);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 return fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 BGZF *fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 fp = calloc(1, sizeof(BGZF));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 fp->is_write = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 return fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*
 * Derive the compression level from an fopen-style mode string.
 *
 * A 'u' anywhere in the string forces level 0. Otherwise the first decimal
 * digit found is the level. With neither present the result is -1.
 */
static int mode2level(const char *__restrict mode)
{
	const char *p;
	if (strchr(mode, 'u') != NULL)
		return 0;
	for (p = mode; *p != '\0'; ++p) {
		if (*p >= '0' && *p <= '9')
			return *p - '0';
	}
	return -1;
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 BGZF *bgzf_open(const char *path, const char *mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 BGZF *fp = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 if (strchr(mode, 'r') || strchr(mode, 'R')) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 _bgzf_file_t fpr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 if ((fpr = _bgzf_open(path, "r")) == 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 fp = bgzf_read_init();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 fp->fp = fpr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 FILE *fpw;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 if ((fpw = fopen(path, "w")) == 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 fp = bgzf_write_init(mode2level(mode));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 fp->fp = fpw;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 return fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 BGZF *bgzf_dopen(int fd, const char *mode)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 BGZF *fp = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 if (strchr(mode, 'r') || strchr(mode, 'R')) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 _bgzf_file_t fpr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 fp = bgzf_read_init();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 fp->fp = fpr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 FILE *fpw;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 if ((fpw = fdopen(fd, "w")) == 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 fp = bgzf_write_init(mode2level(mode));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 fp->fp = fpw;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 return fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 uint32_t crc;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178 z_stream zs;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 uint8_t *dst = (uint8_t*)_dst;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 // compress the body
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 zs.zalloc = NULL; zs.zfree = NULL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 zs.next_in = src;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 zs.avail_in = slen;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 zs.next_out = dst + BLOCK_HEADER_LENGTH;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188 if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 if (deflateEnd(&zs) != Z_OK) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 // write the header
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193 packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 // write the footer
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 crc = crc32(crc32(0L, NULL, 0L), src, slen);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 packInt32((uint8_t*)&dst[*dlen - 8], crc);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 packInt32((uint8_t*)&dst[*dlen - 4], slen);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202 static int deflate_block(BGZF *fp, int block_length)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 int comp_size = BGZF_MAX_BLOCK_SIZE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 if (bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level) != 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206 fp->errcode |= BGZF_ERR_ZLIB;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
208 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
209 fp->block_offset = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
210 return comp_size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
211 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
212
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
213 // Inflate the block in fp->compressed_block into fp->uncompressed_block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
214 static int inflate_block(BGZF* fp, int block_length)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
215 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
216 z_stream zs;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
217 zs.zalloc = NULL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
218 zs.zfree = NULL;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
219 zs.next_in = fp->compressed_block + 18;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
220 zs.avail_in = block_length - 16;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
221 zs.next_out = fp->uncompressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
222 zs.avail_out = BGZF_MAX_BLOCK_SIZE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
223
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
224 if (inflateInit2(&zs, -15) != Z_OK) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
225 fp->errcode |= BGZF_ERR_ZLIB;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
226 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
227 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
228 if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
229 inflateEnd(&zs);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
230 fp->errcode |= BGZF_ERR_ZLIB;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
231 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
232 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
233 if (inflateEnd(&zs) != Z_OK) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
234 fp->errcode |= BGZF_ERR_ZLIB;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
235 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
236 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
237 return zs.total_out;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
238 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
239
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/*
 * Check that `header` starts with the fixed BGZF header fields.
 * Returns 1 when every check passes, 0 otherwise. Bytes 4..9 and the
 * block-length field at bytes 16..17 are not examined.
 */
static int check_header(const uint8_t *header)
{
	if (header[0] != 31 || header[1] != 139 || header[2] != 8) return 0;
	if ((header[3] & 4) == 0) return 0;                        // flag bit 2 must be set
	if ((header[10] | header[11] << 8) != 6) return 0;          // little-endian 16-bit field == 6
	if (header[12] != 'B' || header[13] != 'C') return 0;
	return (header[14] | header[15] << 8) == 2;                 // little-endian 16-bit field == 2
}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
247
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
248 #ifdef BGZF_CACHE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
249 static void free_cache(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
250 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
251 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
252 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
253 if (fp->is_write) return;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
254 for (k = kh_begin(h); k < kh_end(h); ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
255 if (kh_exist(h, k)) free(kh_val(h, k).block);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
256 kh_destroy(cache, h);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
257 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
258
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
259 static int load_block_from_cache(BGZF *fp, int64_t block_address)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
260 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
261 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
262 cache_t *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
263 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
264 k = kh_get(cache, h, block_address);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
265 if (k == kh_end(h)) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
266 p = &kh_val(h, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
267 if (fp->block_length != 0) fp->block_offset = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
268 fp->block_address = block_address;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
269 fp->block_length = p->size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
270 memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
271 _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
272 return p->size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
273 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
274
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
275 static void cache_block(BGZF *fp, int size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
276 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
277 int ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
278 khint_t k;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
279 cache_t *p;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
280 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
281 if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
282 if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > fp->cache_size) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
283 /* A better way would be to remove the oldest block in the
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
284 * cache, but here we remove a random one for simplicity. This
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
285 * should not have a big impact on performance. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
286 for (k = kh_begin(h); k < kh_end(h); ++k)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
287 if (kh_exist(h, k)) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
288 if (k < kh_end(h)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
289 free(kh_val(h, k).block);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
290 kh_del(cache, h, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
291 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
292 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
293 k = kh_put(cache, h, fp->block_address, &ret);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
294 if (ret == 0) return; // if this happens, a bug!
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
295 p = &kh_val(h, k);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
296 p->size = fp->block_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
297 p->end_offset = fp->block_address + size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
298 p->block = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
299 memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
300 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
301 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
302 static void free_cache(BGZF *fp) {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
303 static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
304 static void cache_block(BGZF *fp, int size) {}
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
305 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
306
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
307 int bgzf_read_block(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
308 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
309 uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
310 int count, size = 0, block_length, remaining;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
311 int64_t block_address;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
312 block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
313 if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
314 count = _bgzf_read(fp->fp, header, sizeof(header));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
315 if (count == 0) { // no data read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
316 fp->block_length = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
317 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
318 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
319 if (count != sizeof(header) || !check_header(header)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
320 fp->errcode |= BGZF_ERR_HEADER;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
321 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
322 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
323 size = count;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
324 block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
325 compressed_block = (uint8_t*)fp->compressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
326 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
327 remaining = block_length - BLOCK_HEADER_LENGTH;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
328 count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
329 if (count != remaining) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
330 fp->errcode |= BGZF_ERR_IO;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
331 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
332 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
333 size += count;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
334 if ((count = inflate_block(fp, block_length)) < 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
335 if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
336 fp->block_address = block_address;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
337 fp->block_length = count;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
338 cache_block(fp, size);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
339 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
340 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
341
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
342 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
343 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
344 ssize_t bytes_read = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
345 uint8_t *output = data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
346 if (length <= 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
347 assert(fp->is_write == 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
348 while (bytes_read < length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
349 int copy_length, available = fp->block_length - fp->block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
350 uint8_t *buffer;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
351 if (available <= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
352 if (bgzf_read_block(fp) != 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
353 available = fp->block_length - fp->block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
354 if (available <= 0) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
355 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
356 copy_length = length - bytes_read < available? length - bytes_read : available;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
357 buffer = fp->uncompressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
358 memcpy(output, buffer + fp->block_offset, copy_length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
359 fp->block_offset += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
360 output += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
361 bytes_read += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
362 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
363 if (fp->block_offset == fp->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
364 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
365 fp->block_offset = fp->block_length = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
366 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
367 return bytes_read;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
368 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
369
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
370 /***** BEGIN: multi-threading *****/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
371
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
372 typedef struct {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
373 BGZF *fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
374 struct mtaux_t *mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
375 void *buf;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
376 int i, errcode, toproc;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
377 } worker_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
378
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
379 typedef struct mtaux_t {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
380 int n_threads, n_blks, curr, done;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
381 volatile int proc_cnt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
382 void **blk;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
383 int *len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
384 worker_t *w;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
385 pthread_t *tid;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
386 pthread_mutex_t lock;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
387 pthread_cond_t cv;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
388 } mtaux_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
389
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
390 static int worker_aux(worker_t *w)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
391 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
392 int i, tmp, stop = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
393 // wait for condition: to process or all done
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
394 pthread_mutex_lock(&w->mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
395 while (!w->toproc && !w->mt->done)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
396 pthread_cond_wait(&w->mt->cv, &w->mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
397 if (w->mt->done) stop = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
398 w->toproc = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
399 pthread_mutex_unlock(&w->mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
400 if (stop) return 1; // to quit the thread
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
401 w->errcode = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
402 for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
403 int clen = BGZF_MAX_BLOCK_SIZE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
404 if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
405 w->errcode |= BGZF_ERR_ZLIB;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
406 memcpy(w->mt->blk[i], w->buf, clen);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
407 w->mt->len[i] = clen;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
408 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
409 tmp = __sync_fetch_and_add(&w->mt->proc_cnt, 1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
410 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
411 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
412
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
413 static void *mt_worker(void *data)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
414 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
415 while (worker_aux(data) == 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
416 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
417 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
418
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
419 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
420 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
421 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
422 mtaux_t *mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
423 pthread_attr_t attr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
424 if (!fp->is_write || fp->mt || n_threads <= 1) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
425 mt = calloc(1, sizeof(mtaux_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
426 mt->n_threads = n_threads;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
427 mt->n_blks = n_threads * n_sub_blks;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
428 mt->len = calloc(mt->n_blks, sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
429 mt->blk = calloc(mt->n_blks, sizeof(void*));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
430 for (i = 0; i < mt->n_blks; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
431 mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
432 mt->tid = calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
433 mt->w = calloc(mt->n_threads, sizeof(worker_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
434 for (i = 0; i < mt->n_threads; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
435 mt->w[i].i = i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
436 mt->w[i].mt = mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
437 mt->w[i].fp = fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
438 mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
439 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
440 pthread_attr_init(&attr);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
441 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
442 pthread_mutex_init(&mt->lock, 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
443 pthread_cond_init(&mt->cv, 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
444 for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
445 pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
446 fp->mt = mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
447 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
448 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
449
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
450 static void mt_destroy(mtaux_t *mt)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
451 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
452 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
453 // signal all workers to quit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
454 pthread_mutex_lock(&mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
455 mt->done = 1; mt->proc_cnt = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
456 pthread_cond_broadcast(&mt->cv);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
457 pthread_mutex_unlock(&mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
458 for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // worker 0 is effectively launched by the master thread
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
459 // free other data allocated on heap
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
460 for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
461 for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
462 free(mt->blk); free(mt->len); free(mt->w); free(mt->tid);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
463 pthread_cond_destroy(&mt->cv);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
464 pthread_mutex_destroy(&mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
465 free(mt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
466 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
467
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
468 static void mt_queue(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
469 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
470 mtaux_t *mt = (mtaux_t*)fp->mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
471 assert(mt->curr < mt->n_blks); // guaranteed by the caller
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
472 memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
473 mt->len[mt->curr] = fp->block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
474 fp->block_offset = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
475 ++mt->curr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
476 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
477
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
478 static int mt_flush(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
479 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
480 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
481 mtaux_t *mt = (mtaux_t*)fp->mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
482 if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
483 // signal all the workers to compress
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
484 pthread_mutex_lock(&mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
485 for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
486 mt->proc_cnt = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
487 pthread_cond_broadcast(&mt->cv);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
488 pthread_mutex_unlock(&mt->lock);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
489 // worker 0 is doing things here
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
490 worker_aux(&mt->w[0]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
491 // wait for all the threads to complete
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
492 while (mt->proc_cnt < mt->n_threads);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
493 // dump data to disk
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
494 for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
495 for (i = 0; i < mt->curr; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
496 if (fwrite(mt->blk[i], 1, mt->len[i], fp->fp) != mt->len[i])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
497 fp->errcode |= BGZF_ERR_IO;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
498 mt->curr = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
499 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
500 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
501
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
502 static int mt_lazy_flush(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
503 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
504 mtaux_t *mt = (mtaux_t*)fp->mt;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
505 if (fp->block_offset) mt_queue(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
506 if (mt->curr == mt->n_blks)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
507 return mt_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
508 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
509 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
510
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
511 static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
512 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
513 const uint8_t *input = data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
514 ssize_t rest = length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
515 while (rest) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
516 int copy_length = BGZF_BLOCK_SIZE - fp->block_offset < rest? BGZF_BLOCK_SIZE - fp->block_offset : rest;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
517 memcpy(fp->uncompressed_block + fp->block_offset, input, copy_length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
518 fp->block_offset += copy_length; input += copy_length; rest -= copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
519 if (fp->block_offset == BGZF_BLOCK_SIZE) mt_lazy_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
520 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
521 return length - rest;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
522 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
523
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
524 /***** END: multi-threading *****/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
525
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
526 int bgzf_flush(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
527 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
528 if (!fp->is_write) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
529 if (fp->mt) return mt_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
530 while (fp->block_offset > 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
531 int block_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
532 block_length = deflate_block(fp, fp->block_offset);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
533 if (block_length < 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
534 if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
535 fp->errcode |= BGZF_ERR_IO; // possibly truncated file
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
536 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
537 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
538 fp->block_address += block_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
539 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
540 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
541 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
542
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
543 int bgzf_flush_try(BGZF *fp, ssize_t size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
544 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
545 if (fp->block_offset + size > BGZF_BLOCK_SIZE) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
546 if (fp->mt) return mt_lazy_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
547 else return bgzf_flush(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
548 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
549 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
550 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
551
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
552 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
553 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
554 const uint8_t *input = data;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
555 int block_length = BGZF_BLOCK_SIZE, bytes_written = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
556 assert(fp->is_write);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
557 if (fp->mt) return mt_write(fp, data, length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
558 while (bytes_written < length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
559 uint8_t* buffer = fp->uncompressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
560 int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
561 memcpy(buffer + fp->block_offset, input, copy_length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
562 fp->block_offset += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
563 input += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
564 bytes_written += copy_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
565 if (fp->block_offset == block_length && bgzf_flush(fp)) break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
566 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
567 return bytes_written;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
568 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
569
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
570 int bgzf_close(BGZF* fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
571 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
572 int ret, count, block_length;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
573 if (fp == 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
574 if (fp->is_write) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
575 if (bgzf_flush(fp) != 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
576 fp->compress_level = -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
577 block_length = deflate_block(fp, 0); // write an empty block
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
578 count = fwrite(fp->compressed_block, 1, block_length, fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
579 if (fflush(fp->fp) != 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
580 fp->errcode |= BGZF_ERR_IO;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
581 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
582 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
583 if (fp->mt) mt_destroy(fp->mt);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
584 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
585 ret = fp->is_write? fclose(fp->fp) : _bgzf_close(fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
586 if (ret != 0) return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
587 free(fp->uncompressed_block);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
588 free(fp->compressed_block);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
589 free_cache(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
590 free(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
591 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
592 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
593
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
594 void bgzf_set_cache_size(BGZF *fp, int cache_size)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
595 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
596 if (fp) fp->cache_size = cache_size;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
597 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
598
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
599 int bgzf_check_EOF(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
600 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
601 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
602 uint8_t buf[28];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
603 off_t offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
604 offset = _bgzf_tell((_bgzf_file_t)fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
605 if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
606 _bgzf_read(fp->fp, buf, 28);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
607 _bgzf_seek(fp->fp, offset, SEEK_SET);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
608 return (memcmp(magic, buf, 28) == 0)? 1 : 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
609 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
610
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
611 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
612 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
613 int block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
614 int64_t block_address;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
615
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
616 if (fp->is_write || where != SEEK_SET) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
617 fp->errcode |= BGZF_ERR_MISUSE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
618 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
619 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
620 block_offset = pos & 0xFFFF;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
621 block_address = pos >> 16;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
622 if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
623 fp->errcode |= BGZF_ERR_IO;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
624 return -1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
625 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
626 fp->block_length = 0; // indicates current block has not been loaded
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
627 fp->block_address = block_address;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
628 fp->block_offset = block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
629 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
630 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
631
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
632 int bgzf_is_bgzf(const char *fn)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
633 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
634 uint8_t buf[16];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
635 int n;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
636 _bgzf_file_t fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
637 if ((fp = _bgzf_open(fn, "r")) == 0) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
638 n = _bgzf_read(fp, buf, 16);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
639 _bgzf_close(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
640 if (n != 16) return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
641 return memcmp(g_magic, buf, 16) == 0? 1 : 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
642 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
643
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
644 int bgzf_getc(BGZF *fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
645 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
646 int c;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
647 if (fp->block_offset >= fp->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
648 if (bgzf_read_block(fp) != 0) return -2; /* error */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
649 if (fp->block_length == 0) return -1; /* end-of-file */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
650 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
651 c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
652 if (fp->block_offset == fp->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
653 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
654 fp->block_offset = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
655 fp->block_length = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
656 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
657 return c;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
658 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
659
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
#ifndef kroundup32
/*
 * Round the integer lvalue x up, in place, to the next power of two (x is
 * left unchanged if it already is one). Only the low 32 bits are smeared, so
 * this is meant for values that fit in 32 bits. x is evaluated several times:
 * pass a plain variable, never an expression with side effects.
 */
#define kroundup32(x) \
	(--(x), \
	 (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, \
	 (x)|=(x)>>8, (x)|=(x)>>16, \
	 ++(x))
#endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
663
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
664 int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
665 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
666 int l, state = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
667 unsigned char *buf = (unsigned char*)fp->uncompressed_block;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
668 str->l = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
669 do {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
670 if (fp->block_offset >= fp->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
671 if (bgzf_read_block(fp) != 0) { state = -2; break; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
672 if (fp->block_length == 0) { state = -1; break; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
673 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
674 for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
675 if (l < fp->block_length) state = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
676 l -= fp->block_offset;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
677 if (str->l + l + 1 >= str->m) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
678 str->m = str->l + l + 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
679 kroundup32(str->m);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
680 str->s = (char*)realloc(str->s, str->m);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
681 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
682 memcpy(str->s + str->l, buf + fp->block_offset, l);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
683 str->l += l;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
684 fp->block_offset += l + 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
685 if (fp->block_offset >= fp->block_length) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
686 fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
687 fp->block_offset = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
688 fp->block_length = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
689 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
690 } while (state == 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
691 if (str->l == 0 && state < 0) return state;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
692 str->s[str->l] = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
693 return str->l;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
694 }