Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/razf.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 /* | |
2 * RAZF : Random Access compressed(Z) File | |
3 * Version: 1.0 | |
4 * Release Date: 2008-10-27 | |
5 * | |
6 * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk> | |
7 * | |
8 * All rights reserved. | |
9 * | |
10 * Redistribution and use in source and binary forms, with or without | |
11 * modification, are permitted provided that the following conditions | |
12 * are met: | |
13 * 1. Redistributions of source code must retain the above copyright | |
14 * notice, this list of conditions and the following disclaimer. | |
15 * 2. Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in the | |
17 * documentation and/or other materials provided with the distribution. | |
18 * | |
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 * SUCH DAMAGE. | |
30 */ | |
31 | |
32 #ifndef _NO_RAZF | |
33 | |
34 #include <fcntl.h> | |
35 #include <stdio.h> | |
36 #include <stdlib.h> | |
37 #include <string.h> | |
38 #include <unistd.h> | |
39 #include "razf.h" | |
40 | |
41 | |
#if ZLIB_VERNUM < 0x1221
/*
 * Local declaration of struct _gz_header_s, compiled only when ZLIB_VERNUM
 * is below 0x1221. The #warning below announces that RAZF writing is
 * disabled for such zlib versions.
 * NOTE(review): the field list presumably mirrors the gz_header layout of
 * newer zlib releases -- confirm against the zlib.h being targeted.
 */
struct _gz_header_s {
	int text;
	uLong time;
	int xflags;
	int os;
	Bytef *extra;
	uInt extra_len;
	uInt extra_max;
	Bytef *name;
	uInt name_max;
	Bytef *comment;
	uInt comm_max;
	int hcrc;
	int done;
};
#warning "zlib < 1.2.2.1; RAZF writing is disabled."
#endif
60 | |
61 #define DEF_MEM_LEVEL 8 | |
62 | |
/* Reverse the byte order of a 32-bit value (b3 b2 b1 b0 -> b0 b1 b2 b3). */
static inline uint32_t byte_swap_4(uint32_t v){
	/* First exchange the two 16-bit halves, then the bytes inside each half. */
	const uint32_t halves = (v << 16) | (v >> 16);
	const uint32_t moved_up = (halves << 8) & 0xFF00FF00U;
	const uint32_t moved_down = (halves >> 8) & 0x00FF00FFU;
	return moved_up | moved_down;
}
67 | |
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	/* Swap progressively smaller groups: 32-bit words, 16-bit pairs, bytes. */
	v = (v << 32) | (v >> 32);
	v = ((v << 16) & 0xFFFF0000FFFF0000ULL) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
	v = ((v << 8) & 0xFF00FF00FF00FF00ULL) | ((v >> 8) & 0x00FF00FF00FF00FFULL);
	return v;
}
73 | |
/*
 * Report the host byte order: returns non-zero when the first byte in memory
 * of an int holding 1 is not 1 (i.e. the low-order byte is not stored first).
 */
static inline int is_big_endian(){
	const int probe = 0x01;
	const char *first_byte = (const char*)&probe;
	return *first_byte != 0x01;
}
79 | |
80 #ifndef _RZ_READONLY | |
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){ | |
82 if(rz->index->size == rz->index->cap){ | |
83 rz->index->cap = rz->index->cap * 1.5 + 2; | |
84 rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap); | |
85 rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1)); | |
86 } | |
87 if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out; | |
88 rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE]; | |
89 rz->index->size ++; | |
90 } | |
91 | |
92 static void save_zindex(RAZF *rz, int fd){ | |
93 int32_t i, v32; | |
94 int is_be; | |
95 is_be = is_big_endian(); | |
96 if(is_be) write(fd, &rz->index->size, sizeof(int)); | |
97 else { | |
98 v32 = byte_swap_4((uint32_t)rz->index->size); | |
99 write(fd, &v32, sizeof(uint32_t)); | |
100 } | |
101 v32 = rz->index->size / RZ_BIN_SIZE + 1; | |
102 if(!is_be){ | |
103 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); | |
104 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); | |
105 } | |
106 write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); | |
107 write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size); | |
108 } | |
109 #endif | |
110 | |
111 #ifdef _USE_KNETFILE | |
112 static void load_zindex(RAZF *rz, knetFile *fp){ | |
113 #else | |
114 static void load_zindex(RAZF *rz, int fd){ | |
115 #endif | |
116 int32_t i, v32; | |
117 int is_be; | |
118 if(!rz->load_index) return; | |
119 if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex)); | |
120 is_be = is_big_endian(); | |
121 #ifdef _USE_KNETFILE | |
122 knet_read(fp, &rz->index->size, sizeof(int)); | |
123 #else | |
124 read(fd, &rz->index->size, sizeof(int)); | |
125 #endif | |
126 if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size); | |
127 rz->index->cap = rz->index->size; | |
128 v32 = rz->index->size / RZ_BIN_SIZE + 1; | |
129 rz->index->bin_offsets = malloc(sizeof(int64_t) * v32); | |
130 #ifdef _USE_KNETFILE | |
131 knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32); | |
132 #else | |
133 read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); | |
134 #endif | |
135 rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size); | |
136 #ifdef _USE_KNETFILE | |
137 knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size); | |
138 #else | |
139 read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size); | |
140 #endif | |
141 if(!is_be){ | |
142 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); | |
143 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); | |
144 } | |
145 } | |
146 | |
147 #ifdef _RZ_READONLY | |
148 static RAZF* razf_open_w(int fd) | |
149 { | |
150 fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n"); | |
151 return 0; | |
152 } | |
153 #else | |
154 static RAZF* razf_open_w(int fd){ | |
155 RAZF *rz; | |
156 #ifdef _WIN32 | |
157 setmode(fd, O_BINARY); | |
158 #endif | |
159 rz = calloc(1, sizeof(RAZF)); | |
160 rz->mode = 'w'; | |
161 #ifdef _USE_KNETFILE | |
162 rz->x.fpw = fd; | |
163 #else | |
164 rz->filedes = fd; | |
165 #endif | |
166 rz->stream = calloc(sizeof(z_stream), 1); | |
167 rz->inbuf = malloc(RZ_BUFFER_SIZE); | |
168 rz->outbuf = malloc(RZ_BUFFER_SIZE); | |
169 rz->index = calloc(sizeof(ZBlockIndex), 1); | |
170 deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); | |
171 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
172 rz->stream->next_out = rz->outbuf; | |
173 rz->header = calloc(sizeof(gz_header), 1); | |
174 rz->header->os = 0x03; //Unix | |
175 rz->header->text = 0; | |
176 rz->header->time = 0; | |
177 rz->header->extra = malloc(7); | |
178 strncpy((char*)rz->header->extra, "RAZF", 4); | |
179 rz->header->extra[4] = 1; // obsolete field | |
180 // block size = RZ_BLOCK_SIZE, Big-Endian | |
181 rz->header->extra[5] = RZ_BLOCK_SIZE >> 8; | |
182 rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF; | |
183 rz->header->extra_len = 7; | |
184 rz->header->name = rz->header->comment = 0; | |
185 rz->header->hcrc = 0; | |
186 deflateSetHeader(rz->stream, rz->header); | |
187 rz->block_pos = rz->block_off = 0; | |
188 return rz; | |
189 } | |
190 | |
191 static void _razf_write(RAZF* rz, const void *data, int size){ | |
192 int tout; | |
193 rz->stream->avail_in = size; | |
194 rz->stream->next_in = (void*)data; | |
195 while(1){ | |
196 tout = rz->stream->avail_out; | |
197 deflate(rz->stream, Z_NO_FLUSH); | |
198 rz->out += tout - rz->stream->avail_out; | |
199 if(rz->stream->avail_out) break; | |
200 #ifdef _USE_KNETFILE | |
201 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
202 #else | |
203 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
204 #endif | |
205 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
206 rz->stream->next_out = rz->outbuf; | |
207 if(rz->stream->avail_in == 0) break; | |
208 }; | |
209 rz->in += size - rz->stream->avail_in; | |
210 rz->block_off += size - rz->stream->avail_in; | |
211 } | |
212 | |
213 static void razf_flush(RAZF *rz){ | |
214 uint32_t tout; | |
215 if(rz->buf_len){ | |
216 _razf_write(rz, rz->inbuf, rz->buf_len); | |
217 rz->buf_off = rz->buf_len = 0; | |
218 } | |
219 if(rz->stream->avail_out){ | |
220 #ifdef _USE_KNETFILE | |
221 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
222 #else | |
223 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
224 #endif | |
225 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
226 rz->stream->next_out = rz->outbuf; | |
227 } | |
228 while(1){ | |
229 tout = rz->stream->avail_out; | |
230 deflate(rz->stream, Z_FULL_FLUSH); | |
231 rz->out += tout - rz->stream->avail_out; | |
232 if(rz->stream->avail_out == 0){ | |
233 #ifdef _USE_KNETFILE | |
234 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
235 #else | |
236 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
237 #endif | |
238 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
239 rz->stream->next_out = rz->outbuf; | |
240 } else break; | |
241 } | |
242 rz->block_pos = rz->out; | |
243 rz->block_off = 0; | |
244 } | |
245 | |
246 static void razf_end_flush(RAZF *rz){ | |
247 uint32_t tout; | |
248 if(rz->buf_len){ | |
249 _razf_write(rz, rz->inbuf, rz->buf_len); | |
250 rz->buf_off = rz->buf_len = 0; | |
251 } | |
252 while(1){ | |
253 tout = rz->stream->avail_out; | |
254 deflate(rz->stream, Z_FINISH); | |
255 rz->out += tout - rz->stream->avail_out; | |
256 if(rz->stream->avail_out < RZ_BUFFER_SIZE){ | |
257 #ifdef _USE_KNETFILE | |
258 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
259 #else | |
260 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
261 #endif | |
262 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
263 rz->stream->next_out = rz->outbuf; | |
264 } else break; | |
265 } | |
266 } | |
267 | |
268 static void _razf_buffered_write(RAZF *rz, const void *data, int size){ | |
269 int i, n; | |
270 while(1){ | |
271 if(rz->buf_len == RZ_BUFFER_SIZE){ | |
272 _razf_write(rz, rz->inbuf, rz->buf_len); | |
273 rz->buf_len = 0; | |
274 } | |
275 if(size + rz->buf_len < RZ_BUFFER_SIZE){ | |
276 for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i]; | |
277 rz->buf_len += size; | |
278 return; | |
279 } else { | |
280 n = RZ_BUFFER_SIZE - rz->buf_len; | |
281 for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i]; | |
282 size -= n; | |
283 data += n; | |
284 rz->buf_len += n; | |
285 } | |
286 } | |
287 } | |
288 | |
289 int razf_write(RAZF* rz, const void *data, int size){ | |
290 int ori_size, n; | |
291 int64_t next_block; | |
292 ori_size = size; | |
293 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; | |
294 while(rz->in + rz->buf_len + size >= next_block){ | |
295 n = next_block - rz->in - rz->buf_len; | |
296 _razf_buffered_write(rz, data, n); | |
297 data += n; | |
298 size -= n; | |
299 razf_flush(rz); | |
300 add_zindex(rz, rz->in, rz->out); | |
301 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; | |
302 } | |
303 _razf_buffered_write(rz, data, size); | |
304 return ori_size; | |
305 } | |
306 #endif | |
307 | |
308 /* gzip flag byte */ | |
309 #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ | |
310 #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ | |
311 #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ | |
312 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */ | |
313 #define COMMENT 0x10 /* bit 4 set: file comment present */ | |
314 #define RESERVED 0xE0 /* bits 5..7: reserved */ | |
315 | |
316 static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){ | |
317 int method, flags, n, len; | |
318 if(size < 2) return 0; | |
319 if(data[0] != 0x1f || data[1] != 0x8b) return 0; | |
320 if(size < 4) return 0; | |
321 method = data[2]; | |
322 flags = data[3]; | |
323 if(method != Z_DEFLATED || (flags & RESERVED)) return 0; | |
324 n = 4 + 6; // Skip 6 bytes | |
325 *extra_off = n + 2; | |
326 *extra_len = 0; | |
327 if(flags & EXTRA_FIELD){ | |
328 if(size < n + 2) return 0; | |
329 len = ((int)data[n + 1] << 8) | data[n]; | |
330 n += 2; | |
331 *extra_off = n; | |
332 while(len){ | |
333 if(n >= size) return 0; | |
334 n ++; | |
335 len --; | |
336 } | |
337 *extra_len = n - (*extra_off); | |
338 } | |
339 if(flags & ORIG_NAME) while(n < size && data[n++]); | |
340 if(flags & COMMENT) while(n < size && data[n++]); | |
341 if(flags & HEAD_CRC){ | |
342 if(n + 2 > size) return 0; | |
343 n += 2; | |
344 } | |
345 return n; | |
346 } | |
347 | |
348 #ifdef _USE_KNETFILE | |
349 static RAZF* razf_open_r(knetFile *fp, int _load_index){ | |
350 #else | |
351 static RAZF* razf_open_r(int fd, int _load_index){ | |
352 #endif | |
353 RAZF *rz; | |
354 int ext_off, ext_len; | |
355 int n, is_be, ret; | |
356 int64_t end; | |
357 unsigned char c[] = "RAZF"; | |
358 rz = calloc(1, sizeof(RAZF)); | |
359 rz->mode = 'r'; | |
360 #ifdef _USE_KNETFILE | |
361 rz->x.fpr = fp; | |
362 #else | |
363 #ifdef _WIN32 | |
364 setmode(fd, O_BINARY); | |
365 #endif | |
366 rz->filedes = fd; | |
367 #endif | |
368 rz->stream = calloc(sizeof(z_stream), 1); | |
369 rz->inbuf = malloc(RZ_BUFFER_SIZE); | |
370 rz->outbuf = malloc(RZ_BUFFER_SIZE); | |
371 rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL; | |
372 #ifdef _USE_KNETFILE | |
373 n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); | |
374 #else | |
375 n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); | |
376 #endif | |
377 ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len); | |
378 if(ret == 0){ | |
379 PLAIN_FILE: | |
380 rz->in = n; | |
381 rz->file_type = FILE_TYPE_PLAIN; | |
382 memcpy(rz->outbuf, rz->inbuf, n); | |
383 rz->buf_len = n; | |
384 free(rz->stream); | |
385 rz->stream = NULL; | |
386 return rz; | |
387 } | |
388 rz->header_size = ret; | |
389 ret = inflateInit2(rz->stream, -WINDOW_BITS); | |
390 if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;} | |
391 rz->stream->avail_in = n - rz->header_size; | |
392 rz->stream->next_in = rz->inbuf + rz->header_size; | |
393 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
394 rz->stream->next_out = rz->outbuf; | |
395 rz->file_type = FILE_TYPE_GZ; | |
396 rz->in = rz->header_size; | |
397 rz->block_pos = rz->header_size; | |
398 rz->next_block_pos = rz->header_size; | |
399 rz->block_off = 0; | |
400 if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz; | |
401 if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){ | |
402 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file. in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__); | |
403 return rz; | |
404 } | |
405 rz->load_index = _load_index; | |
406 rz->file_type = FILE_TYPE_RZ; | |
407 #ifdef _USE_KNETFILE | |
408 if(knet_seek(fp, -16, SEEK_END) == -1){ | |
409 #else | |
410 if(lseek(fd, -16, SEEK_END) == -1){ | |
411 #endif | |
412 UNSEEKABLE: | |
413 rz->seekable = 0; | |
414 rz->index = NULL; | |
415 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL; | |
416 } else { | |
417 is_be = is_big_endian(); | |
418 rz->seekable = 1; | |
419 #ifdef _USE_KNETFILE | |
420 knet_read(fp, &end, sizeof(int64_t)); | |
421 #else | |
422 read(fd, &end, sizeof(int64_t)); | |
423 #endif | |
424 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end); | |
425 else rz->src_end = end; | |
426 | |
427 #ifdef _USE_KNETFILE | |
428 knet_read(fp, &end, sizeof(int64_t)); | |
429 #else | |
430 read(fd, &end, sizeof(int64_t)); | |
431 #endif | |
432 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end); | |
433 else rz->end = end; | |
434 if(n > rz->end){ | |
435 rz->stream->avail_in -= n - rz->end; | |
436 n = rz->end; | |
437 } | |
438 if(rz->end > rz->src_end){ | |
439 #ifdef _USE_KNETFILE | |
440 knet_seek(fp, rz->in, SEEK_SET); | |
441 #else | |
442 lseek(fd, rz->in, SEEK_SET); | |
443 #endif | |
444 goto UNSEEKABLE; | |
445 } | |
446 #ifdef _USE_KNETFILE | |
447 knet_seek(fp, rz->end, SEEK_SET); | |
448 if(knet_tell(fp) != rz->end){ | |
449 knet_seek(fp, rz->in, SEEK_SET); | |
450 #else | |
451 if(lseek(fd, rz->end, SEEK_SET) != rz->end){ | |
452 lseek(fd, rz->in, SEEK_SET); | |
453 #endif | |
454 goto UNSEEKABLE; | |
455 } | |
456 #ifdef _USE_KNETFILE | |
457 load_zindex(rz, fp); | |
458 knet_seek(fp, n, SEEK_SET); | |
459 #else | |
460 load_zindex(rz, fd); | |
461 lseek(fd, n, SEEK_SET); | |
462 #endif | |
463 } | |
464 return rz; | |
465 } | |
466 | |
467 #ifdef _USE_KNETFILE | |
468 RAZF* razf_dopen(int fd, const char *mode){ | |
469 if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n"); | |
470 else if(strstr(mode, "w")) return razf_open_w(fd); | |
471 return NULL; | |
472 } | |
473 | |
474 RAZF* razf_dopen2(int fd, const char *mode) | |
475 { | |
476 fprintf(stderr,"[razf_dopen2] implement me\n"); | |
477 return NULL; | |
478 } | |
479 #else | |
480 RAZF* razf_dopen(int fd, const char *mode){ | |
481 if(strstr(mode, "r")) return razf_open_r(fd, 1); | |
482 else if(strstr(mode, "w")) return razf_open_w(fd); | |
483 else return NULL; | |
484 } | |
485 | |
486 RAZF* razf_dopen2(int fd, const char *mode) | |
487 { | |
488 if(strstr(mode, "r")) return razf_open_r(fd, 0); | |
489 else if(strstr(mode, "w")) return razf_open_w(fd); | |
490 else return NULL; | |
491 } | |
492 #endif | |
493 | |
494 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){ | |
495 int fd; | |
496 RAZF *rz; | |
497 if(strstr(mode, "r")){ | |
498 #ifdef _USE_KNETFILE | |
499 knetFile *fd = knet_open(filename, "r"); | |
500 if (fd == 0) { | |
501 fprintf(stderr, "[_razf_open] fail to open %s\n", filename); | |
502 return NULL; | |
503 } | |
504 #else | |
505 #ifdef _WIN32 | |
506 fd = open(filename, O_RDONLY | O_BINARY); | |
507 #else | |
508 fd = open(filename, O_RDONLY); | |
509 #endif | |
510 #endif | |
511 if(fd < 0) return NULL; | |
512 rz = razf_open_r(fd, _load_index); | |
513 } else if(strstr(mode, "w")){ | |
514 #ifdef _WIN32 | |
515 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666); | |
516 #else | |
517 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); | |
518 #endif | |
519 if(fd < 0) return NULL; | |
520 rz = razf_open_w(fd); | |
521 } else return NULL; | |
522 return rz; | |
523 } | |
524 | |
525 RAZF* razf_open(const char *filename, const char *mode){ | |
526 return _razf_open(filename, mode, 1); | |
527 } | |
528 | |
529 RAZF* razf_open2(const char *filename, const char *mode){ | |
530 return _razf_open(filename, mode, 0); | |
531 } | |
532 | |
533 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){ | |
534 int64_t n; | |
535 if(rz->mode != 'r' && rz->mode != 'R') return 0; | |
536 switch(rz->file_type){ | |
537 case FILE_TYPE_PLAIN: | |
538 if(rz->end == 0x7fffffffffffffffLL){ | |
539 #ifdef _USE_KNETFILE | |
540 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0; | |
541 n = knet_tell(rz->x.fpr); | |
542 knet_seek(rz->x.fpr, 0, SEEK_END); | |
543 rz->end = knet_tell(rz->x.fpr); | |
544 knet_seek(rz->x.fpr, n, SEEK_SET); | |
545 #else | |
546 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0; | |
547 rz->end = lseek(rz->filedes, 0, SEEK_END); | |
548 lseek(rz->filedes, n, SEEK_SET); | |
549 #endif | |
550 } | |
551 *u_size = *c_size = rz->end; | |
552 return 1; | |
553 case FILE_TYPE_GZ: | |
554 return 0; | |
555 case FILE_TYPE_RZ: | |
556 if(rz->src_end == rz->end) return 0; | |
557 *u_size = rz->src_end; | |
558 *c_size = rz->end; | |
559 return 1; | |
560 default: | |
561 return 0; | |
562 } | |
563 } | |
564 | |
565 static int _razf_read(RAZF* rz, void *data, int size){ | |
566 int ret, tin; | |
567 if(rz->z_eof || rz->z_err) return 0; | |
568 if (rz->file_type == FILE_TYPE_PLAIN) { | |
569 #ifdef _USE_KNETFILE | |
570 ret = knet_read(rz->x.fpr, data, size); | |
571 #else | |
572 ret = read(rz->filedes, data, size); | |
573 #endif | |
574 if (ret == 0) rz->z_eof = 1; | |
575 return ret; | |
576 } | |
577 rz->stream->avail_out = size; | |
578 rz->stream->next_out = data; | |
579 while(rz->stream->avail_out){ | |
580 if(rz->stream->avail_in == 0){ | |
581 if(rz->in >= rz->end){ rz->z_eof = 1; break; } | |
582 if(rz->end - rz->in < RZ_BUFFER_SIZE){ | |
583 #ifdef _USE_KNETFILE | |
584 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in); | |
585 #else | |
586 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in); | |
587 #endif | |
588 } else { | |
589 #ifdef _USE_KNETFILE | |
590 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); | |
591 #else | |
592 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); | |
593 #endif | |
594 } | |
595 if(rz->stream->avail_in == 0){ | |
596 rz->z_eof = 1; | |
597 break; | |
598 } | |
599 rz->stream->next_in = rz->inbuf; | |
600 } | |
601 tin = rz->stream->avail_in; | |
602 ret = inflate(rz->stream, Z_BLOCK); | |
603 rz->in += tin - rz->stream->avail_in; | |
604 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){ | |
605 fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__); | |
606 rz->z_err = 1; | |
607 break; | |
608 } | |
609 if(ret == Z_STREAM_END){ | |
610 rz->z_eof = 1; | |
611 break; | |
612 } | |
613 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){ | |
614 rz->buf_flush = 1; | |
615 rz->next_block_pos = rz->in; | |
616 break; | |
617 } | |
618 } | |
619 return size - rz->stream->avail_out; | |
620 } | |
621 | |
622 int razf_read(RAZF *rz, void *data, int size){ | |
623 int ori_size, i; | |
624 ori_size = size; | |
625 while(size > 0){ | |
626 if(rz->buf_len){ | |
627 if(size < rz->buf_len){ | |
628 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; | |
629 rz->buf_off += size; | |
630 rz->buf_len -= size; | |
631 data += size; | |
632 rz->block_off += size; | |
633 size = 0; | |
634 break; | |
635 } else { | |
636 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; | |
637 data += rz->buf_len; | |
638 size -= rz->buf_len; | |
639 rz->block_off += rz->buf_len; | |
640 rz->buf_off = 0; | |
641 rz->buf_len = 0; | |
642 if(rz->buf_flush){ | |
643 rz->block_pos = rz->next_block_pos; | |
644 rz->block_off = 0; | |
645 rz->buf_flush = 0; | |
646 } | |
647 } | |
648 } else if(rz->buf_flush){ | |
649 rz->block_pos = rz->next_block_pos; | |
650 rz->block_off = 0; | |
651 rz->buf_flush = 0; | |
652 } | |
653 if(rz->buf_flush) continue; | |
654 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); | |
655 if(rz->z_eof && rz->buf_len == 0) break; | |
656 } | |
657 rz->out += ori_size - size; | |
658 return ori_size - size; | |
659 } | |
660 | |
661 int razf_skip(RAZF* rz, int size){ | |
662 int ori_size; | |
663 ori_size = size; | |
664 while(size > 0){ | |
665 if(rz->buf_len){ | |
666 if(size < rz->buf_len){ | |
667 rz->buf_off += size; | |
668 rz->buf_len -= size; | |
669 rz->block_off += size; | |
670 size = 0; | |
671 break; | |
672 } else { | |
673 size -= rz->buf_len; | |
674 rz->buf_off = 0; | |
675 rz->buf_len = 0; | |
676 rz->block_off += rz->buf_len; | |
677 if(rz->buf_flush){ | |
678 rz->block_pos = rz->next_block_pos; | |
679 rz->block_off = 0; | |
680 rz->buf_flush = 0; | |
681 } | |
682 } | |
683 } else if(rz->buf_flush){ | |
684 rz->block_pos = rz->next_block_pos; | |
685 rz->block_off = 0; | |
686 rz->buf_flush = 0; | |
687 } | |
688 if(rz->buf_flush) continue; | |
689 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); | |
690 if(rz->z_eof || rz->z_err) break; | |
691 } | |
692 rz->out += ori_size - size; | |
693 return ori_size - size; | |
694 } | |
695 | |
696 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){ | |
697 #ifdef _USE_KNETFILE | |
698 knet_seek(rz->x.fpr, in, SEEK_SET); | |
699 #else | |
700 lseek(rz->filedes, in, SEEK_SET); | |
701 #endif | |
702 rz->in = in; | |
703 rz->out = out; | |
704 rz->block_pos = in; | |
705 rz->next_block_pos = in; | |
706 rz->block_off = 0; | |
707 rz->buf_flush = 0; | |
708 rz->z_eof = rz->z_err = 0; | |
709 inflateReset(rz->stream); | |
710 rz->stream->avail_in = 0; | |
711 rz->buf_off = rz->buf_len = 0; | |
712 } | |
713 | |
714 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){ | |
715 int64_t pos; | |
716 rz->z_eof = 0; | |
717 if(rz->file_type == FILE_TYPE_PLAIN){ | |
718 rz->buf_off = rz->buf_len = 0; | |
719 pos = block_start + block_offset; | |
720 #ifdef _USE_KNETFILE | |
721 knet_seek(rz->x.fpr, pos, SEEK_SET); | |
722 pos = knet_tell(rz->x.fpr); | |
723 #else | |
724 pos = lseek(rz->filedes, pos, SEEK_SET); | |
725 #endif | |
726 rz->out = rz->in = pos; | |
727 return pos; | |
728 } | |
729 if(block_start == rz->block_pos && block_offset >= rz->block_off) { | |
730 block_offset -= rz->block_off; | |
731 goto SKIP; // Needn't reset inflate | |
732 } | |
733 if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start | |
734 _razf_reset_read(rz, block_start, 0); | |
735 SKIP: | |
736 if(block_offset) razf_skip(rz, block_offset); | |
737 return rz->block_off; | |
738 } | |
739 | |
740 int64_t razf_seek(RAZF* rz, int64_t pos, int where){ | |
741 int64_t idx; | |
742 int64_t seek_pos, new_out; | |
743 rz->z_eof = 0; | |
744 if (where == SEEK_CUR) pos += rz->out; | |
745 else if (where == SEEK_END) pos += rz->src_end; | |
746 if(rz->file_type == FILE_TYPE_PLAIN){ | |
747 #ifdef _USE_KNETFILE | |
748 knet_seek(rz->x.fpr, pos, SEEK_SET); | |
749 seek_pos = knet_tell(rz->x.fpr); | |
750 #else | |
751 seek_pos = lseek(rz->filedes, pos, SEEK_SET); | |
752 #endif | |
753 rz->buf_off = rz->buf_len = 0; | |
754 rz->out = rz->in = seek_pos; | |
755 return seek_pos; | |
756 } else if(rz->file_type == FILE_TYPE_GZ){ | |
757 if(pos >= rz->out) goto SKIP; | |
758 return rz->out; | |
759 } | |
760 if(pos == rz->out) return pos; | |
761 if(pos > rz->src_end) return rz->out; | |
762 if(!rz->seekable || !rz->load_index){ | |
763 if(pos >= rz->out) goto SKIP; | |
764 } | |
765 idx = pos / RZ_BLOCK_SIZE - 1; | |
766 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); | |
767 new_out = (idx + 1) * RZ_BLOCK_SIZE; | |
768 if(pos > rz->out && new_out <= rz->out) goto SKIP; | |
769 _razf_reset_read(rz, seek_pos, new_out); | |
770 SKIP: | |
771 razf_skip(rz, (int)(pos - rz->out)); | |
772 return rz->out; | |
773 } | |
774 | |
775 uint64_t razf_tell2(RAZF *rz) | |
776 { | |
777 /* | |
778 if (rz->load_index) { | |
779 int64_t idx, seek_pos; | |
780 idx = rz->out / RZ_BLOCK_SIZE - 1; | |
781 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); | |
782 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off) | |
783 fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n", | |
784 (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off); | |
785 } | |
786 */ | |
787 return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff); | |
788 } | |
789 | |
790 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where) | |
791 { | |
792 if (where != SEEK_SET) return -1; | |
793 return razf_jump(rz, voffset>>16, voffset&0xffff); | |
794 } | |
795 | |
796 void razf_close(RAZF *rz){ | |
797 if(rz->mode == 'w'){ | |
798 #ifndef _RZ_READONLY | |
799 razf_end_flush(rz); | |
800 deflateEnd(rz->stream); | |
801 #ifdef _USE_KNETFILE | |
802 save_zindex(rz, rz->x.fpw); | |
803 if(is_big_endian()){ | |
804 write(rz->x.fpw, &rz->in, sizeof(int64_t)); | |
805 write(rz->x.fpw, &rz->out, sizeof(int64_t)); | |
806 } else { | |
807 uint64_t v64 = byte_swap_8((uint64_t)rz->in); | |
808 write(rz->x.fpw, &v64, sizeof(int64_t)); | |
809 v64 = byte_swap_8((uint64_t)rz->out); | |
810 write(rz->x.fpw, &v64, sizeof(int64_t)); | |
811 } | |
812 #else | |
813 save_zindex(rz, rz->filedes); | |
814 if(is_big_endian()){ | |
815 write(rz->filedes, &rz->in, sizeof(int64_t)); | |
816 write(rz->filedes, &rz->out, sizeof(int64_t)); | |
817 } else { | |
818 uint64_t v64 = byte_swap_8((uint64_t)rz->in); | |
819 write(rz->filedes, &v64, sizeof(int64_t)); | |
820 v64 = byte_swap_8((uint64_t)rz->out); | |
821 write(rz->filedes, &v64, sizeof(int64_t)); | |
822 } | |
823 #endif | |
824 #endif | |
825 } else if(rz->mode == 'r'){ | |
826 if(rz->stream) inflateEnd(rz->stream); | |
827 } | |
828 if(rz->inbuf) free(rz->inbuf); | |
829 if(rz->outbuf) free(rz->outbuf); | |
830 if(rz->header){ | |
831 free(rz->header->extra); | |
832 free(rz->header->name); | |
833 free(rz->header->comment); | |
834 free(rz->header); | |
835 } | |
836 if(rz->index){ | |
837 free(rz->index->bin_offsets); | |
838 free(rz->index->cell_offsets); | |
839 free(rz->index); | |
840 } | |
841 free(rz->stream); | |
842 #ifdef _USE_KNETFILE | |
843 if (rz->mode == 'r') | |
844 knet_close(rz->x.fpr); | |
845 if (rz->mode == 'w') | |
846 close(rz->x.fpw); | |
847 #else | |
848 close(rz->filedes); | |
849 #endif | |
850 free(rz); | |
851 } | |
852 | |
853 #endif |