0
|
1 /*
|
|
2 Copyright (c) 2012-2013 Genome Research Ltd.
|
|
3 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
4
|
|
5 Redistribution and use in source and binary forms, with or without
|
|
6 modification, are permitted provided that the following conditions are met:
|
|
7
|
|
8 1. Redistributions of source code must retain the above copyright notice,
|
|
9 this list of conditions and the following disclaimer.
|
|
10
|
|
11 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12 this list of conditions and the following disclaimer in the documentation
|
|
13 and/or other materials provided with the distribution.
|
|
14
|
|
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
16 Institute nor the names of its contributors may be used to endorse or promote
|
|
17 products derived from this software without specific prior written permission.
|
|
18
|
|
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 /*
|
|
32 * FIXME: add checking of cram_external_type to return NULL on unsupported
|
|
33 * {codec,type} tuples.
|
|
34 */
|
|
35
|
|
36 #ifdef HAVE_CONFIG_H
|
|
37 #include "io_lib_config.h"
|
|
38 #endif
|
|
39
|
|
40 #include <stdlib.h>
|
|
41 #include <string.h>
|
|
42 #include <assert.h>
|
|
43 #include <limits.h>
|
|
44
|
|
45 #include "cram/cram.h"
|
|
46
|
|
47 static char *codec2str(enum cram_encoding codec) {
|
|
48 switch (codec) {
|
|
49 case E_NULL: return "NULL";
|
|
50 case E_EXTERNAL: return "EXTERNAL";
|
|
51 case E_GOLOMB: return "GOLOMB";
|
|
52 case E_HUFFMAN: return "HUFFMAN";
|
|
53 case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";
|
|
54 case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";
|
|
55 case E_BETA: return "BETA";
|
|
56 case E_SUBEXP: return "SUBEXP";
|
|
57 case E_GOLOMB_RICE: return "GOLOMB_RICE";
|
|
58 case E_GAMMA: return "GAMMA";
|
|
59 }
|
|
60
|
|
61 return "(unknown)";
|
|
62 }
|
|
63
|
|
64 /*
|
|
65 * ---------------------------------------------------------------------------
|
|
66 * Block bit-level I/O functions.
|
|
67 * All defined static here to promote easy inlining by the compiler.
|
|
68 */
|
|
69
|
|
70 #if 0
|
|
71 /* Get a single bit, MSB first */
|
|
72 static signed int get_bit_MSB(cram_block *block) {
|
|
73 unsigned int val;
|
|
74
|
|
75 if (block->byte > block->alloc)
|
|
76 return -1;
|
|
77
|
|
78 val = block->data[block->byte] >> block->bit;
|
|
79 if (--block->bit == -1) {
|
|
80 block->bit = 7;
|
|
81 block->byte++;
|
|
82 //printf("(%02X)", block->data[block->byte]);
|
|
83 }
|
|
84
|
|
85 //printf("-B%d-", val&1);
|
|
86
|
|
87 return val & 1;
|
|
88 }
|
|
89 #endif
|
|
90
|
|
91 /*
|
|
92 * Count number of successive 0 and 1 bits
|
|
93 */
|
|
94 static int get_one_bits_MSB(cram_block *block) {
|
|
95 int n = 0, b;
|
|
96 do {
|
|
97 b = block->data[block->byte] >> block->bit;
|
|
98 if (--block->bit == -1) {
|
|
99 block->bit = 7;
|
|
100 block->byte++;
|
|
101 }
|
|
102 n++;
|
|
103 } while (b&1);
|
|
104
|
|
105 return n-1;
|
|
106 }
|
|
107
|
|
108 static int get_zero_bits_MSB(cram_block *block) {
|
|
109 int n = 0, b;
|
|
110 do {
|
|
111 b = block->data[block->byte] >> block->bit;
|
|
112 if (--block->bit == -1) {
|
|
113 block->bit = 7;
|
|
114 block->byte++;
|
|
115 }
|
|
116 n++;
|
|
117 } while (!(b&1));
|
|
118
|
|
119 return n-1;
|
|
120 }
|
|
121
|
|
122 #if 0
|
|
123 /* Stores a single bit */
|
|
124 static void store_bit_MSB(cram_block *block, unsigned int bit) {
|
|
125 if (block->byte >= block->alloc) {
|
|
126 block->alloc = block->alloc ? block->alloc*2 : 1024;
|
|
127 block->data = realloc(block->data, block->alloc);
|
|
128 }
|
|
129
|
|
130 if (bit)
|
|
131 block->data[block->byte] |= (1 << block->bit);
|
|
132
|
|
133 if (--block->bit == -1) {
|
|
134 block->bit = 7;
|
|
135 block->byte++;
|
|
136 block->data[block->byte] = 0;
|
|
137 }
|
|
138 }
|
|
139 #endif
|
|
140
|
|
141 #if 0
|
|
142 /* Rounds to the next whole byte boundary first */
|
|
143 static void store_bytes_MSB(cram_block *block, char *bytes, int len) {
|
|
144 if (block->bit != 7) {
|
|
145 block->bit = 7;
|
|
146 block->byte++;
|
|
147 }
|
|
148
|
|
149 while (block->byte + len >= block->alloc) {
|
|
150 block->alloc = block->alloc ? block->alloc*2 : 1024;
|
|
151 block->data = realloc(block->data, block->alloc);
|
|
152 }
|
|
153
|
|
154 memcpy(&block->data[block->byte], bytes, len);
|
|
155 block->byte += len;
|
|
156 }
|
|
157 #endif
|
|
158
|
|
159 /* Local optimised copy for inlining */
|
|
160 static inline unsigned int get_bits_MSB(cram_block *block, int nbits) {
|
|
161 unsigned int val = 0;
|
|
162 int i;
|
|
163
|
|
164 #if 0
|
|
165 // Fits within the current byte */
|
|
166 if (nbits <= block->bit+1) {
|
|
167 val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
|
|
168 if ((block->bit -= nbits) == -1) {
|
|
169 block->bit = 7;
|
|
170 block->byte++;
|
|
171 }
|
|
172 return val;
|
|
173 }
|
|
174
|
|
175 // partial first byte
|
|
176 val = block->data[block->byte] & ((1<<(block->bit+1))-1);
|
|
177 nbits -= block->bit+1;
|
|
178 block->bit = 7;
|
|
179 block->byte++;
|
|
180
|
|
181 // whole middle bytes
|
|
182 while (nbits >= 8) {
|
|
183 val = (val << 8) | block->data[block->byte++];
|
|
184 nbits -= 8;
|
|
185 }
|
|
186
|
|
187 val <<= nbits;
|
|
188 val |= (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
|
|
189 block->bit -= nbits;
|
|
190 return val;
|
|
191 #endif
|
|
192
|
|
193 #if 0
|
|
194 /* Inefficient implementation! */
|
|
195 //printf("{");
|
|
196 for (i = 0; i < nbits; i++)
|
|
197 //val = (val << 1) | get_bit_MSB(block);
|
|
198 GET_BIT_MSB(block, val);
|
|
199 #endif
|
|
200
|
|
201 #if 1
|
|
202 /* Combination of 1st two methods */
|
|
203 if (nbits <= block->bit+1) {
|
|
204 val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
|
|
205 if ((block->bit -= nbits) == -1) {
|
|
206 block->bit = 7;
|
|
207 block->byte++;
|
|
208 }
|
|
209 return val;
|
|
210 }
|
|
211
|
|
212 switch(nbits) {
|
|
213 // case 15: GET_BIT_MSB(block, val);
|
|
214 // case 14: GET_BIT_MSB(block, val);
|
|
215 // case 13: GET_BIT_MSB(block, val);
|
|
216 // case 12: GET_BIT_MSB(block, val);
|
|
217 // case 11: GET_BIT_MSB(block, val);
|
|
218 // case 10: GET_BIT_MSB(block, val);
|
|
219 // case 9: GET_BIT_MSB(block, val);
|
|
220 case 8: GET_BIT_MSB(block, val);
|
|
221 case 7: GET_BIT_MSB(block, val);
|
|
222 case 6: GET_BIT_MSB(block, val);
|
|
223 case 5: GET_BIT_MSB(block, val);
|
|
224 case 4: GET_BIT_MSB(block, val);
|
|
225 case 3: GET_BIT_MSB(block, val);
|
|
226 case 2: GET_BIT_MSB(block, val);
|
|
227 case 1: GET_BIT_MSB(block, val);
|
|
228 break;
|
|
229
|
|
230 default:
|
|
231 for (i = 0; i < nbits; i++)
|
|
232 //val = (val << 1) | get_bit_MSB(block);
|
|
233 GET_BIT_MSB(block, val);
|
|
234 }
|
|
235 #endif
|
|
236
|
|
237 //printf("=0x%x}", val);
|
|
238
|
|
239 return val;
|
|
240 }
|
|
241
|
|
242 /*
|
|
243 * Can store up to 24-bits worth of data encoded in an integer value
|
|
244 * Possibly we'd want to have a less optimal store_bits function when dealing
|
|
245 * with nbits > 24, but for now we assume the codes generated are never
|
|
246 * that big. (Given this is only possible with 121392 or more
|
|
247 * characters with exactly the correct frequency distribution we check
|
|
248 * for it elsewhere.)
|
|
249 */
|
|
250 static int store_bits_MSB(cram_block *block, unsigned int val, int nbits) {
|
|
251 /* fprintf(stderr, " store_bits: %02x %d\n", val, nbits); */
|
|
252
|
|
253 /*
|
|
254 * Use slow mode until we tweak the huffman generator to never generate
|
|
255 * codes longer than 24-bits.
|
|
256 */
|
|
257 unsigned int mask;
|
|
258
|
|
259 if (block->byte+4 >= block->alloc) {
|
|
260 if (block->byte) {
|
|
261 block->alloc *= 2;
|
|
262 block->data = realloc(block->data, block->alloc + 4);
|
|
263 if (!block->data)
|
|
264 return -1;
|
|
265 } else {
|
|
266 block->alloc = 1024;
|
|
267 block->data = realloc(block->data, block->alloc + 4);
|
|
268 if (!block->data)
|
|
269 return -1;
|
|
270 block->data[0] = 0; // initialise first byte of buffer
|
|
271 }
|
|
272 }
|
|
273
|
|
274 /* fits in current bit-field */
|
|
275 if (nbits <= block->bit+1) {
|
|
276 block->data[block->byte] |= (val << (block->bit+1-nbits));
|
|
277 if ((block->bit-=nbits) == -1) {
|
|
278 block->bit = 7;
|
|
279 block->byte++;
|
|
280 block->data[block->byte] = 0;
|
|
281 }
|
|
282 return 0;
|
|
283 }
|
|
284
|
|
285 block->data[block->byte] |= (val >> (nbits -= block->bit+1));
|
|
286 block->bit = 7;
|
|
287 block->byte++;
|
|
288 block->data[block->byte] = 0;
|
|
289
|
|
290 mask = 1<<(nbits-1);
|
|
291 do {
|
|
292 if (val & mask)
|
|
293 block->data[block->byte] |= (1 << block->bit);
|
|
294 if (--block->bit == -1) {
|
|
295 block->bit = 7;
|
|
296 block->byte++;
|
|
297 block->data[block->byte] = 0;
|
|
298 }
|
|
299 mask >>= 1;
|
|
300 } while(--nbits);
|
|
301
|
|
302 return 0;
|
|
303 }
|
|
304
|
|
305 /*
|
|
306 * Returns the next 'size' bytes from a block, or NULL if insufficient
|
|
307 * data left.This is just a pointer into the block data and not an
|
|
308 * allocated object, so do not free the result.
|
|
309 */
|
|
310 static char *cram_extract_block(cram_block *b, int size) {
|
|
311 char *cp = (char *)b->data + b->idx;
|
|
312 b->idx += size;
|
|
313 if (b->idx > b->uncomp_size)
|
|
314 return NULL;
|
|
315
|
|
316 return cp;
|
|
317 }
|
|
318
|
|
319 /*
|
|
320 * ---------------------------------------------------------------------------
|
|
321 * EXTERNAL
|
|
322 */
|
|
323 int cram_external_decode_int(cram_slice *slice, cram_codec *c,
|
|
324 cram_block *in, char *out, int *out_size) {
|
|
325 int i;
|
|
326 char *cp;
|
|
327 cram_block *b = NULL;
|
|
328
|
|
329 /* Find the external block */
|
|
330 if (slice->block_by_id) {
|
|
331 if (!(b = slice->block_by_id[c->external.content_id]))
|
|
332 return *out_size?-1:0;
|
|
333 } else {
|
|
334 for (i = 0; i < slice->hdr->num_blocks; i++) {
|
|
335 b = slice->block[i];
|
|
336 if (b && b->content_type == EXTERNAL &&
|
|
337 b->content_id == c->external.content_id) {
|
|
338 break;
|
|
339 }
|
|
340 }
|
|
341 if (i == slice->hdr->num_blocks || !b)
|
|
342 return -1;
|
|
343 }
|
|
344
|
|
345 cp = (char *)b->data + b->idx;
|
|
346 // E_INT and E_LONG are guaranteed single item queries
|
|
347 b->idx += itf8_get(cp, (int32_t *)out);
|
|
348 *out_size = 1;
|
|
349
|
|
350 return 0;
|
|
351 }
|
|
352
|
|
353 int cram_external_decode_char(cram_slice *slice, cram_codec *c,
|
|
354 cram_block *in, char *out,
|
|
355 int *out_size) {
|
|
356 int i;
|
|
357 char *cp;
|
|
358 cram_block *b = NULL;
|
|
359
|
|
360 /* Find the external block */
|
|
361 if (slice->block_by_id) {
|
|
362 if (!(b = slice->block_by_id[c->external.content_id]))
|
|
363 return *out_size?-1:0;
|
|
364 } else {
|
|
365 for (i = 0; i < slice->hdr->num_blocks; i++) {
|
|
366 b = slice->block[i];
|
|
367 if (b && b->content_type == EXTERNAL &&
|
|
368 b->content_id == c->external.content_id) {
|
|
369 break;
|
|
370 }
|
|
371 }
|
|
372 if (i == slice->hdr->num_blocks || !b)
|
|
373 return -1;
|
|
374 }
|
|
375
|
|
376 cp = cram_extract_block(b, *out_size);
|
|
377 if (!cp)
|
|
378 return -1;
|
|
379
|
|
380 memcpy(out, cp, *out_size);
|
|
381 return 0;
|
|
382 }
|
|
383
|
|
384 static int cram_external_decode_block(cram_slice *slice, cram_codec *c,
|
|
385 cram_block *in, char *out_,
|
|
386 int *out_size) {
|
|
387 int i;
|
|
388 char *cp;
|
|
389 cram_block *b = NULL;
|
|
390 cram_block *out = (cram_block *)out_;
|
|
391
|
|
392 /* Find the external block */
|
|
393 if (slice->block_by_id) {
|
|
394 if (!(b = slice->block_by_id[c->external.content_id]))
|
|
395 return *out_size?-1:0;
|
|
396 } else {
|
|
397 for (i = 0; i < slice->hdr->num_blocks; i++) {
|
|
398 b = slice->block[i];
|
|
399 if (b && b->content_type == EXTERNAL &&
|
|
400 b->content_id == c->external.content_id) {
|
|
401 break;
|
|
402 }
|
|
403 }
|
|
404 if (i == slice->hdr->num_blocks || !b)
|
|
405 return -1;
|
|
406 }
|
|
407
|
|
408 cp = cram_extract_block(b, *out_size);
|
|
409 if (!cp)
|
|
410 return -1;
|
|
411
|
|
412 BLOCK_APPEND(out, cp, *out_size);
|
|
413 return 0;
|
|
414 }
|
|
415
|
|
416 void cram_external_decode_free(cram_codec *c) {
|
|
417 if (c)
|
|
418 free(c);
|
|
419 }
|
|
420
|
|
421 cram_codec *cram_external_decode_init(char *data, int size,
|
|
422 enum cram_external_type option,
|
|
423 int version) {
|
|
424 cram_codec *c;
|
|
425 char *cp = data;
|
|
426
|
|
427 if (!(c = malloc(sizeof(*c))))
|
|
428 return NULL;
|
|
429
|
|
430 c->codec = E_EXTERNAL;
|
|
431 if (option == E_INT || option == E_LONG)
|
|
432 c->decode = cram_external_decode_int;
|
|
433 else if (option == E_BYTE_ARRAY || option == E_BYTE)
|
|
434 c->decode = cram_external_decode_char;
|
|
435 else
|
|
436 c->decode = cram_external_decode_block;
|
|
437 c->free = cram_external_decode_free;
|
|
438
|
|
439 cp += itf8_get(cp, &c->external.content_id);
|
|
440
|
|
441 if (cp - data != size) {
|
|
442 fprintf(stderr, "Malformed external header stream\n");
|
|
443 free(c);
|
|
444 return NULL;
|
|
445 }
|
|
446
|
|
447 c->external.type = option;
|
|
448
|
|
449 return c;
|
|
450 }
|
|
451
|
|
452 int cram_external_encode_int(cram_slice *slice, cram_codec *c,
|
|
453 char *in, int in_size) {
|
|
454 uint32_t *i32 = (uint32_t *)in;
|
|
455
|
|
456 itf8_put_blk(c->out, *i32);
|
|
457 return 0;
|
|
458 }
|
|
459
|
|
460 int cram_external_encode_char(cram_slice *slice, cram_codec *c,
|
|
461 char *in, int in_size) {
|
|
462 BLOCK_APPEND(c->out, in, in_size);
|
|
463 return 0;
|
|
464 }
|
|
465
|
|
466 void cram_external_encode_free(cram_codec *c) {
|
|
467 if (!c)
|
|
468 return;
|
|
469 free(c);
|
|
470 }
|
|
471
|
|
472 int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix,
|
|
473 int version) {
|
|
474 char tmp[99], *tp = tmp;
|
|
475 int len = 0;
|
|
476
|
|
477 if (prefix) {
|
|
478 size_t l = strlen(prefix);
|
|
479 BLOCK_APPEND(b, prefix, l);
|
|
480 len += l;
|
|
481 }
|
|
482
|
|
483 tp += itf8_put(tp, c->e_external.content_id);
|
|
484 len += itf8_put_blk(b, c->codec);
|
|
485 len += itf8_put_blk(b, tp-tmp);
|
|
486 BLOCK_APPEND(b, tmp, tp-tmp);
|
|
487 len += tp-tmp;
|
|
488
|
|
489 return len;
|
|
490 }
|
|
491
|
|
492 cram_codec *cram_external_encode_init(cram_stats *st,
|
|
493 enum cram_external_type option,
|
|
494 void *dat,
|
|
495 int version) {
|
|
496 cram_codec *c;
|
|
497
|
|
498 c = malloc(sizeof(*c));
|
|
499 if (!c)
|
|
500 return NULL;
|
|
501 c->codec = E_EXTERNAL;
|
|
502 c->free = cram_external_encode_free;
|
|
503 if (option == E_INT || option == E_LONG)
|
|
504 c->encode = cram_external_encode_int;
|
|
505 else if (option == E_BYTE_ARRAY || option == E_BYTE)
|
|
506 c->encode = cram_external_encode_char;
|
|
507 else
|
|
508 abort();
|
|
509 c->store = cram_external_encode_store;
|
|
510
|
|
511 c->e_external.content_id = (size_t)dat;
|
|
512
|
|
513 return c;
|
|
514 }
|
|
515
|
|
516 /*
|
|
517 * ---------------------------------------------------------------------------
|
|
518 * BETA
|
|
519 */
|
|
520 int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
|
|
521 int32_t *out_i = (int32_t *)out;
|
|
522 int i, n;
|
|
523
|
|
524 if (c->beta.nbits) {
|
|
525 for (i = 0, n = *out_size; i < n; i++)
|
|
526 out_i[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
|
|
527 } else {
|
|
528 for (i = 0, n = *out_size; i < n; i++)
|
|
529 out_i[i] = -c->beta.offset;
|
|
530 }
|
|
531
|
|
532 return 0;
|
|
533 }
|
|
534
|
|
535 int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
|
|
536 int i, n;
|
|
537
|
|
538 if (c->beta.nbits) {
|
|
539 for (i = 0, n = *out_size; i < n; i++)
|
|
540 out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
|
|
541 } else {
|
|
542 for (i = 0, n = *out_size; i < n; i++)
|
|
543 out[i] = -c->beta.offset;
|
|
544 }
|
|
545
|
|
546 return 0;
|
|
547 }
|
|
548
|
|
549 void cram_beta_decode_free(cram_codec *c) {
|
|
550 if (c)
|
|
551 free(c);
|
|
552 }
|
|
553
|
|
554 cram_codec *cram_beta_decode_init(char *data, int size,
|
|
555 enum cram_external_type option,
|
|
556 int version) {
|
|
557 cram_codec *c;
|
|
558 char *cp = data;
|
|
559
|
|
560 if (!(c = malloc(sizeof(*c))))
|
|
561 return NULL;
|
|
562
|
|
563 c->codec = E_BETA;
|
|
564 if (option == E_INT || option == E_LONG)
|
|
565 c->decode = cram_beta_decode_int;
|
|
566 else if (option == E_BYTE_ARRAY || option == E_BYTE)
|
|
567 c->decode = cram_beta_decode_char;
|
|
568 else
|
|
569 abort();
|
|
570 c->free = cram_beta_decode_free;
|
|
571
|
|
572 cp += itf8_get(cp, &c->beta.offset);
|
|
573 cp += itf8_get(cp, &c->beta.nbits);
|
|
574
|
|
575 if (cp - data != size) {
|
|
576 fprintf(stderr, "Malformed beta header stream\n");
|
|
577 free(c);
|
|
578 return NULL;
|
|
579 }
|
|
580
|
|
581 return c;
|
|
582 }
|
|
583
|
|
584 int cram_beta_encode_store(cram_codec *c, cram_block *b,
|
|
585 char *prefix, int version) {
|
|
586 int len = 0;
|
|
587
|
|
588 if (prefix) {
|
|
589 size_t l = strlen(prefix);
|
|
590 BLOCK_APPEND(b, prefix, l);
|
|
591 len += l;
|
|
592 }
|
|
593
|
|
594 len += itf8_put_blk(b, c->codec);
|
|
595 len += itf8_put_blk(b, itf8_size(c->e_beta.offset)
|
|
596 + itf8_size(c->e_beta.nbits)); // codec length
|
|
597 len += itf8_put_blk(b, c->e_beta.offset);
|
|
598 len += itf8_put_blk(b, c->e_beta.nbits);
|
|
599
|
|
600 return len;
|
|
601 }
|
|
602
|
|
603 int cram_beta_encode_int(cram_slice *slice, cram_codec *c,
|
|
604 char *in, int in_size) {
|
|
605 int *syms = (int *)in;
|
|
606 int i, r = 0;
|
|
607
|
|
608 for (i = 0; i < in_size; i++)
|
|
609 r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset,
|
|
610 c->e_beta.nbits);
|
|
611
|
|
612 return r;
|
|
613 }
|
|
614
|
|
615 int cram_beta_encode_char(cram_slice *slice, cram_codec *c,
|
|
616 char *in, int in_size) {
|
|
617 unsigned char *syms = (unsigned char *)in;
|
|
618 int i, r = 0;
|
|
619
|
|
620 for (i = 0; i < in_size; i++)
|
|
621 r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset,
|
|
622 c->e_beta.nbits);
|
|
623
|
|
624 return r;
|
|
625 }
|
|
626
|
|
627 void cram_beta_encode_free(cram_codec *c) {
|
|
628 if (c) free(c);
|
|
629 }
|
|
630
|
|
631 cram_codec *cram_beta_encode_init(cram_stats *st,
|
|
632 enum cram_external_type option,
|
|
633 void *dat,
|
|
634 int version) {
|
|
635 cram_codec *c;
|
|
636 int min_val, max_val, len = 0;
|
|
637
|
|
638 c = malloc(sizeof(*c));
|
|
639 if (!c)
|
|
640 return NULL;
|
|
641 c->codec = E_BETA;
|
|
642 c->free = cram_beta_encode_free;
|
|
643 if (option == E_INT)
|
|
644 c->encode = cram_beta_encode_int;
|
|
645 else
|
|
646 c->encode = cram_beta_encode_char;
|
|
647 c->store = cram_beta_encode_store;
|
|
648
|
|
649 if (dat) {
|
|
650 min_val = ((int *)dat)[0];
|
|
651 max_val = ((int *)dat)[1];
|
|
652 } else {
|
|
653 min_val = INT_MAX;
|
|
654 max_val = INT_MIN;
|
|
655 int i;
|
|
656 for (i = 0; i < MAX_STAT_VAL; i++) {
|
|
657 if (!st->freqs[i])
|
|
658 continue;
|
|
659 if (min_val > i)
|
|
660 min_val = i;
|
|
661 max_val = i;
|
|
662 }
|
|
663 if (st->h) {
|
|
664 khint_t k;
|
|
665
|
|
666 for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
|
|
667 if (!kh_exist(st->h, k))
|
|
668 continue;
|
|
669
|
|
670 i = kh_key(st->h, k);
|
|
671 if (min_val > i)
|
|
672 min_val = i;
|
|
673 if (max_val < i)
|
|
674 max_val = i;
|
|
675 }
|
|
676 }
|
|
677 }
|
|
678
|
|
679 assert(max_val >= min_val);
|
|
680 c->e_beta.offset = -min_val;
|
|
681 max_val -= min_val;
|
|
682 while (max_val) {
|
|
683 len++;
|
|
684 max_val >>= 1;
|
|
685 }
|
|
686 c->e_beta.nbits = len;
|
|
687
|
|
688 return c;
|
|
689 }
|
|
690
|
|
691 /*
|
|
692 * ---------------------------------------------------------------------------
|
|
693 * SUBEXP
|
|
694 */
|
|
695 int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
|
|
696 int32_t *out_i = (int32_t *)out;
|
|
697 int n, count;
|
|
698 int k = c->subexp.k;
|
|
699
|
|
700 for (count = 0, n = *out_size; count < n; count++) {
|
|
701 int i = 0, tail;
|
|
702 int val;
|
|
703
|
|
704 /* Get number of 1s */
|
|
705 //while (get_bit_MSB(in) == 1) i++;
|
|
706 i = get_one_bits_MSB(in);
|
|
707
|
|
708 /*
|
|
709 * Val is
|
|
710 * i > 0: 2^(k+i-1) + k+i-1 bits
|
|
711 * i = 0: k bits
|
|
712 */
|
|
713 if (i) {
|
|
714 tail = i + k-1;
|
|
715 val = 0;
|
|
716 while (tail) {
|
|
717 //val = val<<1; val |= get_bit_MSB(in);
|
|
718 GET_BIT_MSB(in, val);
|
|
719 tail--;
|
|
720 }
|
|
721 val += 1 << (i + k-1);
|
|
722 } else {
|
|
723 tail = k;
|
|
724 val = 0;
|
|
725 while (tail) {
|
|
726 //val = val<<1; val |= get_bit_MSB(in);
|
|
727 GET_BIT_MSB(in, val);
|
|
728 tail--;
|
|
729 }
|
|
730 }
|
|
731
|
|
732 out_i[count] = val - c->subexp.offset;
|
|
733 }
|
|
734
|
|
735 return 0;
|
|
736 }
|
|
737
|
|
738 void cram_subexp_decode_free(cram_codec *c) {
|
|
739 if (c)
|
|
740 free(c);
|
|
741 }
|
|
742
|
|
743 cram_codec *cram_subexp_decode_init(char *data, int size,
|
|
744 enum cram_external_type option,
|
|
745 int version) {
|
|
746 cram_codec *c;
|
|
747 char *cp = data;
|
|
748
|
|
749 if (!(c = malloc(sizeof(*c))))
|
|
750 return NULL;
|
|
751
|
|
752 c->codec = E_SUBEXP;
|
|
753 c->decode = cram_subexp_decode;
|
|
754 c->free = cram_subexp_decode_free;
|
|
755
|
|
756 cp += itf8_get(cp, &c->subexp.offset);
|
|
757 cp += itf8_get(cp, &c->subexp.k);
|
|
758
|
|
759 if (cp - data != size) {
|
|
760 fprintf(stderr, "Malformed subexp header stream\n");
|
|
761 free(c);
|
|
762 return NULL;
|
|
763 }
|
|
764
|
|
765 return c;
|
|
766 }
|
|
767
|
|
768 /*
|
|
769 * ---------------------------------------------------------------------------
|
|
770 * GAMMA
|
|
771 */
|
|
772 int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
|
|
773 int32_t *out_i = (int32_t *)out;
|
|
774 int i, n;
|
|
775
|
|
776 for (i = 0, n = *out_size; i < n; i++) {
|
|
777 int nz = 0;
|
|
778 int val;
|
|
779 //while (get_bit_MSB(in) == 0) nz++;
|
|
780 nz = get_zero_bits_MSB(in);
|
|
781 val = 1;
|
|
782 while (nz > 0) {
|
|
783 //val <<= 1; val |= get_bit_MSB(in);
|
|
784 GET_BIT_MSB(in, val);
|
|
785 nz--;
|
|
786 }
|
|
787
|
|
788 out_i[i] = val - c->gamma.offset;
|
|
789 }
|
|
790
|
|
791 return 0;
|
|
792 }
|
|
793
|
|
794 void cram_gamma_decode_free(cram_codec *c) {
|
|
795 if (c)
|
|
796 free(c);
|
|
797 }
|
|
798
|
|
799 cram_codec *cram_gamma_decode_init(char *data, int size,
|
|
800 enum cram_external_type option,
|
|
801 int version) {
|
|
802 cram_codec *c;
|
|
803 char *cp = data;
|
|
804
|
|
805 if (!(c = malloc(sizeof(*c))))
|
|
806 return NULL;
|
|
807
|
|
808 c->codec = E_GAMMA;
|
|
809 c->decode = cram_gamma_decode;
|
|
810 c->free = cram_gamma_decode_free;
|
|
811
|
|
812 cp += itf8_get(cp, &c->gamma.offset);
|
|
813
|
|
814 if (cp - data != size) {
|
|
815 fprintf(stderr, "Malformed gamma header stream\n");
|
|
816 free(c);
|
|
817 return NULL;
|
|
818 }
|
|
819
|
|
820 return c;
|
|
821 }
|
|
822
|
|
823 /*
|
|
824 * ---------------------------------------------------------------------------
|
|
825 * HUFFMAN
|
|
826 */
|
|
827
|
|
828 static int code_sort(const void *vp1, const void *vp2) {
|
|
829 const cram_huffman_code *c1 = (const cram_huffman_code *)vp1;
|
|
830 const cram_huffman_code *c2 = (const cram_huffman_code *)vp2;
|
|
831
|
|
832 if (c1->len != c2->len)
|
|
833 return c1->len - c2->len;
|
|
834 else
|
|
835 return c1->symbol - c2->symbol;
|
|
836 }
|
|
837
|
|
838 void cram_huffman_decode_free(cram_codec *c) {
|
|
839 if (!c)
|
|
840 return;
|
|
841
|
|
842 if (c->huffman.codes)
|
|
843 free(c->huffman.codes);
|
|
844 free(c);
|
|
845 }
|
|
846
|
|
847 int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c,
|
|
848 cram_block *in, char *out, int *out_size) {
|
|
849 int i, n;
|
|
850
|
|
851 /* Special case of 0 length codes */
|
|
852 for (i = 0, n = *out_size; i < n; i++) {
|
|
853 out[i] = c->huffman.codes[0].symbol;
|
|
854 }
|
|
855 return 0;
|
|
856 }
|
|
857
|
|
858 int cram_huffman_decode_char(cram_slice *slice, cram_codec *c,
|
|
859 cram_block *in, char *out, int *out_size) {
|
|
860 int i, n, ncodes = c->huffman.ncodes;
|
|
861 const cram_huffman_code * const codes = c->huffman.codes;
|
|
862
|
|
863 for (i = 0, n = *out_size; i < n; i++) {
|
|
864 int idx = 0;
|
|
865 int val = 0, len = 0, last_len = 0;
|
|
866
|
|
867 for (;;) {
|
|
868 int dlen = codes[idx].len - last_len;
|
|
869 if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen)
|
|
870 return -1;
|
|
871
|
|
872 //val <<= dlen;
|
|
873 //val |= get_bits_MSB(in, dlen);
|
|
874 //last_len = (len += dlen);
|
|
875
|
|
876 last_len = (len += dlen);
|
|
877 for (; dlen; dlen--) GET_BIT_MSB(in, val);
|
|
878
|
|
879 idx = val - codes[idx].p;
|
|
880 if (idx >= ncodes || idx < 0)
|
|
881 return -1;
|
|
882
|
|
883 if (codes[idx].code == val && codes[idx].len == len) {
|
|
884 out[i] = codes[idx].symbol;
|
|
885 break;
|
|
886 }
|
|
887 }
|
|
888 }
|
|
889
|
|
890 return 0;
|
|
891 }
|
|
892
|
|
893 int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c,
|
|
894 cram_block *in, char *out, int *out_size) {
|
|
895 int32_t *out_i = (int32_t *)out;
|
|
896 int i, n;
|
|
897 const cram_huffman_code * const codes = c->huffman.codes;
|
|
898
|
|
899 /* Special case of 0 length codes */
|
|
900 for (i = 0, n = *out_size; i < n; i++) {
|
|
901 out_i[i] = codes[0].symbol;
|
|
902 }
|
|
903 return 0;
|
|
904 }
|
|
905
|
|
906 int cram_huffman_decode_int(cram_slice *slice, cram_codec *c,
|
|
907 cram_block *in, char *out, int *out_size) {
|
|
908 int32_t *out_i = (int32_t *)out;
|
|
909 int i, n, ncodes = c->huffman.ncodes;
|
|
910 const cram_huffman_code * const codes = c->huffman.codes;
|
|
911
|
|
912 for (i = 0, n = *out_size; i < n; i++) {
|
|
913 int idx = 0;
|
|
914 int val = 0, len = 0, last_len = 0;
|
|
915
|
|
916 // Now one bit at a time for remaining checks
|
|
917 for (;;) {
|
|
918 int dlen = codes[idx].len - last_len;
|
|
919 if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen)
|
|
920 return -1;
|
|
921
|
|
922 //val <<= dlen;
|
|
923 //val |= get_bits_MSB(in, dlen);
|
|
924 //last_len = (len += dlen);
|
|
925
|
|
926 last_len = (len += dlen);
|
|
927 for (; dlen; dlen--) GET_BIT_MSB(in, val);
|
|
928
|
|
929 idx = val - codes[idx].p;
|
|
930 if (idx >= ncodes || idx < 0)
|
|
931 return -1;
|
|
932
|
|
933 if (codes[idx].code == val && codes[idx].len == len) {
|
|
934 out_i[i] = codes[idx].symbol;
|
|
935 break;
|
|
936 }
|
|
937 }
|
|
938 }
|
|
939
|
|
940 return 0;
|
|
941 }
|
|
942
|
|
943 /*
|
|
944 * Initialises a huffman decoder from an encoding data stream.
|
|
945 */
|
|
946 cram_codec *cram_huffman_decode_init(char *data, int size,
|
|
947 enum cram_external_type option,
|
|
948 int version) {
|
|
949 int32_t ncodes, i, j;
|
|
950 char *cp = data, *data_end = &data[size];
|
|
951 cram_codec *h;
|
|
952 cram_huffman_code *codes;
|
|
953 int32_t val, last_len, max_len = 0;
|
|
954
|
|
955 cp += itf8_get(cp, &ncodes);
|
|
956 h = calloc(1, sizeof(*h));
|
|
957 if (!h)
|
|
958 return NULL;
|
|
959
|
|
960 h->free = cram_huffman_decode_free;
|
|
961
|
|
962 h->huffman.ncodes = ncodes;
|
|
963 codes = h->huffman.codes = malloc(ncodes * sizeof(*codes));
|
|
964 if (!codes) {
|
|
965 free(h);
|
|
966 return NULL;
|
|
967 }
|
|
968
|
|
969 /* Read symbols and bit-lengths */
|
|
970 for (i = 0; i < ncodes && cp < data_end; i++) {
|
|
971 cp += itf8_get(cp, &codes[i].symbol);
|
|
972 }
|
|
973
|
|
974 if (cp >= data_end) {
|
|
975 fprintf(stderr, "Malformed huffman header stream\n");
|
|
976 free(h);
|
|
977 return NULL;
|
|
978 }
|
|
979 cp += itf8_get(cp, &i);
|
|
980 if (i != ncodes) {
|
|
981 fprintf(stderr, "Malformed huffman header stream\n");
|
|
982 free(h);
|
|
983 return NULL;
|
|
984 }
|
|
985
|
|
986 if (ncodes == 0) {
|
|
987 /* NULL huffman stream */
|
|
988 return h;
|
|
989 }
|
|
990
|
|
991 for (i = 0; i < ncodes && cp < data_end; i++) {
|
|
992 cp += itf8_get(cp, &codes[i].len);
|
|
993 if (max_len < codes[i].len)
|
|
994 max_len = codes[i].len;
|
|
995 }
|
|
996 if (cp - data != size || max_len >= ncodes) {
|
|
997 fprintf(stderr, "Malformed huffman header stream\n");
|
|
998 free(h);
|
|
999 return NULL;
|
|
1000 }
|
|
1001
|
|
1002 /* Sort by bit length and then by symbol value */
|
|
1003 qsort(codes, ncodes, sizeof(*codes), code_sort);
|
|
1004
|
|
1005 /* Assign canonical codes */
|
|
1006 val = -1, last_len = 0;
|
|
1007 for (i = 0; i < ncodes; i++) {
|
|
1008 val++;
|
|
1009 if (codes[i].len > last_len) {
|
|
1010 while (codes[i].len > last_len) {
|
|
1011 val <<= 1;
|
|
1012 last_len++;
|
|
1013 }
|
|
1014 }
|
|
1015 codes[i].code = val;
|
|
1016 }
|
|
1017
|
|
1018 /*
|
|
1019 * Compute the next starting point, offset by the i'th value.
|
|
1020 * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then
|
|
1021 * codes[10..13].p = 30 - 10.
|
|
1022 */
|
|
1023 last_len = 0;
|
|
1024 for (i = j = 0; i < ncodes; i++) {
|
|
1025 if (codes[i].len > last_len) {
|
|
1026 j = codes[i].code - i;
|
|
1027 last_len = codes[i].len;
|
|
1028 }
|
|
1029 codes[i].p = j;
|
|
1030 }
|
|
1031
|
|
1032 // puts("==HUFF LEN==");
|
|
1033 // for (i = 0; i <= last_len+1; i++) {
|
|
1034 // printf("len %d=%d prefix %d\n", i, h->huffman.lengths[i], h->huffman.prefix[i]);
|
|
1035 // }
|
|
1036 // puts("===HUFFMAN CODES===");
|
|
1037 // for (i = 0; i < ncodes; i++) {
|
|
1038 // int j;
|
|
1039 // printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code);
|
|
1040 // j = codes[i].len;
|
|
1041 // while (j) {
|
|
1042 // putchar(codes[i].code & (1 << --j) ? '1' : '0');
|
|
1043 // }
|
|
1044 // printf(" %d\n", codes[i].code);
|
|
1045 // }
|
|
1046
|
|
1047 h->codec = E_HUFFMAN;
|
|
1048 if (option == E_BYTE || option == E_BYTE_ARRAY) {
|
|
1049 if (h->huffman.codes[0].len == 0)
|
|
1050 h->decode = cram_huffman_decode_char0;
|
|
1051 else
|
|
1052 h->decode = cram_huffman_decode_char;
|
|
1053 } else if (option == E_BYTE_ARRAY_BLOCK) {
|
|
1054 abort();
|
|
1055 } else {
|
|
1056 if (h->huffman.codes[0].len == 0)
|
|
1057 h->decode = cram_huffman_decode_int0;
|
|
1058 else
|
|
1059 h->decode = cram_huffman_decode_int;
|
|
1060 }
|
|
1061
|
|
1062 return (cram_codec *)h;
|
|
1063 }
|
|
1064
|
|
1065 int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c,
|
|
1066 char *in, int in_size) {
|
|
1067 return 0;
|
|
1068 }
|
|
1069
|
|
1070 int cram_huffman_encode_char(cram_slice *slice, cram_codec *c,
|
|
1071 char *in, int in_size) {
|
|
1072 int i, code, len, r = 0;
|
|
1073 unsigned char *syms = (unsigned char *)in;
|
|
1074
|
|
1075 do {
|
|
1076 int sym = *syms++;
|
|
1077 if (sym >= -1 && sym < MAX_HUFF) {
|
|
1078 i = c->e_huffman.val2code[sym+1];
|
|
1079 assert(c->e_huffman.codes[i].symbol == sym);
|
|
1080 code = c->e_huffman.codes[i].code;
|
|
1081 len = c->e_huffman.codes[i].len;
|
|
1082 } else {
|
|
1083 /* Slow - use a lookup table for when sym < MAX_HUFF? */
|
|
1084 for (i = 0; i < c->e_huffman.nvals; i++) {
|
|
1085 if (c->e_huffman.codes[i].symbol == sym)
|
|
1086 break;
|
|
1087 }
|
|
1088 if (i == c->e_huffman.nvals)
|
|
1089 return -1;
|
|
1090
|
|
1091 code = c->e_huffman.codes[i].code;
|
|
1092 len = c->e_huffman.codes[i].len;
|
|
1093 }
|
|
1094
|
|
1095 r |= store_bits_MSB(c->out, code, len);
|
|
1096 } while (--in_size);
|
|
1097
|
|
1098 return r;
|
|
1099 }
|
|
1100
|
|
1101 int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c,
|
|
1102 char *in, int in_size) {
|
|
1103 return 0;
|
|
1104 }
|
|
1105
|
|
1106 int cram_huffman_encode_int(cram_slice *slice, cram_codec *c,
|
|
1107 char *in, int in_size) {
|
|
1108 int i, code, len, r = 0;
|
|
1109 int *syms = (int *)in;
|
|
1110
|
|
1111 do {
|
|
1112 int sym = *syms++;
|
|
1113
|
|
1114 if (sym >= -1 && sym < MAX_HUFF) {
|
|
1115 i = c->e_huffman.val2code[sym+1];
|
|
1116 assert(c->e_huffman.codes[i].symbol == sym);
|
|
1117 code = c->e_huffman.codes[i].code;
|
|
1118 len = c->e_huffman.codes[i].len;
|
|
1119 } else {
|
|
1120 /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? */
|
|
1121 for (i = 0; i < c->e_huffman.nvals; i++) {
|
|
1122 if (c->e_huffman.codes[i].symbol == sym)
|
|
1123 break;
|
|
1124 }
|
|
1125 if (i == c->e_huffman.nvals)
|
|
1126 return -1;
|
|
1127
|
|
1128 code = c->e_huffman.codes[i].code;
|
|
1129 len = c->e_huffman.codes[i].len;
|
|
1130 }
|
|
1131
|
|
1132 r |= store_bits_MSB(c->out, code, len);
|
|
1133 } while (--in_size);
|
|
1134
|
|
1135 return r;
|
|
1136 }
|
|
1137
|
|
1138 void cram_huffman_encode_free(cram_codec *c) {
|
|
1139 if (!c)
|
|
1140 return;
|
|
1141
|
|
1142 if (c->e_huffman.codes)
|
|
1143 free(c->e_huffman.codes);
|
|
1144 free(c);
|
|
1145 }
|
|
1146
|
|
1147 /*
|
|
1148 * Encodes a huffman tree.
|
|
1149 * Returns number of bytes written.
|
|
1150 */
|
|
1151 int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix,
|
|
1152 int version) {
|
|
1153 int i, len = 0;
|
|
1154 cram_huffman_code *codes = c->e_huffman.codes;
|
|
1155 /*
|
|
1156 * Up to code length 127 means 2.5e+26 bytes of data required (worst
|
|
1157 * case huffman tree needs symbols with freqs matching the Fibonacci
|
|
1158 * series). So guaranteed 1 byte per code.
|
|
1159 *
|
|
1160 * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8).
|
|
1161 *
|
|
1162 * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory
|
|
1163 */
|
|
1164 char *tmp = malloc(6*c->e_huffman.nvals+16);
|
|
1165 char *tp = tmp;
|
|
1166
|
|
1167 if (!tmp)
|
|
1168 return -1;
|
|
1169
|
|
1170 if (prefix) {
|
|
1171 size_t l = strlen(prefix);
|
|
1172 BLOCK_APPEND(b, prefix, l);
|
|
1173 len += l;
|
|
1174 }
|
|
1175
|
|
1176 tp += itf8_put(tp, c->e_huffman.nvals);
|
|
1177 for (i = 0; i < c->e_huffman.nvals; i++) {
|
|
1178 tp += itf8_put(tp, codes[i].symbol);
|
|
1179 }
|
|
1180
|
|
1181 tp += itf8_put(tp, c->e_huffman.nvals);
|
|
1182 for (i = 0; i < c->e_huffman.nvals; i++) {
|
|
1183 tp += itf8_put(tp, codes[i].len);
|
|
1184 }
|
|
1185
|
|
1186 len += itf8_put_blk(b, c->codec);
|
|
1187 len += itf8_put_blk(b, tp-tmp);
|
|
1188 BLOCK_APPEND(b, tmp, tp-tmp);
|
|
1189 len += tp-tmp;
|
|
1190
|
|
1191 free(tmp);
|
|
1192
|
|
1193 return len;
|
|
1194 }
|
|
1195
|
|
1196 cram_codec *cram_huffman_encode_init(cram_stats *st,
|
|
1197 enum cram_external_type option,
|
|
1198 void *dat,
|
|
1199 int version) {
|
|
1200 int *vals = NULL, *freqs = NULL, vals_alloc = 0, *lens, code, len;
|
|
1201 int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k;
|
|
1202 cram_codec *c;
|
|
1203 cram_huffman_code *codes;
|
|
1204
|
|
1205 c = malloc(sizeof(*c));
|
|
1206 if (!c)
|
|
1207 return NULL;
|
|
1208 c->codec = E_HUFFMAN;
|
|
1209
|
|
1210 /* Count number of unique symbols */
|
|
1211 for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
|
|
1212 if (!st->freqs[i])
|
|
1213 continue;
|
|
1214 if (nvals >= vals_alloc) {
|
|
1215 vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
|
|
1216 vals = realloc(vals, vals_alloc * sizeof(int));
|
|
1217 freqs = realloc(freqs, vals_alloc * sizeof(int));
|
|
1218 if (!vals || !freqs) {
|
|
1219 if (vals) free(vals);
|
|
1220 if (freqs) free(freqs);
|
|
1221 free(c);
|
|
1222 return NULL;
|
|
1223 }
|
|
1224 }
|
|
1225 vals[nvals] = i;
|
|
1226 freqs[nvals] = st->freqs[i];
|
|
1227 assert(st->freqs[i] > 0);
|
|
1228 ntot += freqs[nvals];
|
|
1229 if (max_val < i) max_val = i;
|
|
1230 if (min_val > i) min_val = i;
|
|
1231 nvals++;
|
|
1232 }
|
|
1233 if (st->h) {
|
|
1234 khint_t k;
|
|
1235
|
|
1236 for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
|
|
1237 if (!kh_exist(st->h, k))
|
|
1238 continue;
|
|
1239 if (nvals >= vals_alloc) {
|
|
1240 vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
|
|
1241 vals = realloc(vals, vals_alloc * sizeof(int));
|
|
1242 freqs = realloc(freqs, vals_alloc * sizeof(int));
|
|
1243 if (!vals || !freqs)
|
|
1244 return NULL;
|
|
1245 }
|
|
1246 vals[nvals]= kh_key(st->h, k);
|
|
1247 freqs[nvals] = kh_val(st->h, k);
|
|
1248 assert(freqs[nvals] > 0);
|
|
1249 ntot += freqs[nvals];
|
|
1250 if (max_val < i) max_val = i;
|
|
1251 if (min_val > i) min_val = i;
|
|
1252 nvals++;
|
|
1253 }
|
|
1254 }
|
|
1255
|
|
1256 assert(nvals > 0);
|
|
1257
|
|
1258 freqs = realloc(freqs, 2*nvals*sizeof(*freqs));
|
|
1259 lens = calloc(2*nvals, sizeof(*lens));
|
|
1260 if (!lens || !freqs)
|
|
1261 return NULL;
|
|
1262
|
|
1263 /* Inefficient, use pointers to form chain so we can insert and maintain
|
|
1264 * a sorted list? This is currently O(nvals^2) complexity.
|
|
1265 */
|
|
1266 for (;;) {
|
|
1267 int low1 = INT_MAX, low2 = INT_MAX;
|
|
1268 int ind1 = 0, ind2 = 0;
|
|
1269 for (i = 0; i < nvals; i++) {
|
|
1270 if (freqs[i] < 0)
|
|
1271 continue;
|
|
1272 if (low1 > freqs[i])
|
|
1273 low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;
|
|
1274 else if (low2 > freqs[i])
|
|
1275 low2 = freqs[i], ind2 = i;
|
|
1276 }
|
|
1277 if (low2 == INT_MAX)
|
|
1278 break;
|
|
1279
|
|
1280 freqs[nvals] = low1 + low2;
|
|
1281 lens[ind1] = nvals;
|
|
1282 lens[ind2] = nvals;
|
|
1283 freqs[ind1] *= -1;
|
|
1284 freqs[ind2] *= -1;
|
|
1285 nvals++;
|
|
1286 }
|
|
1287 nvals = nvals/2+1;
|
|
1288
|
|
1289 /* Assign lengths */
|
|
1290 for (i = 0; i < nvals; i++) {
|
|
1291 int code_len = 0;
|
|
1292 for (k = lens[i]; k; k = lens[k])
|
|
1293 code_len++;
|
|
1294 lens[i] = code_len;
|
|
1295 freqs[i] *= -1;
|
|
1296 //fprintf(stderr, "%d / %d => %d\n", vals[i], freqs[i], lens[i]);
|
|
1297 }
|
|
1298
|
|
1299
|
|
1300 /* Sort, need in a struct */
|
|
1301 if (!(codes = malloc(nvals * sizeof(*codes))))
|
|
1302 return NULL;
|
|
1303 for (i = 0; i < nvals; i++) {
|
|
1304 codes[i].symbol = vals[i];
|
|
1305 codes[i].len = lens[i];
|
|
1306 }
|
|
1307 qsort(codes, nvals, sizeof(*codes), code_sort);
|
|
1308
|
|
1309 /*
|
|
1310 * Generate canonical codes from lengths.
|
|
1311 * Sort by length.
|
|
1312 * Start with 0.
|
|
1313 * Every new code of same length is +1.
|
|
1314 * Every new code of new length is +1 then <<1 per extra length.
|
|
1315 *
|
|
1316 * /\
|
|
1317 * a/\
|
|
1318 * /\/\
|
|
1319 * bcd/\
|
|
1320 * ef
|
|
1321 *
|
|
1322 * a 1 0
|
|
1323 * b 3 4 (0+1)<<2
|
|
1324 * c 3 5
|
|
1325 * d 3 6
|
|
1326 * e 4 14 (6+1)<<1
|
|
1327 * f 5 15
|
|
1328 */
|
|
1329 code = 0; len = codes[0].len;
|
|
1330 for (i = 0; i < nvals; i++) {
|
|
1331 while (len != codes[i].len) {
|
|
1332 code<<=1;
|
|
1333 len++;
|
|
1334 }
|
|
1335 codes[i].code = code++;
|
|
1336
|
|
1337 if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF)
|
|
1338 c->e_huffman.val2code[codes[i].symbol+1] = i;
|
|
1339
|
|
1340 //fprintf(stderr, "sym %d, code %d, len %d\n",
|
|
1341 // codes[i].symbol, codes[i].code, codes[i].len);
|
|
1342 }
|
|
1343
|
|
1344 free(lens);
|
|
1345 free(vals);
|
|
1346 free(freqs);
|
|
1347
|
|
1348 c->e_huffman.codes = codes;
|
|
1349 c->e_huffman.nvals = nvals;
|
|
1350
|
|
1351 c->free = cram_huffman_encode_free;
|
|
1352 if (option == E_BYTE || option == E_BYTE_ARRAY) {
|
|
1353 if (c->e_huffman.codes[0].len == 0)
|
|
1354 c->encode = cram_huffman_encode_char0;
|
|
1355 else
|
|
1356 c->encode = cram_huffman_encode_char;
|
|
1357 } else {
|
|
1358 if (c->e_huffman.codes[0].len == 0)
|
|
1359 c->encode = cram_huffman_encode_int0;
|
|
1360 else
|
|
1361 c->encode = cram_huffman_encode_int;
|
|
1362 }
|
|
1363 c->store = cram_huffman_encode_store;
|
|
1364
|
|
1365 return c;
|
|
1366 }
|
|
1367
|
|
1368 /*
|
|
1369 * ---------------------------------------------------------------------------
|
|
1370 * BYTE_ARRAY_LEN
|
|
1371 */
|
|
1372 int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c,
|
|
1373 cram_block *in, char *out,
|
|
1374 int *out_size) {
|
|
1375 /* Fetch length */
|
|
1376 int32_t len, one = 1;
|
|
1377
|
|
1378 c->byte_array_len.len_codec->decode(slice, c->byte_array_len.len_codec, in, (char *)&len, &one);
|
|
1379 //printf("ByteArray Len=%d\n", len);
|
|
1380
|
|
1381 if (c->byte_array_len.value_codec) {
|
|
1382 c->byte_array_len.value_codec->decode(slice,
|
|
1383 c->byte_array_len.value_codec,
|
|
1384 in, out, &len);
|
|
1385 } else {
|
|
1386 return -1;
|
|
1387 }
|
|
1388
|
|
1389 *out_size = len;
|
|
1390
|
|
1391 return 0;
|
|
1392 }
|
|
1393
|
|
1394 void cram_byte_array_len_decode_free(cram_codec *c) {
|
|
1395 if (!c) return;
|
|
1396
|
|
1397 if (c->byte_array_len.len_codec)
|
|
1398 c->byte_array_len.len_codec->free(c->byte_array_len.len_codec);
|
|
1399
|
|
1400 if (c->byte_array_len.value_codec)
|
|
1401 c->byte_array_len.value_codec->free(c->byte_array_len.value_codec);
|
|
1402
|
|
1403 free(c);
|
|
1404 }
|
|
1405
|
|
1406 cram_codec *cram_byte_array_len_decode_init(char *data, int size,
|
|
1407 enum cram_external_type option,
|
|
1408 int version) {
|
|
1409 cram_codec *c;
|
|
1410 char *cp = data;
|
|
1411 int32_t encoding;
|
|
1412 int32_t sub_size;
|
|
1413
|
|
1414 if (!(c = malloc(sizeof(*c))))
|
|
1415 return NULL;
|
|
1416
|
|
1417 c->codec = E_BYTE_ARRAY_LEN;
|
|
1418 c->decode = cram_byte_array_len_decode;
|
|
1419 c->free = cram_byte_array_len_decode_free;
|
|
1420
|
|
1421 cp += itf8_get(cp, &encoding);
|
|
1422 cp += itf8_get(cp, &sub_size);
|
|
1423 c->byte_array_len.len_codec = cram_decoder_init(encoding, cp, sub_size,
|
|
1424 E_INT, version);
|
|
1425 cp += sub_size;
|
|
1426
|
|
1427 cp += itf8_get(cp, &encoding);
|
|
1428 cp += itf8_get(cp, &sub_size);
|
|
1429 c->byte_array_len.value_codec = cram_decoder_init(encoding, cp, sub_size,
|
|
1430 option, version);
|
|
1431 cp += sub_size;
|
|
1432
|
|
1433 if (cp - data != size) {
|
|
1434 fprintf(stderr, "Malformed byte_array_len header stream\n");
|
|
1435 free(c);
|
|
1436 return NULL;
|
|
1437 }
|
|
1438
|
|
1439 return c;
|
|
1440 }
|
|
1441
|
|
1442 int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c,
|
|
1443 char *in, int in_size) {
|
|
1444 int32_t i32 = in_size;
|
|
1445 int r = 0;
|
|
1446
|
|
1447 r |= c->e_byte_array_len.len_codec->encode(slice,
|
|
1448 c->e_byte_array_len.len_codec,
|
|
1449 (char *)&i32, 1);
|
|
1450 r |= c->e_byte_array_len.val_codec->encode(slice,
|
|
1451 c->e_byte_array_len.val_codec,
|
|
1452 in, in_size);
|
|
1453 return r;
|
|
1454 }
|
|
1455
|
|
1456 void cram_byte_array_len_encode_free(cram_codec *c) {
|
|
1457 if (!c)
|
|
1458 return;
|
|
1459
|
|
1460 if (c->e_byte_array_len.len_codec)
|
|
1461 c->e_byte_array_len.len_codec->free(c->e_byte_array_len.len_codec);
|
|
1462
|
|
1463 if (c->e_byte_array_len.val_codec)
|
|
1464 c->e_byte_array_len.val_codec->free(c->e_byte_array_len.val_codec);
|
|
1465
|
|
1466 free(c);
|
|
1467 }
|
|
1468
|
|
1469 int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b,
|
|
1470 char *prefix, int version) {
|
|
1471 int len = 0, len2, len3;
|
|
1472 cram_codec *tc;
|
|
1473 cram_block *b_len, *b_val;
|
|
1474
|
|
1475 if (prefix) {
|
|
1476 size_t l = strlen(prefix);
|
|
1477 BLOCK_APPEND(b, prefix, l);
|
|
1478 len += l;
|
|
1479 }
|
|
1480
|
|
1481 tc = c->e_byte_array_len.len_codec;
|
|
1482 b_len = cram_new_block(0, 0);
|
|
1483 len2 = tc->store(tc, b_len, NULL, version);
|
|
1484
|
|
1485 tc = c->e_byte_array_len.val_codec;
|
|
1486 b_val = cram_new_block(0, 0);
|
|
1487 len3 = tc->store(tc, b_val, NULL, version);
|
|
1488
|
|
1489 len += itf8_put_blk(b, c->codec);
|
|
1490 len += itf8_put_blk(b, len2+len3);
|
|
1491 BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len));
|
|
1492 BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val));
|
|
1493
|
|
1494 cram_free_block(b_len);
|
|
1495 cram_free_block(b_val);
|
|
1496
|
|
1497 return len + len2 + len3;
|
|
1498 }
|
|
1499
|
|
1500 cram_codec *cram_byte_array_len_encode_init(cram_stats *st,
|
|
1501 enum cram_external_type option,
|
|
1502 void *dat,
|
|
1503 int version) {
|
|
1504 cram_codec *c;
|
|
1505 cram_byte_array_len_encoder *e = (cram_byte_array_len_encoder *)dat;
|
|
1506
|
|
1507 c = malloc(sizeof(*c));
|
|
1508 if (!c)
|
|
1509 return NULL;
|
|
1510 c->codec = E_BYTE_ARRAY_LEN;
|
|
1511 c->free = cram_byte_array_len_encode_free;
|
|
1512 c->encode = cram_byte_array_len_encode;
|
|
1513 c->store = cram_byte_array_len_encode_store;
|
|
1514
|
|
1515 c->e_byte_array_len.len_codec = cram_encoder_init(e->len_encoding,
|
|
1516 NULL, E_INT,
|
|
1517 e->len_dat,
|
|
1518 version);
|
|
1519 c->e_byte_array_len.val_codec = cram_encoder_init(e->val_encoding,
|
|
1520 NULL, E_BYTE_ARRAY,
|
|
1521 e->val_dat,
|
|
1522 version);
|
|
1523
|
|
1524 return c;
|
|
1525 }
|
|
1526
|
|
1527 /*
|
|
1528 * ---------------------------------------------------------------------------
|
|
1529 * BYTE_ARRAY_STOP
|
|
1530 */
|
|
1531 static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c,
|
|
1532 cram_block *in, char *out,
|
|
1533 int *out_size) {
|
|
1534 int i;
|
|
1535 cram_block *b = NULL;
|
|
1536 char *cp, ch;
|
|
1537
|
|
1538 if (slice->block_by_id) {
|
|
1539 if (!(b = slice->block_by_id[c->byte_array_stop.content_id]))
|
|
1540 return *out_size?-1:0;
|
|
1541 } else {
|
|
1542 for (i = 0; i < slice->hdr->num_blocks; i++) {
|
|
1543 b = slice->block[i];
|
|
1544 if (b && b->content_type == EXTERNAL &&
|
|
1545 b->content_id == c->byte_array_stop.content_id) {
|
|
1546 break;
|
|
1547 }
|
|
1548 }
|
|
1549 if (i == slice->hdr->num_blocks || !b)
|
|
1550 return -1;
|
|
1551 }
|
|
1552
|
|
1553 if (b->idx >= b->uncomp_size)
|
|
1554 return -1;
|
|
1555
|
|
1556 cp = (char *)b->data + b->idx;
|
|
1557 while ((ch = *cp) != (char)c->byte_array_stop.stop) {
|
|
1558 if (cp - (char *)b->data >= b->uncomp_size)
|
|
1559 return -1;
|
|
1560 *out++ = ch;
|
|
1561 cp++;
|
|
1562 }
|
|
1563
|
|
1564 *out_size = cp - (char *)(b->data + b->idx);
|
|
1565 b->idx = cp - (char *)b->data + 1;
|
|
1566
|
|
1567 return 0;
|
|
1568 }
|
|
1569
|
|
1570 int cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c,
|
|
1571 cram_block *in, char *out_,
|
|
1572 int *out_size) {
|
|
1573 cram_block *b = NULL;
|
|
1574 cram_block *out = (cram_block *)out_;
|
|
1575 char *cp, *out_cp, *cp_end;
|
|
1576 char stop;
|
|
1577
|
|
1578 if (slice->block_by_id) {
|
|
1579 if (!(b = slice->block_by_id[c->byte_array_stop.content_id]))
|
|
1580 return *out_size?-1:0;
|
|
1581 } else {
|
|
1582 int i;
|
|
1583 for (i = 0; i < slice->hdr->num_blocks; i++) {
|
|
1584 b = slice->block[i];
|
|
1585 if (b && b->content_type == EXTERNAL &&
|
|
1586 b->content_id == c->byte_array_stop.content_id) {
|
|
1587 break;
|
|
1588 }
|
|
1589 }
|
|
1590 if (i == slice->hdr->num_blocks || !b)
|
|
1591 return -1;
|
|
1592 }
|
|
1593
|
|
1594 if (b->idx >= b->uncomp_size)
|
|
1595 return -1;
|
|
1596 cp = (char *)b->data + b->idx;
|
|
1597 cp_end = (char *)b->data + b->uncomp_size;
|
|
1598 out_cp = (char *)BLOCK_END(out);
|
|
1599
|
|
1600 stop = c->byte_array_stop.stop;
|
|
1601 if (cp_end - cp < out->alloc - out->byte) {
|
|
1602 while (*cp != stop && cp != cp_end)
|
|
1603 *out_cp++ = *cp++;
|
|
1604 BLOCK_SIZE(out) = out_cp - (char *)BLOCK_DATA(out);
|
|
1605 } else {
|
|
1606 char *cp_start;
|
|
1607 for (cp_start = cp; *cp != stop && cp != cp_end; cp++)
|
|
1608 ;
|
|
1609 BLOCK_APPEND(out, cp_start, cp - cp_start);
|
|
1610 BLOCK_GROW(out, cp - cp_start);
|
|
1611 }
|
|
1612
|
|
1613 *out_size = cp - (char *)(b->data + b->idx);
|
|
1614 b->idx = cp - (char *)b->data + 1;
|
|
1615
|
|
1616 return 0;
|
|
1617 }
|
|
1618
|
|
1619 void cram_byte_array_stop_decode_free(cram_codec *c) {
|
|
1620 if (!c) return;
|
|
1621
|
|
1622 free(c);
|
|
1623 }
|
|
1624
|
|
1625 cram_codec *cram_byte_array_stop_decode_init(char *data, int size,
|
|
1626 enum cram_external_type option,
|
|
1627 int version) {
|
|
1628 cram_codec *c;
|
|
1629 unsigned char *cp = (unsigned char *)data;
|
|
1630
|
|
1631 if (!(c = malloc(sizeof(*c))))
|
|
1632 return NULL;
|
|
1633
|
|
1634 c->codec = E_BYTE_ARRAY_STOP;
|
|
1635 c->decode = (option == E_BYTE_ARRAY_BLOCK)
|
|
1636 ? cram_byte_array_stop_decode_block
|
|
1637 : cram_byte_array_stop_decode_char;
|
|
1638 c->free = cram_byte_array_stop_decode_free;
|
|
1639
|
|
1640 c->byte_array_stop.stop = *cp++;
|
|
1641 if (CRAM_MAJOR_VERS(version) == 1) {
|
|
1642 c->byte_array_stop.content_id = cp[0] + (cp[1]<<8) + (cp[2]<<16)
|
|
1643 + (cp[3]<<24);
|
|
1644 cp += 4;
|
|
1645 } else {
|
|
1646 cp += itf8_get(cp, &c->byte_array_stop.content_id);
|
|
1647 }
|
|
1648
|
|
1649 if ((char *)cp - data != size) {
|
|
1650 fprintf(stderr, "Malformed byte_array_stop header stream\n");
|
|
1651 free(c);
|
|
1652 return NULL;
|
|
1653 }
|
|
1654
|
|
1655 return c;
|
|
1656 }
|
|
1657
|
|
1658 int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c,
|
|
1659 char *in, int in_size) {
|
|
1660 BLOCK_APPEND(c->out, in, in_size);
|
|
1661 BLOCK_APPEND_CHAR(c->out, c->e_byte_array_stop.stop);
|
|
1662 return 0;
|
|
1663 }
|
|
1664
|
|
1665 void cram_byte_array_stop_encode_free(cram_codec *c) {
|
|
1666 if (!c)
|
|
1667 return;
|
|
1668 free(c);
|
|
1669 }
|
|
1670
|
|
1671 int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b,
|
|
1672 char *prefix, int version) {
|
|
1673 int len = 0;
|
|
1674 char buf[20], *cp = buf;
|
|
1675
|
|
1676 if (prefix) {
|
|
1677 size_t l = strlen(prefix);
|
|
1678 BLOCK_APPEND(b, prefix, l);
|
|
1679 len += l;
|
|
1680 }
|
|
1681
|
|
1682 cp += itf8_put(cp, c->codec);
|
|
1683
|
|
1684 if (CRAM_MAJOR_VERS(version) == 1) {
|
|
1685 cp += itf8_put(cp, 5);
|
|
1686 *cp++ = c->e_byte_array_stop.stop;
|
|
1687 *cp++ = (c->e_byte_array_stop.content_id >> 0) & 0xff;
|
|
1688 *cp++ = (c->e_byte_array_stop.content_id >> 8) & 0xff;
|
|
1689 *cp++ = (c->e_byte_array_stop.content_id >> 16) & 0xff;
|
|
1690 *cp++ = (c->e_byte_array_stop.content_id >> 24) & 0xff;
|
|
1691 } else {
|
|
1692 cp += itf8_put(cp, 1 + itf8_size(c->e_byte_array_stop.content_id));
|
|
1693 *cp++ = c->e_byte_array_stop.stop;
|
|
1694 cp += itf8_put(cp, c->e_byte_array_stop.content_id);
|
|
1695 }
|
|
1696
|
|
1697 BLOCK_APPEND(b, buf, cp-buf);
|
|
1698 len += cp-buf;
|
|
1699
|
|
1700 return len;
|
|
1701 }
|
|
1702
|
|
1703 cram_codec *cram_byte_array_stop_encode_init(cram_stats *st,
|
|
1704 enum cram_external_type option,
|
|
1705 void *dat,
|
|
1706 int version) {
|
|
1707 cram_codec *c;
|
|
1708
|
|
1709 c = malloc(sizeof(*c));
|
|
1710 if (!c)
|
|
1711 return NULL;
|
|
1712 c->codec = E_BYTE_ARRAY_STOP;
|
|
1713 c->free = cram_byte_array_stop_encode_free;
|
|
1714 c->encode = cram_byte_array_stop_encode;
|
|
1715 c->store = cram_byte_array_stop_encode_store;
|
|
1716
|
|
1717 c->e_byte_array_stop.stop = ((int *)dat)[0];
|
|
1718 c->e_byte_array_stop.content_id = ((int *)dat)[1];
|
|
1719
|
|
1720 return c;
|
|
1721 }
|
|
1722
|
|
1723 /*
|
|
1724 * ---------------------------------------------------------------------------
|
|
1725 */
|
|
1726
|
|
1727 char *cram_encoding2str(enum cram_encoding t) {
|
|
1728 switch (t) {
|
|
1729 case E_NULL: return "NULL";
|
|
1730 case E_EXTERNAL: return "EXTERNAL";
|
|
1731 case E_GOLOMB: return "GOLOMB";
|
|
1732 case E_HUFFMAN: return "HUFFMAN";
|
|
1733 case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";
|
|
1734 case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";
|
|
1735 case E_BETA: return "BETA";
|
|
1736 case E_SUBEXP: return "SUBEXP";
|
|
1737 case E_GOLOMB_RICE: return "GOLOMB_RICE";
|
|
1738 case E_GAMMA: return "GAMMA";
|
|
1739 }
|
|
1740 return "?";
|
|
1741 }
|
|
1742
|
|
1743 static cram_codec *(*decode_init[])(char *data,
|
|
1744 int size,
|
|
1745 enum cram_external_type option,
|
|
1746 int version) = {
|
|
1747 NULL,
|
|
1748 cram_external_decode_init,
|
|
1749 NULL,
|
|
1750 cram_huffman_decode_init,
|
|
1751 cram_byte_array_len_decode_init,
|
|
1752 cram_byte_array_stop_decode_init,
|
|
1753 cram_beta_decode_init,
|
|
1754 cram_subexp_decode_init,
|
|
1755 NULL,
|
|
1756 cram_gamma_decode_init,
|
|
1757 };
|
|
1758
|
|
1759 cram_codec *cram_decoder_init(enum cram_encoding codec,
|
|
1760 char *data, int size,
|
|
1761 enum cram_external_type option,
|
|
1762 int version) {
|
|
1763 if (decode_init[codec]) {
|
|
1764 return decode_init[codec](data, size, option, version);
|
|
1765 } else {
|
|
1766 fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec));
|
|
1767 return NULL;
|
|
1768 }
|
|
1769 }
|
|
1770
|
|
1771 static cram_codec *(*encode_init[])(cram_stats *stx,
|
|
1772 enum cram_external_type option,
|
|
1773 void *opt,
|
|
1774 int version) = {
|
|
1775 NULL,
|
|
1776 cram_external_encode_init,
|
|
1777 NULL,
|
|
1778 cram_huffman_encode_init,
|
|
1779 cram_byte_array_len_encode_init,
|
|
1780 cram_byte_array_stop_encode_init,
|
|
1781 cram_beta_encode_init,
|
|
1782 NULL, //cram_subexp_encode_init,
|
|
1783 NULL,
|
|
1784 NULL, //cram_gamma_encode_init,
|
|
1785 };
|
|
1786
|
|
1787 cram_codec *cram_encoder_init(enum cram_encoding codec,
|
|
1788 cram_stats *st,
|
|
1789 enum cram_external_type option,
|
|
1790 void *dat,
|
|
1791 int version) {
|
|
1792 if (st && !st->nvals)
|
|
1793 return NULL;
|
|
1794
|
|
1795 if (encode_init[codec]) {
|
|
1796 cram_codec *r;
|
|
1797 if ((r = encode_init[codec](st, option, dat, version)))
|
|
1798 r->out = NULL;
|
|
1799 return r;
|
|
1800 } else {
|
|
1801 fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec));
|
|
1802 abort();
|
|
1803 }
|
|
1804 }
|
|
1805
|
|
1806 /*
|
|
1807 * Returns the content_id used by this codec, also in id2 if byte_array_len.
|
|
1808 * Returns -1 for the CORE block and -2 for unneeded.
|
|
1809 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
|
|
1810 */
|
|
1811 int cram_codec_to_id(cram_codec *c, int *id2) {
|
|
1812 int bnum1, bnum2 = -2;
|
|
1813
|
|
1814 switch (c->codec) {
|
|
1815 case E_HUFFMAN:
|
|
1816 bnum1 = c->huffman.ncodes == 1 ? -2 : -1;
|
|
1817 break;
|
|
1818 case E_GOLOMB:
|
|
1819 case E_BETA:
|
|
1820 case E_SUBEXP:
|
|
1821 case E_GOLOMB_RICE:
|
|
1822 case E_GAMMA:
|
|
1823 bnum1 = -1;
|
|
1824 break;
|
|
1825 case E_EXTERNAL:
|
|
1826 bnum1 = c->external.content_id;
|
|
1827 break;
|
|
1828 case E_BYTE_ARRAY_LEN:
|
|
1829 bnum1 = cram_codec_to_id(c->byte_array_len.len_codec, NULL);
|
|
1830 bnum2 = cram_codec_to_id(c->byte_array_len.value_codec, NULL);
|
|
1831 break;
|
|
1832 case E_BYTE_ARRAY_STOP:
|
|
1833 bnum1 = c->byte_array_stop.content_id;
|
|
1834 break;
|
|
1835 case E_NULL:
|
|
1836 bnum1 = -2;
|
|
1837 break;
|
|
1838 default:
|
|
1839 fprintf(stderr, "Unknown codec type %d\n", c->codec);
|
|
1840 bnum1 = -1;
|
|
1841 }
|
|
1842
|
|
1843 if (id2)
|
|
1844 *id2 = bnum2;
|
|
1845 return bnum1;
|
|
1846 }
|