Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/cram/cram_codecs.c @ 0:dfa3745e5fd8
Uploaded
| author | youngkim |
|---|---|
| date | Thu, 24 Mar 2016 17:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3745e5fd8 |
|---|---|
| 1 /* | |
| 2 Copyright (c) 2012-2013 Genome Research Ltd. | |
| 3 Author: James Bonfield <jkb@sanger.ac.uk> | |
| 4 | |
| 5 Redistribution and use in source and binary forms, with or without | |
| 6 modification, are permitted provided that the following conditions are met: | |
| 7 | |
| 8 1. Redistributions of source code must retain the above copyright notice, | |
| 9 this list of conditions and the following disclaimer. | |
| 10 | |
| 11 2. Redistributions in binary form must reproduce the above copyright notice, | |
| 12 this list of conditions and the following disclaimer in the documentation | |
| 13 and/or other materials provided with the distribution. | |
| 14 | |
| 15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger | |
| 16 Institute nor the names of its contributors may be used to endorse or promote | |
| 17 products derived from this software without specific prior written permission. | |
| 18 | |
| 19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND | |
| 20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE | |
| 23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 /* | |
| 32 * FIXME: add checking of cram_external_type to return NULL on unsupported | |
| 33 * {codec,type} tuples. | |
| 34 */ | |
| 35 | |
| 36 #ifdef HAVE_CONFIG_H | |
| 37 #include "io_lib_config.h" | |
| 38 #endif | |
| 39 | |
| 40 #include <stdlib.h> | |
| 41 #include <string.h> | |
| 42 #include <assert.h> | |
| 43 #include <limits.h> | |
| 44 | |
| 45 #include "cram/cram.h" | |
| 46 | |
| 47 static char *codec2str(enum cram_encoding codec) { | |
| 48 switch (codec) { | |
| 49 case E_NULL: return "NULL"; | |
| 50 case E_EXTERNAL: return "EXTERNAL"; | |
| 51 case E_GOLOMB: return "GOLOMB"; | |
| 52 case E_HUFFMAN: return "HUFFMAN"; | |
| 53 case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN"; | |
| 54 case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP"; | |
| 55 case E_BETA: return "BETA"; | |
| 56 case E_SUBEXP: return "SUBEXP"; | |
| 57 case E_GOLOMB_RICE: return "GOLOMB_RICE"; | |
| 58 case E_GAMMA: return "GAMMA"; | |
| 59 } | |
| 60 | |
| 61 return "(unknown)"; | |
| 62 } | |
| 63 | |
| 64 /* | |
| 65 * --------------------------------------------------------------------------- | |
| 66 * Block bit-level I/O functions. | |
| 67 * All defined static here to promote easy inlining by the compiler. | |
| 68 */ | |
| 69 | |
| 70 #if 0 | |
| 71 /* Get a single bit, MSB first */ | |
| 72 static signed int get_bit_MSB(cram_block *block) { | |
| 73 unsigned int val; | |
| 74 | |
| 75 if (block->byte > block->alloc) | |
| 76 return -1; | |
| 77 | |
| 78 val = block->data[block->byte] >> block->bit; | |
| 79 if (--block->bit == -1) { | |
| 80 block->bit = 7; | |
| 81 block->byte++; | |
| 82 //printf("(%02X)", block->data[block->byte]); | |
| 83 } | |
| 84 | |
| 85 //printf("-B%d-", val&1); | |
| 86 | |
| 87 return val & 1; | |
| 88 } | |
| 89 #endif | |
| 90 | |
| 91 /* | |
| 92 * Count number of successive 0 and 1 bits | |
| 93 */ | |
| 94 static int get_one_bits_MSB(cram_block *block) { | |
| 95 int n = 0, b; | |
| 96 do { | |
| 97 b = block->data[block->byte] >> block->bit; | |
| 98 if (--block->bit == -1) { | |
| 99 block->bit = 7; | |
| 100 block->byte++; | |
| 101 } | |
| 102 n++; | |
| 103 } while (b&1); | |
| 104 | |
| 105 return n-1; | |
| 106 } | |
| 107 | |
| 108 static int get_zero_bits_MSB(cram_block *block) { | |
| 109 int n = 0, b; | |
| 110 do { | |
| 111 b = block->data[block->byte] >> block->bit; | |
| 112 if (--block->bit == -1) { | |
| 113 block->bit = 7; | |
| 114 block->byte++; | |
| 115 } | |
| 116 n++; | |
| 117 } while (!(b&1)); | |
| 118 | |
| 119 return n-1; | |
| 120 } | |
| 121 | |
| 122 #if 0 | |
| 123 /* Stores a single bit */ | |
| 124 static void store_bit_MSB(cram_block *block, unsigned int bit) { | |
| 125 if (block->byte >= block->alloc) { | |
| 126 block->alloc = block->alloc ? block->alloc*2 : 1024; | |
| 127 block->data = realloc(block->data, block->alloc); | |
| 128 } | |
| 129 | |
| 130 if (bit) | |
| 131 block->data[block->byte] |= (1 << block->bit); | |
| 132 | |
| 133 if (--block->bit == -1) { | |
| 134 block->bit = 7; | |
| 135 block->byte++; | |
| 136 block->data[block->byte] = 0; | |
| 137 } | |
| 138 } | |
| 139 #endif | |
| 140 | |
| 141 #if 0 | |
| 142 /* Rounds to the next whole byte boundary first */ | |
| 143 static void store_bytes_MSB(cram_block *block, char *bytes, int len) { | |
| 144 if (block->bit != 7) { | |
| 145 block->bit = 7; | |
| 146 block->byte++; | |
| 147 } | |
| 148 | |
| 149 while (block->byte + len >= block->alloc) { | |
| 150 block->alloc = block->alloc ? block->alloc*2 : 1024; | |
| 151 block->data = realloc(block->data, block->alloc); | |
| 152 } | |
| 153 | |
| 154 memcpy(&block->data[block->byte], bytes, len); | |
| 155 block->byte += len; | |
| 156 } | |
| 157 #endif | |
| 158 | |
| 159 /* Local optimised copy for inlining */ | |
| 160 static inline unsigned int get_bits_MSB(cram_block *block, int nbits) { | |
| 161 unsigned int val = 0; | |
| 162 int i; | |
| 163 | |
| 164 #if 0 | |
| 165 // Fits within the current byte */ | |
| 166 if (nbits <= block->bit+1) { | |
| 167 val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1); | |
| 168 if ((block->bit -= nbits) == -1) { | |
| 169 block->bit = 7; | |
| 170 block->byte++; | |
| 171 } | |
| 172 return val; | |
| 173 } | |
| 174 | |
| 175 // partial first byte | |
| 176 val = block->data[block->byte] & ((1<<(block->bit+1))-1); | |
| 177 nbits -= block->bit+1; | |
| 178 block->bit = 7; | |
| 179 block->byte++; | |
| 180 | |
| 181 // whole middle bytes | |
| 182 while (nbits >= 8) { | |
| 183 val = (val << 8) | block->data[block->byte++]; | |
| 184 nbits -= 8; | |
| 185 } | |
| 186 | |
| 187 val <<= nbits; | |
| 188 val |= (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1); | |
| 189 block->bit -= nbits; | |
| 190 return val; | |
| 191 #endif | |
| 192 | |
| 193 #if 0 | |
| 194 /* Inefficient implementation! */ | |
| 195 //printf("{"); | |
| 196 for (i = 0; i < nbits; i++) | |
| 197 //val = (val << 1) | get_bit_MSB(block); | |
| 198 GET_BIT_MSB(block, val); | |
| 199 #endif | |
| 200 | |
| 201 #if 1 | |
| 202 /* Combination of 1st two methods */ | |
| 203 if (nbits <= block->bit+1) { | |
| 204 val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1); | |
| 205 if ((block->bit -= nbits) == -1) { | |
| 206 block->bit = 7; | |
| 207 block->byte++; | |
| 208 } | |
| 209 return val; | |
| 210 } | |
| 211 | |
| 212 switch(nbits) { | |
| 213 // case 15: GET_BIT_MSB(block, val); | |
| 214 // case 14: GET_BIT_MSB(block, val); | |
| 215 // case 13: GET_BIT_MSB(block, val); | |
| 216 // case 12: GET_BIT_MSB(block, val); | |
| 217 // case 11: GET_BIT_MSB(block, val); | |
| 218 // case 10: GET_BIT_MSB(block, val); | |
| 219 // case 9: GET_BIT_MSB(block, val); | |
| 220 case 8: GET_BIT_MSB(block, val); | |
| 221 case 7: GET_BIT_MSB(block, val); | |
| 222 case 6: GET_BIT_MSB(block, val); | |
| 223 case 5: GET_BIT_MSB(block, val); | |
| 224 case 4: GET_BIT_MSB(block, val); | |
| 225 case 3: GET_BIT_MSB(block, val); | |
| 226 case 2: GET_BIT_MSB(block, val); | |
| 227 case 1: GET_BIT_MSB(block, val); | |
| 228 break; | |
| 229 | |
| 230 default: | |
| 231 for (i = 0; i < nbits; i++) | |
| 232 //val = (val << 1) | get_bit_MSB(block); | |
| 233 GET_BIT_MSB(block, val); | |
| 234 } | |
| 235 #endif | |
| 236 | |
| 237 //printf("=0x%x}", val); | |
| 238 | |
| 239 return val; | |
| 240 } | |
| 241 | |
| 242 /* | |
| 243 * Can store up to 24-bits worth of data encoded in an integer value | |
| 244 * Possibly we'd want to have a less optimal store_bits function when dealing | |
| 245 * with nbits > 24, but for now we assume the codes generated are never | |
| 246 * that big. (Given this is only possible with 121392 or more | |
| 247 * characters with exactly the correct frequency distribution we check | |
| 248 * for it elsewhere.) | |
| 249 */ | |
| 250 static int store_bits_MSB(cram_block *block, unsigned int val, int nbits) { | |
| 251 /* fprintf(stderr, " store_bits: %02x %d\n", val, nbits); */ | |
| 252 | |
| 253 /* | |
| 254 * Use slow mode until we tweak the huffman generator to never generate | |
| 255 * codes longer than 24-bits. | |
| 256 */ | |
| 257 unsigned int mask; | |
| 258 | |
| 259 if (block->byte+4 >= block->alloc) { | |
| 260 if (block->byte) { | |
| 261 block->alloc *= 2; | |
| 262 block->data = realloc(block->data, block->alloc + 4); | |
| 263 if (!block->data) | |
| 264 return -1; | |
| 265 } else { | |
| 266 block->alloc = 1024; | |
| 267 block->data = realloc(block->data, block->alloc + 4); | |
| 268 if (!block->data) | |
| 269 return -1; | |
| 270 block->data[0] = 0; // initialise first byte of buffer | |
| 271 } | |
| 272 } | |
| 273 | |
| 274 /* fits in current bit-field */ | |
| 275 if (nbits <= block->bit+1) { | |
| 276 block->data[block->byte] |= (val << (block->bit+1-nbits)); | |
| 277 if ((block->bit-=nbits) == -1) { | |
| 278 block->bit = 7; | |
| 279 block->byte++; | |
| 280 block->data[block->byte] = 0; | |
| 281 } | |
| 282 return 0; | |
| 283 } | |
| 284 | |
| 285 block->data[block->byte] |= (val >> (nbits -= block->bit+1)); | |
| 286 block->bit = 7; | |
| 287 block->byte++; | |
| 288 block->data[block->byte] = 0; | |
| 289 | |
| 290 mask = 1<<(nbits-1); | |
| 291 do { | |
| 292 if (val & mask) | |
| 293 block->data[block->byte] |= (1 << block->bit); | |
| 294 if (--block->bit == -1) { | |
| 295 block->bit = 7; | |
| 296 block->byte++; | |
| 297 block->data[block->byte] = 0; | |
| 298 } | |
| 299 mask >>= 1; | |
| 300 } while(--nbits); | |
| 301 | |
| 302 return 0; | |
| 303 } | |
| 304 | |
| 305 /* | |
| 306 * Returns the next 'size' bytes from a block, or NULL if insufficient | |
| 307 * data left.This is just a pointer into the block data and not an | |
| 308 * allocated object, so do not free the result. | |
| 309 */ | |
| 310 static char *cram_extract_block(cram_block *b, int size) { | |
| 311 char *cp = (char *)b->data + b->idx; | |
| 312 b->idx += size; | |
| 313 if (b->idx > b->uncomp_size) | |
| 314 return NULL; | |
| 315 | |
| 316 return cp; | |
| 317 } | |
| 318 | |
| 319 /* | |
| 320 * --------------------------------------------------------------------------- | |
| 321 * EXTERNAL | |
| 322 */ | |
| 323 int cram_external_decode_int(cram_slice *slice, cram_codec *c, | |
| 324 cram_block *in, char *out, int *out_size) { | |
| 325 int i; | |
| 326 char *cp; | |
| 327 cram_block *b = NULL; | |
| 328 | |
| 329 /* Find the external block */ | |
| 330 if (slice->block_by_id) { | |
| 331 if (!(b = slice->block_by_id[c->external.content_id])) | |
| 332 return *out_size?-1:0; | |
| 333 } else { | |
| 334 for (i = 0; i < slice->hdr->num_blocks; i++) { | |
| 335 b = slice->block[i]; | |
| 336 if (b && b->content_type == EXTERNAL && | |
| 337 b->content_id == c->external.content_id) { | |
| 338 break; | |
| 339 } | |
| 340 } | |
| 341 if (i == slice->hdr->num_blocks || !b) | |
| 342 return -1; | |
| 343 } | |
| 344 | |
| 345 cp = (char *)b->data + b->idx; | |
| 346 // E_INT and E_LONG are guaranteed single item queries | |
| 347 b->idx += itf8_get(cp, (int32_t *)out); | |
| 348 *out_size = 1; | |
| 349 | |
| 350 return 0; | |
| 351 } | |
| 352 | |
| 353 int cram_external_decode_char(cram_slice *slice, cram_codec *c, | |
| 354 cram_block *in, char *out, | |
| 355 int *out_size) { | |
| 356 int i; | |
| 357 char *cp; | |
| 358 cram_block *b = NULL; | |
| 359 | |
| 360 /* Find the external block */ | |
| 361 if (slice->block_by_id) { | |
| 362 if (!(b = slice->block_by_id[c->external.content_id])) | |
| 363 return *out_size?-1:0; | |
| 364 } else { | |
| 365 for (i = 0; i < slice->hdr->num_blocks; i++) { | |
| 366 b = slice->block[i]; | |
| 367 if (b && b->content_type == EXTERNAL && | |
| 368 b->content_id == c->external.content_id) { | |
| 369 break; | |
| 370 } | |
| 371 } | |
| 372 if (i == slice->hdr->num_blocks || !b) | |
| 373 return -1; | |
| 374 } | |
| 375 | |
| 376 cp = cram_extract_block(b, *out_size); | |
| 377 if (!cp) | |
| 378 return -1; | |
| 379 | |
| 380 memcpy(out, cp, *out_size); | |
| 381 return 0; | |
| 382 } | |
| 383 | |
| 384 static int cram_external_decode_block(cram_slice *slice, cram_codec *c, | |
| 385 cram_block *in, char *out_, | |
| 386 int *out_size) { | |
| 387 int i; | |
| 388 char *cp; | |
| 389 cram_block *b = NULL; | |
| 390 cram_block *out = (cram_block *)out_; | |
| 391 | |
| 392 /* Find the external block */ | |
| 393 if (slice->block_by_id) { | |
| 394 if (!(b = slice->block_by_id[c->external.content_id])) | |
| 395 return *out_size?-1:0; | |
| 396 } else { | |
| 397 for (i = 0; i < slice->hdr->num_blocks; i++) { | |
| 398 b = slice->block[i]; | |
| 399 if (b && b->content_type == EXTERNAL && | |
| 400 b->content_id == c->external.content_id) { | |
| 401 break; | |
| 402 } | |
| 403 } | |
| 404 if (i == slice->hdr->num_blocks || !b) | |
| 405 return -1; | |
| 406 } | |
| 407 | |
| 408 cp = cram_extract_block(b, *out_size); | |
| 409 if (!cp) | |
| 410 return -1; | |
| 411 | |
| 412 BLOCK_APPEND(out, cp, *out_size); | |
| 413 return 0; | |
| 414 } | |
| 415 | |
| 416 void cram_external_decode_free(cram_codec *c) { | |
| 417 if (c) | |
| 418 free(c); | |
| 419 } | |
| 420 | |
| 421 cram_codec *cram_external_decode_init(char *data, int size, | |
| 422 enum cram_external_type option, | |
| 423 int version) { | |
| 424 cram_codec *c; | |
| 425 char *cp = data; | |
| 426 | |
| 427 if (!(c = malloc(sizeof(*c)))) | |
| 428 return NULL; | |
| 429 | |
| 430 c->codec = E_EXTERNAL; | |
| 431 if (option == E_INT || option == E_LONG) | |
| 432 c->decode = cram_external_decode_int; | |
| 433 else if (option == E_BYTE_ARRAY || option == E_BYTE) | |
| 434 c->decode = cram_external_decode_char; | |
| 435 else | |
| 436 c->decode = cram_external_decode_block; | |
| 437 c->free = cram_external_decode_free; | |
| 438 | |
| 439 cp += itf8_get(cp, &c->external.content_id); | |
| 440 | |
| 441 if (cp - data != size) { | |
| 442 fprintf(stderr, "Malformed external header stream\n"); | |
| 443 free(c); | |
| 444 return NULL; | |
| 445 } | |
| 446 | |
| 447 c->external.type = option; | |
| 448 | |
| 449 return c; | |
| 450 } | |
| 451 | |
| 452 int cram_external_encode_int(cram_slice *slice, cram_codec *c, | |
| 453 char *in, int in_size) { | |
| 454 uint32_t *i32 = (uint32_t *)in; | |
| 455 | |
| 456 itf8_put_blk(c->out, *i32); | |
| 457 return 0; | |
| 458 } | |
| 459 | |
| 460 int cram_external_encode_char(cram_slice *slice, cram_codec *c, | |
| 461 char *in, int in_size) { | |
| 462 BLOCK_APPEND(c->out, in, in_size); | |
| 463 return 0; | |
| 464 } | |
| 465 | |
| 466 void cram_external_encode_free(cram_codec *c) { | |
| 467 if (!c) | |
| 468 return; | |
| 469 free(c); | |
| 470 } | |
| 471 | |
| 472 int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix, | |
| 473 int version) { | |
| 474 char tmp[99], *tp = tmp; | |
| 475 int len = 0; | |
| 476 | |
| 477 if (prefix) { | |
| 478 size_t l = strlen(prefix); | |
| 479 BLOCK_APPEND(b, prefix, l); | |
| 480 len += l; | |
| 481 } | |
| 482 | |
| 483 tp += itf8_put(tp, c->e_external.content_id); | |
| 484 len += itf8_put_blk(b, c->codec); | |
| 485 len += itf8_put_blk(b, tp-tmp); | |
| 486 BLOCK_APPEND(b, tmp, tp-tmp); | |
| 487 len += tp-tmp; | |
| 488 | |
| 489 return len; | |
| 490 } | |
| 491 | |
| 492 cram_codec *cram_external_encode_init(cram_stats *st, | |
| 493 enum cram_external_type option, | |
| 494 void *dat, | |
| 495 int version) { | |
| 496 cram_codec *c; | |
| 497 | |
| 498 c = malloc(sizeof(*c)); | |
| 499 if (!c) | |
| 500 return NULL; | |
| 501 c->codec = E_EXTERNAL; | |
| 502 c->free = cram_external_encode_free; | |
| 503 if (option == E_INT || option == E_LONG) | |
| 504 c->encode = cram_external_encode_int; | |
| 505 else if (option == E_BYTE_ARRAY || option == E_BYTE) | |
| 506 c->encode = cram_external_encode_char; | |
| 507 else | |
| 508 abort(); | |
| 509 c->store = cram_external_encode_store; | |
| 510 | |
| 511 c->e_external.content_id = (size_t)dat; | |
| 512 | |
| 513 return c; | |
| 514 } | |
| 515 | |
| 516 /* | |
| 517 * --------------------------------------------------------------------------- | |
| 518 * BETA | |
| 519 */ | |
| 520 int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { | |
| 521 int32_t *out_i = (int32_t *)out; | |
| 522 int i, n; | |
| 523 | |
| 524 if (c->beta.nbits) { | |
| 525 for (i = 0, n = *out_size; i < n; i++) | |
| 526 out_i[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset; | |
| 527 } else { | |
| 528 for (i = 0, n = *out_size; i < n; i++) | |
| 529 out_i[i] = -c->beta.offset; | |
| 530 } | |
| 531 | |
| 532 return 0; | |
| 533 } | |
| 534 | |
| 535 int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { | |
| 536 int i, n; | |
| 537 | |
| 538 if (c->beta.nbits) { | |
| 539 for (i = 0, n = *out_size; i < n; i++) | |
| 540 out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset; | |
| 541 } else { | |
| 542 for (i = 0, n = *out_size; i < n; i++) | |
| 543 out[i] = -c->beta.offset; | |
| 544 } | |
| 545 | |
| 546 return 0; | |
| 547 } | |
| 548 | |
| 549 void cram_beta_decode_free(cram_codec *c) { | |
| 550 if (c) | |
| 551 free(c); | |
| 552 } | |
| 553 | |
| 554 cram_codec *cram_beta_decode_init(char *data, int size, | |
| 555 enum cram_external_type option, | |
| 556 int version) { | |
| 557 cram_codec *c; | |
| 558 char *cp = data; | |
| 559 | |
| 560 if (!(c = malloc(sizeof(*c)))) | |
| 561 return NULL; | |
| 562 | |
| 563 c->codec = E_BETA; | |
| 564 if (option == E_INT || option == E_LONG) | |
| 565 c->decode = cram_beta_decode_int; | |
| 566 else if (option == E_BYTE_ARRAY || option == E_BYTE) | |
| 567 c->decode = cram_beta_decode_char; | |
| 568 else | |
| 569 abort(); | |
| 570 c->free = cram_beta_decode_free; | |
| 571 | |
| 572 cp += itf8_get(cp, &c->beta.offset); | |
| 573 cp += itf8_get(cp, &c->beta.nbits); | |
| 574 | |
| 575 if (cp - data != size) { | |
| 576 fprintf(stderr, "Malformed beta header stream\n"); | |
| 577 free(c); | |
| 578 return NULL; | |
| 579 } | |
| 580 | |
| 581 return c; | |
| 582 } | |
| 583 | |
| 584 int cram_beta_encode_store(cram_codec *c, cram_block *b, | |
| 585 char *prefix, int version) { | |
| 586 int len = 0; | |
| 587 | |
| 588 if (prefix) { | |
| 589 size_t l = strlen(prefix); | |
| 590 BLOCK_APPEND(b, prefix, l); | |
| 591 len += l; | |
| 592 } | |
| 593 | |
| 594 len += itf8_put_blk(b, c->codec); | |
| 595 len += itf8_put_blk(b, itf8_size(c->e_beta.offset) | |
| 596 + itf8_size(c->e_beta.nbits)); // codec length | |
| 597 len += itf8_put_blk(b, c->e_beta.offset); | |
| 598 len += itf8_put_blk(b, c->e_beta.nbits); | |
| 599 | |
| 600 return len; | |
| 601 } | |
| 602 | |
| 603 int cram_beta_encode_int(cram_slice *slice, cram_codec *c, | |
| 604 char *in, int in_size) { | |
| 605 int *syms = (int *)in; | |
| 606 int i, r = 0; | |
| 607 | |
| 608 for (i = 0; i < in_size; i++) | |
| 609 r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset, | |
| 610 c->e_beta.nbits); | |
| 611 | |
| 612 return r; | |
| 613 } | |
| 614 | |
| 615 int cram_beta_encode_char(cram_slice *slice, cram_codec *c, | |
| 616 char *in, int in_size) { | |
| 617 unsigned char *syms = (unsigned char *)in; | |
| 618 int i, r = 0; | |
| 619 | |
| 620 for (i = 0; i < in_size; i++) | |
| 621 r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset, | |
| 622 c->e_beta.nbits); | |
| 623 | |
| 624 return r; | |
| 625 } | |
| 626 | |
| 627 void cram_beta_encode_free(cram_codec *c) { | |
| 628 if (c) free(c); | |
| 629 } | |
| 630 | |
| 631 cram_codec *cram_beta_encode_init(cram_stats *st, | |
| 632 enum cram_external_type option, | |
| 633 void *dat, | |
| 634 int version) { | |
| 635 cram_codec *c; | |
| 636 int min_val, max_val, len = 0; | |
| 637 | |
| 638 c = malloc(sizeof(*c)); | |
| 639 if (!c) | |
| 640 return NULL; | |
| 641 c->codec = E_BETA; | |
| 642 c->free = cram_beta_encode_free; | |
| 643 if (option == E_INT) | |
| 644 c->encode = cram_beta_encode_int; | |
| 645 else | |
| 646 c->encode = cram_beta_encode_char; | |
| 647 c->store = cram_beta_encode_store; | |
| 648 | |
| 649 if (dat) { | |
| 650 min_val = ((int *)dat)[0]; | |
| 651 max_val = ((int *)dat)[1]; | |
| 652 } else { | |
| 653 min_val = INT_MAX; | |
| 654 max_val = INT_MIN; | |
| 655 int i; | |
| 656 for (i = 0; i < MAX_STAT_VAL; i++) { | |
| 657 if (!st->freqs[i]) | |
| 658 continue; | |
| 659 if (min_val > i) | |
| 660 min_val = i; | |
| 661 max_val = i; | |
| 662 } | |
| 663 if (st->h) { | |
| 664 khint_t k; | |
| 665 | |
| 666 for (k = kh_begin(st->h); k != kh_end(st->h); k++) { | |
| 667 if (!kh_exist(st->h, k)) | |
| 668 continue; | |
| 669 | |
| 670 i = kh_key(st->h, k); | |
| 671 if (min_val > i) | |
| 672 min_val = i; | |
| 673 if (max_val < i) | |
| 674 max_val = i; | |
| 675 } | |
| 676 } | |
| 677 } | |
| 678 | |
| 679 assert(max_val >= min_val); | |
| 680 c->e_beta.offset = -min_val; | |
| 681 max_val -= min_val; | |
| 682 while (max_val) { | |
| 683 len++; | |
| 684 max_val >>= 1; | |
| 685 } | |
| 686 c->e_beta.nbits = len; | |
| 687 | |
| 688 return c; | |
| 689 } | |
| 690 | |
| 691 /* | |
| 692 * --------------------------------------------------------------------------- | |
| 693 * SUBEXP | |
| 694 */ | |
| 695 int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { | |
| 696 int32_t *out_i = (int32_t *)out; | |
| 697 int n, count; | |
| 698 int k = c->subexp.k; | |
| 699 | |
| 700 for (count = 0, n = *out_size; count < n; count++) { | |
| 701 int i = 0, tail; | |
| 702 int val; | |
| 703 | |
| 704 /* Get number of 1s */ | |
| 705 //while (get_bit_MSB(in) == 1) i++; | |
| 706 i = get_one_bits_MSB(in); | |
| 707 | |
| 708 /* | |
| 709 * Val is | |
| 710 * i > 0: 2^(k+i-1) + k+i-1 bits | |
| 711 * i = 0: k bits | |
| 712 */ | |
| 713 if (i) { | |
| 714 tail = i + k-1; | |
| 715 val = 0; | |
| 716 while (tail) { | |
| 717 //val = val<<1; val |= get_bit_MSB(in); | |
| 718 GET_BIT_MSB(in, val); | |
| 719 tail--; | |
| 720 } | |
| 721 val += 1 << (i + k-1); | |
| 722 } else { | |
| 723 tail = k; | |
| 724 val = 0; | |
| 725 while (tail) { | |
| 726 //val = val<<1; val |= get_bit_MSB(in); | |
| 727 GET_BIT_MSB(in, val); | |
| 728 tail--; | |
| 729 } | |
| 730 } | |
| 731 | |
| 732 out_i[count] = val - c->subexp.offset; | |
| 733 } | |
| 734 | |
| 735 return 0; | |
| 736 } | |
| 737 | |
| 738 void cram_subexp_decode_free(cram_codec *c) { | |
| 739 if (c) | |
| 740 free(c); | |
| 741 } | |
| 742 | |
| 743 cram_codec *cram_subexp_decode_init(char *data, int size, | |
| 744 enum cram_external_type option, | |
| 745 int version) { | |
| 746 cram_codec *c; | |
| 747 char *cp = data; | |
| 748 | |
| 749 if (!(c = malloc(sizeof(*c)))) | |
| 750 return NULL; | |
| 751 | |
| 752 c->codec = E_SUBEXP; | |
| 753 c->decode = cram_subexp_decode; | |
| 754 c->free = cram_subexp_decode_free; | |
| 755 | |
| 756 cp += itf8_get(cp, &c->subexp.offset); | |
| 757 cp += itf8_get(cp, &c->subexp.k); | |
| 758 | |
| 759 if (cp - data != size) { | |
| 760 fprintf(stderr, "Malformed subexp header stream\n"); | |
| 761 free(c); | |
| 762 return NULL; | |
| 763 } | |
| 764 | |
| 765 return c; | |
| 766 } | |
| 767 | |
| 768 /* | |
| 769 * --------------------------------------------------------------------------- | |
| 770 * GAMMA | |
| 771 */ | |
| 772 int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { | |
| 773 int32_t *out_i = (int32_t *)out; | |
| 774 int i, n; | |
| 775 | |
| 776 for (i = 0, n = *out_size; i < n; i++) { | |
| 777 int nz = 0; | |
| 778 int val; | |
| 779 //while (get_bit_MSB(in) == 0) nz++; | |
| 780 nz = get_zero_bits_MSB(in); | |
| 781 val = 1; | |
| 782 while (nz > 0) { | |
| 783 //val <<= 1; val |= get_bit_MSB(in); | |
| 784 GET_BIT_MSB(in, val); | |
| 785 nz--; | |
| 786 } | |
| 787 | |
| 788 out_i[i] = val - c->gamma.offset; | |
| 789 } | |
| 790 | |
| 791 return 0; | |
| 792 } | |
| 793 | |
| 794 void cram_gamma_decode_free(cram_codec *c) { | |
| 795 if (c) | |
| 796 free(c); | |
| 797 } | |
| 798 | |
| 799 cram_codec *cram_gamma_decode_init(char *data, int size, | |
| 800 enum cram_external_type option, | |
| 801 int version) { | |
| 802 cram_codec *c; | |
| 803 char *cp = data; | |
| 804 | |
| 805 if (!(c = malloc(sizeof(*c)))) | |
| 806 return NULL; | |
| 807 | |
| 808 c->codec = E_GAMMA; | |
| 809 c->decode = cram_gamma_decode; | |
| 810 c->free = cram_gamma_decode_free; | |
| 811 | |
| 812 cp += itf8_get(cp, &c->gamma.offset); | |
| 813 | |
| 814 if (cp - data != size) { | |
| 815 fprintf(stderr, "Malformed gamma header stream\n"); | |
| 816 free(c); | |
| 817 return NULL; | |
| 818 } | |
| 819 | |
| 820 return c; | |
| 821 } | |
| 822 | |
| 823 /* | |
| 824 * --------------------------------------------------------------------------- | |
| 825 * HUFFMAN | |
| 826 */ | |
| 827 | |
| 828 static int code_sort(const void *vp1, const void *vp2) { | |
| 829 const cram_huffman_code *c1 = (const cram_huffman_code *)vp1; | |
| 830 const cram_huffman_code *c2 = (const cram_huffman_code *)vp2; | |
| 831 | |
| 832 if (c1->len != c2->len) | |
| 833 return c1->len - c2->len; | |
| 834 else | |
| 835 return c1->symbol - c2->symbol; | |
| 836 } | |
| 837 | |
| 838 void cram_huffman_decode_free(cram_codec *c) { | |
| 839 if (!c) | |
| 840 return; | |
| 841 | |
| 842 if (c->huffman.codes) | |
| 843 free(c->huffman.codes); | |
| 844 free(c); | |
| 845 } | |
| 846 | |
| 847 int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c, | |
| 848 cram_block *in, char *out, int *out_size) { | |
| 849 int i, n; | |
| 850 | |
| 851 /* Special case of 0 length codes */ | |
| 852 for (i = 0, n = *out_size; i < n; i++) { | |
| 853 out[i] = c->huffman.codes[0].symbol; | |
| 854 } | |
| 855 return 0; | |
| 856 } | |
| 857 | |
| 858 int cram_huffman_decode_char(cram_slice *slice, cram_codec *c, | |
| 859 cram_block *in, char *out, int *out_size) { | |
| 860 int i, n, ncodes = c->huffman.ncodes; | |
| 861 const cram_huffman_code * const codes = c->huffman.codes; | |
| 862 | |
| 863 for (i = 0, n = *out_size; i < n; i++) { | |
| 864 int idx = 0; | |
| 865 int val = 0, len = 0, last_len = 0; | |
| 866 | |
| 867 for (;;) { | |
| 868 int dlen = codes[idx].len - last_len; | |
| 869 if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen) | |
| 870 return -1; | |
| 871 | |
| 872 //val <<= dlen; | |
| 873 //val |= get_bits_MSB(in, dlen); | |
| 874 //last_len = (len += dlen); | |
| 875 | |
| 876 last_len = (len += dlen); | |
| 877 for (; dlen; dlen--) GET_BIT_MSB(in, val); | |
| 878 | |
| 879 idx = val - codes[idx].p; | |
| 880 if (idx >= ncodes || idx < 0) | |
| 881 return -1; | |
| 882 | |
| 883 if (codes[idx].code == val && codes[idx].len == len) { | |
| 884 out[i] = codes[idx].symbol; | |
| 885 break; | |
| 886 } | |
| 887 } | |
| 888 } | |
| 889 | |
| 890 return 0; | |
| 891 } | |
| 892 | |
| 893 int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c, | |
| 894 cram_block *in, char *out, int *out_size) { | |
| 895 int32_t *out_i = (int32_t *)out; | |
| 896 int i, n; | |
| 897 const cram_huffman_code * const codes = c->huffman.codes; | |
| 898 | |
| 899 /* Special case of 0 length codes */ | |
| 900 for (i = 0, n = *out_size; i < n; i++) { | |
| 901 out_i[i] = codes[0].symbol; | |
| 902 } | |
| 903 return 0; | |
| 904 } | |
| 905 | |
| 906 int cram_huffman_decode_int(cram_slice *slice, cram_codec *c, | |
| 907 cram_block *in, char *out, int *out_size) { | |
| 908 int32_t *out_i = (int32_t *)out; | |
| 909 int i, n, ncodes = c->huffman.ncodes; | |
| 910 const cram_huffman_code * const codes = c->huffman.codes; | |
| 911 | |
| 912 for (i = 0, n = *out_size; i < n; i++) { | |
| 913 int idx = 0; | |
| 914 int val = 0, len = 0, last_len = 0; | |
| 915 | |
| 916 // Now one bit at a time for remaining checks | |
| 917 for (;;) { | |
| 918 int dlen = codes[idx].len - last_len; | |
| 919 if (dlen <= 0 || (in->alloc - in->byte)*8 + in->bit + 7 < dlen) | |
| 920 return -1; | |
| 921 | |
| 922 //val <<= dlen; | |
| 923 //val |= get_bits_MSB(in, dlen); | |
| 924 //last_len = (len += dlen); | |
| 925 | |
| 926 last_len = (len += dlen); | |
| 927 for (; dlen; dlen--) GET_BIT_MSB(in, val); | |
| 928 | |
| 929 idx = val - codes[idx].p; | |
| 930 if (idx >= ncodes || idx < 0) | |
| 931 return -1; | |
| 932 | |
| 933 if (codes[idx].code == val && codes[idx].len == len) { | |
| 934 out_i[i] = codes[idx].symbol; | |
| 935 break; | |
| 936 } | |
| 937 } | |
| 938 } | |
| 939 | |
| 940 return 0; | |
| 941 } | |
| 942 | |
| 943 /* | |
| 944 * Initialises a huffman decoder from an encoding data stream. | |
| 945 */ | |
| 946 cram_codec *cram_huffman_decode_init(char *data, int size, | |
| 947 enum cram_external_type option, | |
| 948 int version) { | |
| 949 int32_t ncodes, i, j; | |
| 950 char *cp = data, *data_end = &data[size]; | |
| 951 cram_codec *h; | |
| 952 cram_huffman_code *codes; | |
| 953 int32_t val, last_len, max_len = 0; | |
| 954 | |
| 955 cp += itf8_get(cp, &ncodes); | |
| 956 h = calloc(1, sizeof(*h)); | |
| 957 if (!h) | |
| 958 return NULL; | |
| 959 | |
| 960 h->free = cram_huffman_decode_free; | |
| 961 | |
| 962 h->huffman.ncodes = ncodes; | |
| 963 codes = h->huffman.codes = malloc(ncodes * sizeof(*codes)); | |
| 964 if (!codes) { | |
| 965 free(h); | |
| 966 return NULL; | |
| 967 } | |
| 968 | |
| 969 /* Read symbols and bit-lengths */ | |
| 970 for (i = 0; i < ncodes && cp < data_end; i++) { | |
| 971 cp += itf8_get(cp, &codes[i].symbol); | |
| 972 } | |
| 973 | |
| 974 if (cp >= data_end) { | |
| 975 fprintf(stderr, "Malformed huffman header stream\n"); | |
| 976 free(h); | |
| 977 return NULL; | |
| 978 } | |
| 979 cp += itf8_get(cp, &i); | |
| 980 if (i != ncodes) { | |
| 981 fprintf(stderr, "Malformed huffman header stream\n"); | |
| 982 free(h); | |
| 983 return NULL; | |
| 984 } | |
| 985 | |
| 986 if (ncodes == 0) { | |
| 987 /* NULL huffman stream */ | |
| 988 return h; | |
| 989 } | |
| 990 | |
| 991 for (i = 0; i < ncodes && cp < data_end; i++) { | |
| 992 cp += itf8_get(cp, &codes[i].len); | |
| 993 if (max_len < codes[i].len) | |
| 994 max_len = codes[i].len; | |
| 995 } | |
| 996 if (cp - data != size || max_len >= ncodes) { | |
| 997 fprintf(stderr, "Malformed huffman header stream\n"); | |
| 998 free(h); | |
| 999 return NULL; | |
| 1000 } | |
| 1001 | |
| 1002 /* Sort by bit length and then by symbol value */ | |
| 1003 qsort(codes, ncodes, sizeof(*codes), code_sort); | |
| 1004 | |
| 1005 /* Assign canonical codes */ | |
| 1006 val = -1, last_len = 0; | |
| 1007 for (i = 0; i < ncodes; i++) { | |
| 1008 val++; | |
| 1009 if (codes[i].len > last_len) { | |
| 1010 while (codes[i].len > last_len) { | |
| 1011 val <<= 1; | |
| 1012 last_len++; | |
| 1013 } | |
| 1014 } | |
| 1015 codes[i].code = val; | |
| 1016 } | |
| 1017 | |
| 1018 /* | |
| 1019 * Compute the next starting point, offset by the i'th value. | |
| 1020 * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then | |
| 1021 * codes[10..13].p = 30 - 10. | |
| 1022 */ | |
| 1023 last_len = 0; | |
| 1024 for (i = j = 0; i < ncodes; i++) { | |
| 1025 if (codes[i].len > last_len) { | |
| 1026 j = codes[i].code - i; | |
| 1027 last_len = codes[i].len; | |
| 1028 } | |
| 1029 codes[i].p = j; | |
| 1030 } | |
| 1031 | |
| 1032 // puts("==HUFF LEN=="); | |
| 1033 // for (i = 0; i <= last_len+1; i++) { | |
| 1034 // printf("len %d=%d prefix %d\n", i, h->huffman.lengths[i], h->huffman.prefix[i]); | |
| 1035 // } | |
| 1036 // puts("===HUFFMAN CODES==="); | |
| 1037 // for (i = 0; i < ncodes; i++) { | |
| 1038 // int j; | |
| 1039 // printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code); | |
| 1040 // j = codes[i].len; | |
| 1041 // while (j) { | |
| 1042 // putchar(codes[i].code & (1 << --j) ? '1' : '0'); | |
| 1043 // } | |
| 1044 // printf(" %d\n", codes[i].code); | |
| 1045 // } | |
| 1046 | |
| 1047 h->codec = E_HUFFMAN; | |
| 1048 if (option == E_BYTE || option == E_BYTE_ARRAY) { | |
| 1049 if (h->huffman.codes[0].len == 0) | |
| 1050 h->decode = cram_huffman_decode_char0; | |
| 1051 else | |
| 1052 h->decode = cram_huffman_decode_char; | |
| 1053 } else if (option == E_BYTE_ARRAY_BLOCK) { | |
| 1054 abort(); | |
| 1055 } else { | |
| 1056 if (h->huffman.codes[0].len == 0) | |
| 1057 h->decode = cram_huffman_decode_int0; | |
| 1058 else | |
| 1059 h->decode = cram_huffman_decode_int; | |
| 1060 } | |
| 1061 | |
| 1062 return (cram_codec *)h; | |
| 1063 } | |
| 1064 | |
| 1065 int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c, | |
| 1066 char *in, int in_size) { | |
| 1067 return 0; | |
| 1068 } | |
| 1069 | |
| 1070 int cram_huffman_encode_char(cram_slice *slice, cram_codec *c, | |
| 1071 char *in, int in_size) { | |
| 1072 int i, code, len, r = 0; | |
| 1073 unsigned char *syms = (unsigned char *)in; | |
| 1074 | |
| 1075 do { | |
| 1076 int sym = *syms++; | |
| 1077 if (sym >= -1 && sym < MAX_HUFF) { | |
| 1078 i = c->e_huffman.val2code[sym+1]; | |
| 1079 assert(c->e_huffman.codes[i].symbol == sym); | |
| 1080 code = c->e_huffman.codes[i].code; | |
| 1081 len = c->e_huffman.codes[i].len; | |
| 1082 } else { | |
| 1083 /* Slow - use a lookup table for when sym < MAX_HUFF? */ | |
| 1084 for (i = 0; i < c->e_huffman.nvals; i++) { | |
| 1085 if (c->e_huffman.codes[i].symbol == sym) | |
| 1086 break; | |
| 1087 } | |
| 1088 if (i == c->e_huffman.nvals) | |
| 1089 return -1; | |
| 1090 | |
| 1091 code = c->e_huffman.codes[i].code; | |
| 1092 len = c->e_huffman.codes[i].len; | |
| 1093 } | |
| 1094 | |
| 1095 r |= store_bits_MSB(c->out, code, len); | |
| 1096 } while (--in_size); | |
| 1097 | |
| 1098 return r; | |
| 1099 } | |
| 1100 | |
| 1101 int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c, | |
| 1102 char *in, int in_size) { | |
| 1103 return 0; | |
| 1104 } | |
| 1105 | |
| 1106 int cram_huffman_encode_int(cram_slice *slice, cram_codec *c, | |
| 1107 char *in, int in_size) { | |
| 1108 int i, code, len, r = 0; | |
| 1109 int *syms = (int *)in; | |
| 1110 | |
| 1111 do { | |
| 1112 int sym = *syms++; | |
| 1113 | |
| 1114 if (sym >= -1 && sym < MAX_HUFF) { | |
| 1115 i = c->e_huffman.val2code[sym+1]; | |
| 1116 assert(c->e_huffman.codes[i].symbol == sym); | |
| 1117 code = c->e_huffman.codes[i].code; | |
| 1118 len = c->e_huffman.codes[i].len; | |
| 1119 } else { | |
| 1120 /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? */ | |
| 1121 for (i = 0; i < c->e_huffman.nvals; i++) { | |
| 1122 if (c->e_huffman.codes[i].symbol == sym) | |
| 1123 break; | |
| 1124 } | |
| 1125 if (i == c->e_huffman.nvals) | |
| 1126 return -1; | |
| 1127 | |
| 1128 code = c->e_huffman.codes[i].code; | |
| 1129 len = c->e_huffman.codes[i].len; | |
| 1130 } | |
| 1131 | |
| 1132 r |= store_bits_MSB(c->out, code, len); | |
| 1133 } while (--in_size); | |
| 1134 | |
| 1135 return r; | |
| 1136 } | |
| 1137 | |
| 1138 void cram_huffman_encode_free(cram_codec *c) { | |
| 1139 if (!c) | |
| 1140 return; | |
| 1141 | |
| 1142 if (c->e_huffman.codes) | |
| 1143 free(c->e_huffman.codes); | |
| 1144 free(c); | |
| 1145 } | |
| 1146 | |
| 1147 /* | |
| 1148 * Encodes a huffman tree. | |
| 1149 * Returns number of bytes written. | |
| 1150 */ | |
| 1151 int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix, | |
| 1152 int version) { | |
| 1153 int i, len = 0; | |
| 1154 cram_huffman_code *codes = c->e_huffman.codes; | |
| 1155 /* | |
| 1156 * Up to code length 127 means 2.5e+26 bytes of data required (worst | |
| 1157 * case huffman tree needs symbols with freqs matching the Fibonacci | |
| 1158 * series). So guaranteed 1 byte per code. | |
| 1159 * | |
| 1160 * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8). | |
| 1161 * | |
| 1162 * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory | |
| 1163 */ | |
| 1164 char *tmp = malloc(6*c->e_huffman.nvals+16); | |
| 1165 char *tp = tmp; | |
| 1166 | |
| 1167 if (!tmp) | |
| 1168 return -1; | |
| 1169 | |
| 1170 if (prefix) { | |
| 1171 size_t l = strlen(prefix); | |
| 1172 BLOCK_APPEND(b, prefix, l); | |
| 1173 len += l; | |
| 1174 } | |
| 1175 | |
| 1176 tp += itf8_put(tp, c->e_huffman.nvals); | |
| 1177 for (i = 0; i < c->e_huffman.nvals; i++) { | |
| 1178 tp += itf8_put(tp, codes[i].symbol); | |
| 1179 } | |
| 1180 | |
| 1181 tp += itf8_put(tp, c->e_huffman.nvals); | |
| 1182 for (i = 0; i < c->e_huffman.nvals; i++) { | |
| 1183 tp += itf8_put(tp, codes[i].len); | |
| 1184 } | |
| 1185 | |
| 1186 len += itf8_put_blk(b, c->codec); | |
| 1187 len += itf8_put_blk(b, tp-tmp); | |
| 1188 BLOCK_APPEND(b, tmp, tp-tmp); | |
| 1189 len += tp-tmp; | |
| 1190 | |
| 1191 free(tmp); | |
| 1192 | |
| 1193 return len; | |
| 1194 } | |
| 1195 | |
| 1196 cram_codec *cram_huffman_encode_init(cram_stats *st, | |
| 1197 enum cram_external_type option, | |
| 1198 void *dat, | |
| 1199 int version) { | |
| 1200 int *vals = NULL, *freqs = NULL, vals_alloc = 0, *lens, code, len; | |
| 1201 int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k; | |
| 1202 cram_codec *c; | |
| 1203 cram_huffman_code *codes; | |
| 1204 | |
| 1205 c = malloc(sizeof(*c)); | |
| 1206 if (!c) | |
| 1207 return NULL; | |
| 1208 c->codec = E_HUFFMAN; | |
| 1209 | |
| 1210 /* Count number of unique symbols */ | |
| 1211 for (nvals = i = 0; i < MAX_STAT_VAL; i++) { | |
| 1212 if (!st->freqs[i]) | |
| 1213 continue; | |
| 1214 if (nvals >= vals_alloc) { | |
| 1215 vals_alloc = vals_alloc ? vals_alloc*2 : 1024; | |
| 1216 vals = realloc(vals, vals_alloc * sizeof(int)); | |
| 1217 freqs = realloc(freqs, vals_alloc * sizeof(int)); | |
| 1218 if (!vals || !freqs) { | |
| 1219 if (vals) free(vals); | |
| 1220 if (freqs) free(freqs); | |
| 1221 free(c); | |
| 1222 return NULL; | |
| 1223 } | |
| 1224 } | |
| 1225 vals[nvals] = i; | |
| 1226 freqs[nvals] = st->freqs[i]; | |
| 1227 assert(st->freqs[i] > 0); | |
| 1228 ntot += freqs[nvals]; | |
| 1229 if (max_val < i) max_val = i; | |
| 1230 if (min_val > i) min_val = i; | |
| 1231 nvals++; | |
| 1232 } | |
| 1233 if (st->h) { | |
| 1234 khint_t k; | |
| 1235 | |
| 1236 for (k = kh_begin(st->h); k != kh_end(st->h); k++) { | |
| 1237 if (!kh_exist(st->h, k)) | |
| 1238 continue; | |
| 1239 if (nvals >= vals_alloc) { | |
| 1240 vals_alloc = vals_alloc ? vals_alloc*2 : 1024; | |
| 1241 vals = realloc(vals, vals_alloc * sizeof(int)); | |
| 1242 freqs = realloc(freqs, vals_alloc * sizeof(int)); | |
| 1243 if (!vals || !freqs) | |
| 1244 return NULL; | |
| 1245 } | |
| 1246 vals[nvals]= kh_key(st->h, k); | |
| 1247 freqs[nvals] = kh_val(st->h, k); | |
| 1248 assert(freqs[nvals] > 0); | |
| 1249 ntot += freqs[nvals]; | |
| 1250 if (max_val < i) max_val = i; | |
| 1251 if (min_val > i) min_val = i; | |
| 1252 nvals++; | |
| 1253 } | |
| 1254 } | |
| 1255 | |
| 1256 assert(nvals > 0); | |
| 1257 | |
| 1258 freqs = realloc(freqs, 2*nvals*sizeof(*freqs)); | |
| 1259 lens = calloc(2*nvals, sizeof(*lens)); | |
| 1260 if (!lens || !freqs) | |
| 1261 return NULL; | |
| 1262 | |
| 1263 /* Inefficient, use pointers to form chain so we can insert and maintain | |
| 1264 * a sorted list? This is currently O(nvals^2) complexity. | |
| 1265 */ | |
| 1266 for (;;) { | |
| 1267 int low1 = INT_MAX, low2 = INT_MAX; | |
| 1268 int ind1 = 0, ind2 = 0; | |
| 1269 for (i = 0; i < nvals; i++) { | |
| 1270 if (freqs[i] < 0) | |
| 1271 continue; | |
| 1272 if (low1 > freqs[i]) | |
| 1273 low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i; | |
| 1274 else if (low2 > freqs[i]) | |
| 1275 low2 = freqs[i], ind2 = i; | |
| 1276 } | |
| 1277 if (low2 == INT_MAX) | |
| 1278 break; | |
| 1279 | |
| 1280 freqs[nvals] = low1 + low2; | |
| 1281 lens[ind1] = nvals; | |
| 1282 lens[ind2] = nvals; | |
| 1283 freqs[ind1] *= -1; | |
| 1284 freqs[ind2] *= -1; | |
| 1285 nvals++; | |
| 1286 } | |
| 1287 nvals = nvals/2+1; | |
| 1288 | |
| 1289 /* Assign lengths */ | |
| 1290 for (i = 0; i < nvals; i++) { | |
| 1291 int code_len = 0; | |
| 1292 for (k = lens[i]; k; k = lens[k]) | |
| 1293 code_len++; | |
| 1294 lens[i] = code_len; | |
| 1295 freqs[i] *= -1; | |
| 1296 //fprintf(stderr, "%d / %d => %d\n", vals[i], freqs[i], lens[i]); | |
| 1297 } | |
| 1298 | |
| 1299 | |
| 1300 /* Sort, need in a struct */ | |
| 1301 if (!(codes = malloc(nvals * sizeof(*codes)))) | |
| 1302 return NULL; | |
| 1303 for (i = 0; i < nvals; i++) { | |
| 1304 codes[i].symbol = vals[i]; | |
| 1305 codes[i].len = lens[i]; | |
| 1306 } | |
| 1307 qsort(codes, nvals, sizeof(*codes), code_sort); | |
| 1308 | |
| 1309 /* | |
| 1310 * Generate canonical codes from lengths. | |
| 1311 * Sort by length. | |
| 1312 * Start with 0. | |
| 1313 * Every new code of same length is +1. | |
| 1314 * Every new code of new length is +1 then <<1 per extra length. | |
| 1315 * | |
| 1316 * /\ | |
| 1317 * a/\ | |
| 1318 * /\/\ | |
| 1319 * bcd/\ | |
| 1320 * ef | |
| 1321 * | |
| 1322 * a 1 0 | |
| 1323 * b 3 4 (0+1)<<2 | |
| 1324 * c 3 5 | |
| 1325 * d 3 6 | |
| 1326 * e 4 14 (6+1)<<1 | |
| 1327 * f 5 15 | |
| 1328 */ | |
| 1329 code = 0; len = codes[0].len; | |
| 1330 for (i = 0; i < nvals; i++) { | |
| 1331 while (len != codes[i].len) { | |
| 1332 code<<=1; | |
| 1333 len++; | |
| 1334 } | |
| 1335 codes[i].code = code++; | |
| 1336 | |
| 1337 if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF) | |
| 1338 c->e_huffman.val2code[codes[i].symbol+1] = i; | |
| 1339 | |
| 1340 //fprintf(stderr, "sym %d, code %d, len %d\n", | |
| 1341 // codes[i].symbol, codes[i].code, codes[i].len); | |
| 1342 } | |
| 1343 | |
| 1344 free(lens); | |
| 1345 free(vals); | |
| 1346 free(freqs); | |
| 1347 | |
| 1348 c->e_huffman.codes = codes; | |
| 1349 c->e_huffman.nvals = nvals; | |
| 1350 | |
| 1351 c->free = cram_huffman_encode_free; | |
| 1352 if (option == E_BYTE || option == E_BYTE_ARRAY) { | |
| 1353 if (c->e_huffman.codes[0].len == 0) | |
| 1354 c->encode = cram_huffman_encode_char0; | |
| 1355 else | |
| 1356 c->encode = cram_huffman_encode_char; | |
| 1357 } else { | |
| 1358 if (c->e_huffman.codes[0].len == 0) | |
| 1359 c->encode = cram_huffman_encode_int0; | |
| 1360 else | |
| 1361 c->encode = cram_huffman_encode_int; | |
| 1362 } | |
| 1363 c->store = cram_huffman_encode_store; | |
| 1364 | |
| 1365 return c; | |
| 1366 } | |
| 1367 | |
| 1368 /* | |
| 1369 * --------------------------------------------------------------------------- | |
| 1370 * BYTE_ARRAY_LEN | |
| 1371 */ | |
| 1372 int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c, | |
| 1373 cram_block *in, char *out, | |
| 1374 int *out_size) { | |
| 1375 /* Fetch length */ | |
| 1376 int32_t len, one = 1; | |
| 1377 | |
| 1378 c->byte_array_len.len_codec->decode(slice, c->byte_array_len.len_codec, in, (char *)&len, &one); | |
| 1379 //printf("ByteArray Len=%d\n", len); | |
| 1380 | |
| 1381 if (c->byte_array_len.value_codec) { | |
| 1382 c->byte_array_len.value_codec->decode(slice, | |
| 1383 c->byte_array_len.value_codec, | |
| 1384 in, out, &len); | |
| 1385 } else { | |
| 1386 return -1; | |
| 1387 } | |
| 1388 | |
| 1389 *out_size = len; | |
| 1390 | |
| 1391 return 0; | |
| 1392 } | |
| 1393 | |
| 1394 void cram_byte_array_len_decode_free(cram_codec *c) { | |
| 1395 if (!c) return; | |
| 1396 | |
| 1397 if (c->byte_array_len.len_codec) | |
| 1398 c->byte_array_len.len_codec->free(c->byte_array_len.len_codec); | |
| 1399 | |
| 1400 if (c->byte_array_len.value_codec) | |
| 1401 c->byte_array_len.value_codec->free(c->byte_array_len.value_codec); | |
| 1402 | |
| 1403 free(c); | |
| 1404 } | |
| 1405 | |
| 1406 cram_codec *cram_byte_array_len_decode_init(char *data, int size, | |
| 1407 enum cram_external_type option, | |
| 1408 int version) { | |
| 1409 cram_codec *c; | |
| 1410 char *cp = data; | |
| 1411 int32_t encoding; | |
| 1412 int32_t sub_size; | |
| 1413 | |
| 1414 if (!(c = malloc(sizeof(*c)))) | |
| 1415 return NULL; | |
| 1416 | |
| 1417 c->codec = E_BYTE_ARRAY_LEN; | |
| 1418 c->decode = cram_byte_array_len_decode; | |
| 1419 c->free = cram_byte_array_len_decode_free; | |
| 1420 | |
| 1421 cp += itf8_get(cp, &encoding); | |
| 1422 cp += itf8_get(cp, &sub_size); | |
| 1423 c->byte_array_len.len_codec = cram_decoder_init(encoding, cp, sub_size, | |
| 1424 E_INT, version); | |
| 1425 cp += sub_size; | |
| 1426 | |
| 1427 cp += itf8_get(cp, &encoding); | |
| 1428 cp += itf8_get(cp, &sub_size); | |
| 1429 c->byte_array_len.value_codec = cram_decoder_init(encoding, cp, sub_size, | |
| 1430 option, version); | |
| 1431 cp += sub_size; | |
| 1432 | |
| 1433 if (cp - data != size) { | |
| 1434 fprintf(stderr, "Malformed byte_array_len header stream\n"); | |
| 1435 free(c); | |
| 1436 return NULL; | |
| 1437 } | |
| 1438 | |
| 1439 return c; | |
| 1440 } | |
| 1441 | |
| 1442 int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c, | |
| 1443 char *in, int in_size) { | |
| 1444 int32_t i32 = in_size; | |
| 1445 int r = 0; | |
| 1446 | |
| 1447 r |= c->e_byte_array_len.len_codec->encode(slice, | |
| 1448 c->e_byte_array_len.len_codec, | |
| 1449 (char *)&i32, 1); | |
| 1450 r |= c->e_byte_array_len.val_codec->encode(slice, | |
| 1451 c->e_byte_array_len.val_codec, | |
| 1452 in, in_size); | |
| 1453 return r; | |
| 1454 } | |
| 1455 | |
| 1456 void cram_byte_array_len_encode_free(cram_codec *c) { | |
| 1457 if (!c) | |
| 1458 return; | |
| 1459 | |
| 1460 if (c->e_byte_array_len.len_codec) | |
| 1461 c->e_byte_array_len.len_codec->free(c->e_byte_array_len.len_codec); | |
| 1462 | |
| 1463 if (c->e_byte_array_len.val_codec) | |
| 1464 c->e_byte_array_len.val_codec->free(c->e_byte_array_len.val_codec); | |
| 1465 | |
| 1466 free(c); | |
| 1467 } | |
| 1468 | |
| 1469 int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b, | |
| 1470 char *prefix, int version) { | |
| 1471 int len = 0, len2, len3; | |
| 1472 cram_codec *tc; | |
| 1473 cram_block *b_len, *b_val; | |
| 1474 | |
| 1475 if (prefix) { | |
| 1476 size_t l = strlen(prefix); | |
| 1477 BLOCK_APPEND(b, prefix, l); | |
| 1478 len += l; | |
| 1479 } | |
| 1480 | |
| 1481 tc = c->e_byte_array_len.len_codec; | |
| 1482 b_len = cram_new_block(0, 0); | |
| 1483 len2 = tc->store(tc, b_len, NULL, version); | |
| 1484 | |
| 1485 tc = c->e_byte_array_len.val_codec; | |
| 1486 b_val = cram_new_block(0, 0); | |
| 1487 len3 = tc->store(tc, b_val, NULL, version); | |
| 1488 | |
| 1489 len += itf8_put_blk(b, c->codec); | |
| 1490 len += itf8_put_blk(b, len2+len3); | |
| 1491 BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len)); | |
| 1492 BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val)); | |
| 1493 | |
| 1494 cram_free_block(b_len); | |
| 1495 cram_free_block(b_val); | |
| 1496 | |
| 1497 return len + len2 + len3; | |
| 1498 } | |
| 1499 | |
| 1500 cram_codec *cram_byte_array_len_encode_init(cram_stats *st, | |
| 1501 enum cram_external_type option, | |
| 1502 void *dat, | |
| 1503 int version) { | |
| 1504 cram_codec *c; | |
| 1505 cram_byte_array_len_encoder *e = (cram_byte_array_len_encoder *)dat; | |
| 1506 | |
| 1507 c = malloc(sizeof(*c)); | |
| 1508 if (!c) | |
| 1509 return NULL; | |
| 1510 c->codec = E_BYTE_ARRAY_LEN; | |
| 1511 c->free = cram_byte_array_len_encode_free; | |
| 1512 c->encode = cram_byte_array_len_encode; | |
| 1513 c->store = cram_byte_array_len_encode_store; | |
| 1514 | |
| 1515 c->e_byte_array_len.len_codec = cram_encoder_init(e->len_encoding, | |
| 1516 NULL, E_INT, | |
| 1517 e->len_dat, | |
| 1518 version); | |
| 1519 c->e_byte_array_len.val_codec = cram_encoder_init(e->val_encoding, | |
| 1520 NULL, E_BYTE_ARRAY, | |
| 1521 e->val_dat, | |
| 1522 version); | |
| 1523 | |
| 1524 return c; | |
| 1525 } | |
| 1526 | |
| 1527 /* | |
| 1528 * --------------------------------------------------------------------------- | |
| 1529 * BYTE_ARRAY_STOP | |
| 1530 */ | |
| 1531 static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c, | |
| 1532 cram_block *in, char *out, | |
| 1533 int *out_size) { | |
| 1534 int i; | |
| 1535 cram_block *b = NULL; | |
| 1536 char *cp, ch; | |
| 1537 | |
| 1538 if (slice->block_by_id) { | |
| 1539 if (!(b = slice->block_by_id[c->byte_array_stop.content_id])) | |
| 1540 return *out_size?-1:0; | |
| 1541 } else { | |
| 1542 for (i = 0; i < slice->hdr->num_blocks; i++) { | |
| 1543 b = slice->block[i]; | |
| 1544 if (b && b->content_type == EXTERNAL && | |
| 1545 b->content_id == c->byte_array_stop.content_id) { | |
| 1546 break; | |
| 1547 } | |
| 1548 } | |
| 1549 if (i == slice->hdr->num_blocks || !b) | |
| 1550 return -1; | |
| 1551 } | |
| 1552 | |
| 1553 if (b->idx >= b->uncomp_size) | |
| 1554 return -1; | |
| 1555 | |
| 1556 cp = (char *)b->data + b->idx; | |
| 1557 while ((ch = *cp) != (char)c->byte_array_stop.stop) { | |
| 1558 if (cp - (char *)b->data >= b->uncomp_size) | |
| 1559 return -1; | |
| 1560 *out++ = ch; | |
| 1561 cp++; | |
| 1562 } | |
| 1563 | |
| 1564 *out_size = cp - (char *)(b->data + b->idx); | |
| 1565 b->idx = cp - (char *)b->data + 1; | |
| 1566 | |
| 1567 return 0; | |
| 1568 } | |
| 1569 | |
| 1570 int cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c, | |
| 1571 cram_block *in, char *out_, | |
| 1572 int *out_size) { | |
| 1573 cram_block *b = NULL; | |
| 1574 cram_block *out = (cram_block *)out_; | |
| 1575 char *cp, *out_cp, *cp_end; | |
| 1576 char stop; | |
| 1577 | |
| 1578 if (slice->block_by_id) { | |
| 1579 if (!(b = slice->block_by_id[c->byte_array_stop.content_id])) | |
| 1580 return *out_size?-1:0; | |
| 1581 } else { | |
| 1582 int i; | |
| 1583 for (i = 0; i < slice->hdr->num_blocks; i++) { | |
| 1584 b = slice->block[i]; | |
| 1585 if (b && b->content_type == EXTERNAL && | |
| 1586 b->content_id == c->byte_array_stop.content_id) { | |
| 1587 break; | |
| 1588 } | |
| 1589 } | |
| 1590 if (i == slice->hdr->num_blocks || !b) | |
| 1591 return -1; | |
| 1592 } | |
| 1593 | |
| 1594 if (b->idx >= b->uncomp_size) | |
| 1595 return -1; | |
| 1596 cp = (char *)b->data + b->idx; | |
| 1597 cp_end = (char *)b->data + b->uncomp_size; | |
| 1598 out_cp = (char *)BLOCK_END(out); | |
| 1599 | |
| 1600 stop = c->byte_array_stop.stop; | |
| 1601 if (cp_end - cp < out->alloc - out->byte) { | |
| 1602 while (*cp != stop && cp != cp_end) | |
| 1603 *out_cp++ = *cp++; | |
| 1604 BLOCK_SIZE(out) = out_cp - (char *)BLOCK_DATA(out); | |
| 1605 } else { | |
| 1606 char *cp_start; | |
| 1607 for (cp_start = cp; *cp != stop && cp != cp_end; cp++) | |
| 1608 ; | |
| 1609 BLOCK_APPEND(out, cp_start, cp - cp_start); | |
| 1610 BLOCK_GROW(out, cp - cp_start); | |
| 1611 } | |
| 1612 | |
| 1613 *out_size = cp - (char *)(b->data + b->idx); | |
| 1614 b->idx = cp - (char *)b->data + 1; | |
| 1615 | |
| 1616 return 0; | |
| 1617 } | |
| 1618 | |
| 1619 void cram_byte_array_stop_decode_free(cram_codec *c) { | |
| 1620 if (!c) return; | |
| 1621 | |
| 1622 free(c); | |
| 1623 } | |
| 1624 | |
| 1625 cram_codec *cram_byte_array_stop_decode_init(char *data, int size, | |
| 1626 enum cram_external_type option, | |
| 1627 int version) { | |
| 1628 cram_codec *c; | |
| 1629 unsigned char *cp = (unsigned char *)data; | |
| 1630 | |
| 1631 if (!(c = malloc(sizeof(*c)))) | |
| 1632 return NULL; | |
| 1633 | |
| 1634 c->codec = E_BYTE_ARRAY_STOP; | |
| 1635 c->decode = (option == E_BYTE_ARRAY_BLOCK) | |
| 1636 ? cram_byte_array_stop_decode_block | |
| 1637 : cram_byte_array_stop_decode_char; | |
| 1638 c->free = cram_byte_array_stop_decode_free; | |
| 1639 | |
| 1640 c->byte_array_stop.stop = *cp++; | |
| 1641 if (CRAM_MAJOR_VERS(version) == 1) { | |
| 1642 c->byte_array_stop.content_id = cp[0] + (cp[1]<<8) + (cp[2]<<16) | |
| 1643 + (cp[3]<<24); | |
| 1644 cp += 4; | |
| 1645 } else { | |
| 1646 cp += itf8_get(cp, &c->byte_array_stop.content_id); | |
| 1647 } | |
| 1648 | |
| 1649 if ((char *)cp - data != size) { | |
| 1650 fprintf(stderr, "Malformed byte_array_stop header stream\n"); | |
| 1651 free(c); | |
| 1652 return NULL; | |
| 1653 } | |
| 1654 | |
| 1655 return c; | |
| 1656 } | |
| 1657 | |
| 1658 int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c, | |
| 1659 char *in, int in_size) { | |
| 1660 BLOCK_APPEND(c->out, in, in_size); | |
| 1661 BLOCK_APPEND_CHAR(c->out, c->e_byte_array_stop.stop); | |
| 1662 return 0; | |
| 1663 } | |
| 1664 | |
| 1665 void cram_byte_array_stop_encode_free(cram_codec *c) { | |
| 1666 if (!c) | |
| 1667 return; | |
| 1668 free(c); | |
| 1669 } | |
| 1670 | |
| 1671 int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b, | |
| 1672 char *prefix, int version) { | |
| 1673 int len = 0; | |
| 1674 char buf[20], *cp = buf; | |
| 1675 | |
| 1676 if (prefix) { | |
| 1677 size_t l = strlen(prefix); | |
| 1678 BLOCK_APPEND(b, prefix, l); | |
| 1679 len += l; | |
| 1680 } | |
| 1681 | |
| 1682 cp += itf8_put(cp, c->codec); | |
| 1683 | |
| 1684 if (CRAM_MAJOR_VERS(version) == 1) { | |
| 1685 cp += itf8_put(cp, 5); | |
| 1686 *cp++ = c->e_byte_array_stop.stop; | |
| 1687 *cp++ = (c->e_byte_array_stop.content_id >> 0) & 0xff; | |
| 1688 *cp++ = (c->e_byte_array_stop.content_id >> 8) & 0xff; | |
| 1689 *cp++ = (c->e_byte_array_stop.content_id >> 16) & 0xff; | |
| 1690 *cp++ = (c->e_byte_array_stop.content_id >> 24) & 0xff; | |
| 1691 } else { | |
| 1692 cp += itf8_put(cp, 1 + itf8_size(c->e_byte_array_stop.content_id)); | |
| 1693 *cp++ = c->e_byte_array_stop.stop; | |
| 1694 cp += itf8_put(cp, c->e_byte_array_stop.content_id); | |
| 1695 } | |
| 1696 | |
| 1697 BLOCK_APPEND(b, buf, cp-buf); | |
| 1698 len += cp-buf; | |
| 1699 | |
| 1700 return len; | |
| 1701 } | |
| 1702 | |
| 1703 cram_codec *cram_byte_array_stop_encode_init(cram_stats *st, | |
| 1704 enum cram_external_type option, | |
| 1705 void *dat, | |
| 1706 int version) { | |
| 1707 cram_codec *c; | |
| 1708 | |
| 1709 c = malloc(sizeof(*c)); | |
| 1710 if (!c) | |
| 1711 return NULL; | |
| 1712 c->codec = E_BYTE_ARRAY_STOP; | |
| 1713 c->free = cram_byte_array_stop_encode_free; | |
| 1714 c->encode = cram_byte_array_stop_encode; | |
| 1715 c->store = cram_byte_array_stop_encode_store; | |
| 1716 | |
| 1717 c->e_byte_array_stop.stop = ((int *)dat)[0]; | |
| 1718 c->e_byte_array_stop.content_id = ((int *)dat)[1]; | |
| 1719 | |
| 1720 return c; | |
| 1721 } | |
| 1722 | |
| 1723 /* | |
| 1724 * --------------------------------------------------------------------------- | |
| 1725 */ | |
| 1726 | |
| 1727 char *cram_encoding2str(enum cram_encoding t) { | |
| 1728 switch (t) { | |
| 1729 case E_NULL: return "NULL"; | |
| 1730 case E_EXTERNAL: return "EXTERNAL"; | |
| 1731 case E_GOLOMB: return "GOLOMB"; | |
| 1732 case E_HUFFMAN: return "HUFFMAN"; | |
| 1733 case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN"; | |
| 1734 case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP"; | |
| 1735 case E_BETA: return "BETA"; | |
| 1736 case E_SUBEXP: return "SUBEXP"; | |
| 1737 case E_GOLOMB_RICE: return "GOLOMB_RICE"; | |
| 1738 case E_GAMMA: return "GAMMA"; | |
| 1739 } | |
| 1740 return "?"; | |
| 1741 } | |
| 1742 | |
| 1743 static cram_codec *(*decode_init[])(char *data, | |
| 1744 int size, | |
| 1745 enum cram_external_type option, | |
| 1746 int version) = { | |
| 1747 NULL, | |
| 1748 cram_external_decode_init, | |
| 1749 NULL, | |
| 1750 cram_huffman_decode_init, | |
| 1751 cram_byte_array_len_decode_init, | |
| 1752 cram_byte_array_stop_decode_init, | |
| 1753 cram_beta_decode_init, | |
| 1754 cram_subexp_decode_init, | |
| 1755 NULL, | |
| 1756 cram_gamma_decode_init, | |
| 1757 }; | |
| 1758 | |
| 1759 cram_codec *cram_decoder_init(enum cram_encoding codec, | |
| 1760 char *data, int size, | |
| 1761 enum cram_external_type option, | |
| 1762 int version) { | |
| 1763 if (decode_init[codec]) { | |
| 1764 return decode_init[codec](data, size, option, version); | |
| 1765 } else { | |
| 1766 fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec)); | |
| 1767 return NULL; | |
| 1768 } | |
| 1769 } | |
| 1770 | |
| 1771 static cram_codec *(*encode_init[])(cram_stats *stx, | |
| 1772 enum cram_external_type option, | |
| 1773 void *opt, | |
| 1774 int version) = { | |
| 1775 NULL, | |
| 1776 cram_external_encode_init, | |
| 1777 NULL, | |
| 1778 cram_huffman_encode_init, | |
| 1779 cram_byte_array_len_encode_init, | |
| 1780 cram_byte_array_stop_encode_init, | |
| 1781 cram_beta_encode_init, | |
| 1782 NULL, //cram_subexp_encode_init, | |
| 1783 NULL, | |
| 1784 NULL, //cram_gamma_encode_init, | |
| 1785 }; | |
| 1786 | |
| 1787 cram_codec *cram_encoder_init(enum cram_encoding codec, | |
| 1788 cram_stats *st, | |
| 1789 enum cram_external_type option, | |
| 1790 void *dat, | |
| 1791 int version) { | |
| 1792 if (st && !st->nvals) | |
| 1793 return NULL; | |
| 1794 | |
| 1795 if (encode_init[codec]) { | |
| 1796 cram_codec *r; | |
| 1797 if ((r = encode_init[codec](st, option, dat, version))) | |
| 1798 r->out = NULL; | |
| 1799 return r; | |
| 1800 } else { | |
| 1801 fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec)); | |
| 1802 abort(); | |
| 1803 } | |
| 1804 } | |
| 1805 | |
| 1806 /* | |
| 1807 * Returns the content_id used by this codec, also in id2 if byte_array_len. | |
| 1808 * Returns -1 for the CORE block and -2 for unneeded. | |
| 1809 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. | |
| 1810 */ | |
| 1811 int cram_codec_to_id(cram_codec *c, int *id2) { | |
| 1812 int bnum1, bnum2 = -2; | |
| 1813 | |
| 1814 switch (c->codec) { | |
| 1815 case E_HUFFMAN: | |
| 1816 bnum1 = c->huffman.ncodes == 1 ? -2 : -1; | |
| 1817 break; | |
| 1818 case E_GOLOMB: | |
| 1819 case E_BETA: | |
| 1820 case E_SUBEXP: | |
| 1821 case E_GOLOMB_RICE: | |
| 1822 case E_GAMMA: | |
| 1823 bnum1 = -1; | |
| 1824 break; | |
| 1825 case E_EXTERNAL: | |
| 1826 bnum1 = c->external.content_id; | |
| 1827 break; | |
| 1828 case E_BYTE_ARRAY_LEN: | |
| 1829 bnum1 = cram_codec_to_id(c->byte_array_len.len_codec, NULL); | |
| 1830 bnum2 = cram_codec_to_id(c->byte_array_len.value_codec, NULL); | |
| 1831 break; | |
| 1832 case E_BYTE_ARRAY_STOP: | |
| 1833 bnum1 = c->byte_array_stop.content_id; | |
| 1834 break; | |
| 1835 case E_NULL: | |
| 1836 bnum1 = -2; | |
| 1837 break; | |
| 1838 default: | |
| 1839 fprintf(stderr, "Unknown codec type %d\n", c->codec); | |
| 1840 bnum1 = -1; | |
| 1841 } | |
| 1842 | |
| 1843 if (id2) | |
| 1844 *id2 = bnum2; | |
| 1845 return bnum1; | |
| 1846 } |
