| 
0
 | 
     1 /*
 | 
| 
 | 
     2 Copyright (c) 2012-2013 Genome Research Ltd.
 | 
| 
 | 
     3 Author: James Bonfield <jkb@sanger.ac.uk>
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 Redistribution and use in source and binary forms, with or without 
 | 
| 
 | 
     6 modification, are permitted provided that the following conditions are met:
 | 
| 
 | 
     7 
 | 
| 
 | 
     8    1. Redistributions of source code must retain the above copyright notice, 
 | 
| 
 | 
     9 this list of conditions and the following disclaimer.
 | 
| 
 | 
    10 
 | 
| 
 | 
    11    2. Redistributions in binary form must reproduce the above copyright notice, 
 | 
| 
 | 
    12 this list of conditions and the following disclaimer in the documentation 
 | 
| 
 | 
    13 and/or other materials provided with the distribution.
 | 
| 
 | 
    14 
 | 
| 
 | 
    15    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
 | 
| 
 | 
    16 Institute nor the names of its contributors may be used to endorse or promote
 | 
| 
 | 
    17 products derived from this software without specific prior written permission.
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
 | 
| 
 | 
    20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 | 
| 
 | 
    21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
 | 
| 
 | 
    22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
 | 
| 
 | 
    23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
| 
 | 
    24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
| 
 | 
    25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | 
| 
 | 
    26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | 
| 
 | 
    27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
| 
 | 
    28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
| 
 | 
    29 */
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 #ifndef _CRAM_ENCODINGS_H_
 | 
| 
 | 
    32 #define _CRAM_ENCODINGS_H_
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 #ifdef __cplusplus
 | 
| 
 | 
    35 extern "C" {
 | 
| 
 | 
    36 #endif
 | 
| 
 | 
    37 
 | 
| 
 | 
    38 #include <inttypes.h>
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 struct cram_codec;
 | 
| 
 | 
    41 
 | 
| 
 | 
    /*
     * Slow but simple huffman decoder to start with.
     * Read a bit at a time, keeping track of {length, value}
     * eg. 1 1 0 1 => {1,1},  {2,3}, {3,6}, {4,13}
     *
     * Keep track of this through the huffman code table.
     * For fast scanning we have an index of where the first code of length X
     * appears.
     */

    /*
     * One entry of the huffman code table.
     * NOTE(review): field meanings other than 'p' are inferred from their
     * names and the description above -- confirm against the implementation.
     */
    typedef struct {
        int32_t symbol; /* presumably the value this code decodes to */
        int32_t p;      /* next code start value, minus index to codes[] */
        int32_t code;   /* presumably the accumulated bit pattern ("value") */
        int32_t len;    /* presumably the code length in bits ("length") */
    } cram_huffman_code;
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 typedef struct {
 | 
| 
 | 
    59     int ncodes;
 | 
| 
 | 
    60     cram_huffman_code *codes;
 | 
| 
 | 
    61 } cram_huffman_decoder;
 | 
| 
 | 
    62 
 | 
| 
 | 
    63 #define MAX_HUFF 128
 | 
| 
 | 
    64 typedef struct {
 | 
| 
 | 
    65     cram_huffman_code *codes;
 | 
| 
 | 
    66     int nvals;
 | 
| 
 | 
    67     int val2code[MAX_HUFF+1]; // value to code lookup for small values
 | 
| 
 | 
    68 } cram_huffman_encoder;
 | 
| 
 | 
    69 
 | 
| 
 | 
    /*
     * Parameters for the beta codec.
     * NOTE(review): presumably 'nbits' is the fixed bit width of each stored
     * value and 'offset' a bias applied to it -- confirm against the codec
     * implementation.
     */
    typedef struct {
        int32_t offset;
        int32_t nbits;
    } cram_beta_decoder;
 | 
| 
 | 
    74 
 | 
| 
 | 
    /*
     * Parameters for the gamma codec.
     * NOTE(review): 'offset' is presumably a bias applied to coded values --
     * confirm against the codec implementation.
     */
    typedef struct {
        int32_t offset;
    } cram_gamma_decoder;
 | 
| 
 | 
    78 
 | 
| 
 | 
    /*
     * Parameters for the subexp codec.
     * NOTE(review): 'k' is presumably the sub-exponential order and 'offset'
     * a bias applied to coded values -- confirm against the codec
     * implementation.
     */
    typedef struct {
        int32_t offset;
        int32_t k;
    } cram_subexp_decoder;
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 typedef struct {
 | 
| 
 | 
    85     int32_t content_id;
 | 
| 
 | 
    86     enum cram_external_type type;
 | 
| 
 | 
    87 } cram_external_decoder;
 | 
| 
 | 
    88 
 | 
| 
 | 
    /*
     * Parameters for the byte-array-with-length decoder, which is built from
     * two sub-codecs: one for the length and one for the values.
     * NOTE(review): ownership of the two codecs is not visible from this
     * header.
     */
    typedef struct {
        struct cram_codec *len_codec;
        struct cram_codec *value_codec;
    } cram_byte_array_len_decoder;
 | 
| 
 | 
    93 
 | 
| 
 | 
    /*
     * Parameters for the byte-array-with-stop-byte codec.
     * NOTE(review): 'stop' is presumably the terminator byte that ends each
     * array and 'content_id' the block holding the data -- confirm against
     * the codec implementation.
     */
    typedef struct {
        unsigned char stop;
        int32_t content_id;
    } cram_byte_array_stop_decoder;
 | 
| 
 | 
    98 
 | 
| 
 | 
    99 typedef struct {
 | 
| 
 | 
   100     enum cram_encoding len_encoding;
 | 
| 
 | 
   101     enum cram_encoding val_encoding;
 | 
| 
 | 
   102     void *len_dat;
 | 
| 
 | 
   103     void *val_dat;
 | 
| 
 | 
   104     struct cram_codec *len_codec;
 | 
| 
 | 
   105     struct cram_codec *val_codec;
 | 
| 
 | 
   106 } cram_byte_array_len_encoder;
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 /*
 | 
| 
 | 
   109  * A generic codec structure.
 | 
| 
 | 
   110  */
 | 
| 
 | 
   111 typedef struct cram_codec {
 | 
| 
 | 
   112     enum cram_encoding codec;
 | 
| 
 | 
   113     cram_block *out;
 | 
| 
 | 
   114     void (*free)(struct cram_codec *codec);
 | 
| 
 | 
   115     int (*decode)(cram_slice *slice, struct cram_codec *codec,
 | 
| 
 | 
   116 		  cram_block *in, char *out, int *out_size);
 | 
| 
 | 
   117     int (*encode)(cram_slice *slice, struct cram_codec *codec,
 | 
| 
 | 
   118 		  char *in, int in_size);
 | 
| 
 | 
   119     int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
 | 
| 
 | 
   120 		 int version);
 | 
| 
 | 
   121     union {
 | 
| 
 | 
   122 	cram_huffman_decoder         huffman;
 | 
| 
 | 
   123 	cram_external_decoder        external;
 | 
| 
 | 
   124 	cram_beta_decoder            beta;
 | 
| 
 | 
   125 	cram_gamma_decoder           gamma;
 | 
| 
 | 
   126 	cram_subexp_decoder          subexp;
 | 
| 
 | 
   127 	cram_byte_array_len_decoder  byte_array_len;
 | 
| 
 | 
   128 	cram_byte_array_stop_decoder byte_array_stop;
 | 
| 
 | 
   129 
 | 
| 
 | 
   130 	cram_huffman_encoder         e_huffman;
 | 
| 
 | 
   131 	cram_external_decoder        e_external;
 | 
| 
 | 
   132 	cram_byte_array_stop_decoder e_byte_array_stop;
 | 
| 
 | 
   133 	cram_byte_array_len_encoder  e_byte_array_len;
 | 
| 
 | 
   134 	cram_beta_decoder            e_beta;
 | 
| 
 | 
   135     };
 | 
| 
 | 
   136 } cram_codec;
 | 
| 
 | 
   137 
 | 
| 
 | 
   /*
    * Returns a string for the encoding type t.
    * NOTE(review): presumably a human-readable name held in static storage
    * that the caller must not modify or free -- confirm against the
    * implementation.
    */
   char *cram_encoding2str(enum cram_encoding t);
 | 
| 
 | 
   139 
 | 
| 
 | 
   140 cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size,
 | 
| 
 | 
   141 			      enum cram_external_type option,
 | 
| 
 | 
   142 			      int version);
 | 
| 
 | 
   143 cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
 | 
| 
 | 
   144 			      enum cram_external_type option, void *dat,
 | 
| 
 | 
   145 			      int version);
 | 
| 
 | 
   146 
 | 
| 
 | 
   147 //int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
 | 
| 
 | 
   148 //void cram_decoder_free(void *codes);
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 //#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
 | 
| 
 | 
   151 
 | 
| 
 | 
   /*
    * Shifts v left by one and ORs in bit number b->bit of b->data[b->byte],
    * then steps to the next lower bit. After bit 0 has been read the
    * position moves on to bit 7 of the following byte.
    *
    * b must be a pointer to an object with 'data', 'byte' and 'bit' members;
    * 'bit' must be a signed type, since the wrap relies on it going
    * negative. No bounds check is made on b->byte.
    *
    * Both arguments are evaluated more than once, so they must be free of
    * side effects. They are parenthesised in the expansion so that
    * expressions such as &blk can be passed safely.
    */
   #define GET_BIT_MSB(b,v) (void)((v)<<=1, (v)|=((b)->data[(b)->byte] >> (b)->bit)&1, (b)->byte += (--(b)->bit<0), (b)->bit&=7)
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 /*
 | 
| 
 | 
   155  * Returns the content_id used by this codec, also in id2 if byte_array_len.
 | 
| 
 | 
   156  * Returns -1 for the CORE block and -2 for unneeded.
 | 
| 
 | 
   157  * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
 | 
| 
 | 
   158  */
 | 
| 
 | 
   159 int cram_codec_to_id(cram_codec *c, int *id2);
 | 
| 
 | 
   160 
 | 
| 
 | 
   161 #ifdef __cplusplus
 | 
| 
 | 
   162 }
 | 
| 
 | 
   163 #endif
 | 
| 
 | 
   164 
 | 
| 
 | 
   165 #endif /* _CRAM_ENCODINGS_H_ */
 |