0
|
1 /*
|
|
2 Copyright (c) 2012-2013 Genome Research Ltd.
|
|
3 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
4
|
|
5 Redistribution and use in source and binary forms, with or without
|
|
6 modification, are permitted provided that the following conditions are met:
|
|
7
|
|
8 1. Redistributions of source code must retain the above copyright notice,
|
|
9 this list of conditions and the following disclaimer.
|
|
10
|
|
11 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12 this list of conditions and the following disclaimer in the documentation
|
|
13 and/or other materials provided with the distribution.
|
|
14
|
|
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
16 Institute nor the names of its contributors may be used to endorse or promote
|
|
17 products derived from this software without specific prior written permission.
|
|
18
|
|
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 #ifndef _CRAM_ENCODINGS_H_
|
|
32 #define _CRAM_ENCODINGS_H_
|
|
33
|
|
34 #ifdef __cplusplus
|
|
35 extern "C" {
|
|
36 #endif
|
|
37
|
|
38 #include <inttypes.h>
|
|
39
|
|
40 struct cram_codec;
|
|
41
|
|
/*
 * Slow but simple huffman decoder to start with.
 * Read a bit at a time, keeping track of {length, value}
 * eg. 1 1 0 1 => {1,1}, {2,3}, {3,6}, {4,13}
 *
 * Keep track of this through the huffman code table.
 * For fast scanning we have an index of where the first code of length X
 * appears.
 *
 * cram_huffman_code is one entry of that code table.
 */
typedef struct {
    int32_t symbol; /* presumably the value this code decodes to -- confirm */
    int32_t p;      /* next code start value, minus index to codes[] */
    int32_t code;   /* presumably the accumulated code value -- confirm */
    int32_t len;    /* presumably the code length in bits -- confirm */
} cram_huffman_code;
|
|
57
|
|
58 typedef struct {
|
|
59 int ncodes;
|
|
60 cram_huffman_code *codes;
|
|
61 } cram_huffman_decoder;
|
|
62
|
|
63 #define MAX_HUFF 128
|
|
64 typedef struct {
|
|
65 cram_huffman_code *codes;
|
|
66 int nvals;
|
|
67 int val2code[MAX_HUFF+1]; // value to code lookup for small values
|
|
68 } cram_huffman_encoder;
|
|
69
|
|
/*
 * Parameters held for the beta codec.
 * Stored in the cram_codec union as both `beta` and `e_beta`.
 */
typedef struct {
    int32_t offset; /* NOTE(review): presumably a bias applied to stored values -- confirm */
    int32_t nbits;  /* NOTE(review): presumably the fixed bit width per value -- confirm */
} cram_beta_decoder;
|
|
74
|
|
/*
 * Parameters held for the gamma codec.
 * Stored in the cram_codec union as the `gamma` member.
 */
typedef struct {
    int32_t offset; /* NOTE(review): presumably a bias applied to stored values -- confirm */
} cram_gamma_decoder;
|
|
78
|
|
/*
 * Parameters held for the subexp codec.
 * Stored in the cram_codec union as the `subexp` member.
 */
typedef struct {
    int32_t offset; /* NOTE(review): presumably a bias applied to stored values -- confirm */
    int32_t k;      /* NOTE(review): presumably the sub-exponential order parameter -- confirm */
} cram_subexp_decoder;
|
|
83
|
|
84 typedef struct {
|
|
85 int32_t content_id;
|
|
86 enum cram_external_type type;
|
|
87 } cram_external_decoder;
|
|
88
|
|
/*
 * Decoder state for byte_array_len: a pair of sub-codecs.
 * Stored in the cram_codec union as the `byte_array_len` member.
 */
typedef struct {
    struct cram_codec *len_codec;   /* presumably decodes the array length -- confirm */
    struct cram_codec *value_codec; /* presumably decodes the array bytes -- confirm */
} cram_byte_array_len_decoder;
|
|
93
|
|
/*
 * Parameters held for byte_array_stop.
 * Stored in the cram_codec union as both `byte_array_stop` and
 * `e_byte_array_stop`.
 */
typedef struct {
    unsigned char stop; /* presumably the byte value that terminates an array -- confirm */
    int32_t content_id; /* presumably identifies the block holding the data -- confirm */
} cram_byte_array_stop_decoder;
|
|
98
|
|
99 typedef struct {
|
|
100 enum cram_encoding len_encoding;
|
|
101 enum cram_encoding val_encoding;
|
|
102 void *len_dat;
|
|
103 void *val_dat;
|
|
104 struct cram_codec *len_codec;
|
|
105 struct cram_codec *val_codec;
|
|
106 } cram_byte_array_len_encoder;
|
|
107
|
|
108 /*
|
|
109 * A generic codec structure.
|
|
110 */
|
|
111 typedef struct cram_codec {
|
|
112 enum cram_encoding codec;
|
|
113 cram_block *out;
|
|
114 void (*free)(struct cram_codec *codec);
|
|
115 int (*decode)(cram_slice *slice, struct cram_codec *codec,
|
|
116 cram_block *in, char *out, int *out_size);
|
|
117 int (*encode)(cram_slice *slice, struct cram_codec *codec,
|
|
118 char *in, int in_size);
|
|
119 int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
|
|
120 int version);
|
|
121 union {
|
|
122 cram_huffman_decoder huffman;
|
|
123 cram_external_decoder external;
|
|
124 cram_beta_decoder beta;
|
|
125 cram_gamma_decoder gamma;
|
|
126 cram_subexp_decoder subexp;
|
|
127 cram_byte_array_len_decoder byte_array_len;
|
|
128 cram_byte_array_stop_decoder byte_array_stop;
|
|
129
|
|
130 cram_huffman_encoder e_huffman;
|
|
131 cram_external_decoder e_external;
|
|
132 cram_byte_array_stop_decoder e_byte_array_stop;
|
|
133 cram_byte_array_len_encoder e_byte_array_len;
|
|
134 cram_beta_decoder e_beta;
|
|
135 };
|
|
136 } cram_codec;
|
|
137
|
|
/*
 * Maps an encoding type to a string.
 * NOTE(review): presumably a human-readable name that the caller must not
 * modify or free -- confirm against the definition.
 */
char *cram_encoding2str(enum cram_encoding t);
|
|
139
|
|
140 cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size,
|
|
141 enum cram_external_type option,
|
|
142 int version);
|
|
143 cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
|
|
144 enum cram_external_type option, void *dat,
|
|
145 int version);
|
|
146
|
|
147 //int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
|
|
148 //void cram_decoder_free(void *codes);
|
|
149
|
|
150 //#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
|
|
151
|
|
/*
 * Shift v left by one and OR in the bit at position b->bit of
 * b->data[b->byte]. Then step b->bit down by one; when it drops below zero
 * b->byte is advanced by one and b->bit wraps back to 7.
 *
 * b must be a pointer to an object with data, byte and bit members, and
 * bit must be a signed type or the "< 0" wrap test never fires. Both
 * arguments are expanded several times, so pass expressions without side
 * effects. No bounds check is made on b->byte.
 *
 * Every use of b and v is parenthesized so that arguments such as &blk or
 * *p expand with the intended precedence.
 */
#define GET_BIT_MSB(b,v) (void)((v)<<=1, (v)|=((b)->data[(b)->byte] >> (b)->bit)&1, (b)->byte += (--(b)->bit<0), (b)->bit&=7)
|
|
153
|
|
154 /*
|
|
155 * Returns the content_id used by this codec, also in id2 if byte_array_len.
|
|
156 * Returns -1 for the CORE block and -2 for unneeded.
|
|
157 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
|
|
158 */
|
|
159 int cram_codec_to_id(cram_codec *c, int *id2);
|
|
160
|
|
161 #ifdef __cplusplus
|
|
162 }
|
|
163 #endif
|
|
164
|
|
165 #endif /* _CRAM_ENCODINGS_H_ */
|