annotate ezBAMQC/src/htslib/cram/cram_codecs.h @ 20:9de3bbec2479 draft default tip

Uploaded
author youngkim
date Thu, 31 Mar 2016 10:10:37 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2012-2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #ifndef _CRAM_ENCODINGS_H_
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #define _CRAM_ENCODINGS_H_
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 #ifdef __cplusplus
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 extern "C" {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include <inttypes.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 struct cram_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Slow but simple huffman decoder to start with.
 * Read a bit at a time, keeping track of {length, value}
 * eg. 1 1 0 1 => {1,1}, {2,3}, {3,6}, {4,13}
 *
 * Keep track of this through the huffman code table.
 * For fast scanning we have an index of where the first code of length X
 * appears.
 *
 * One entry of that code table.
 * NOTE(review): symbol/code/len are presumably the decoded value, its bit
 * pattern and the pattern's length in bits -- confirm against cram_codecs.c.
 */
typedef struct {
    int32_t symbol;
    int32_t p;      // next code start value, minus index to codes[]
    int32_t code;
    int32_t len;
} cram_huffman_code;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 int ncodes;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 cram_huffman_code *codes;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 } cram_huffman_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 #define MAX_HUFF 128
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 cram_huffman_code *codes;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 int nvals;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 int val2code[MAX_HUFF+1]; // value to code lookup for small values
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 } cram_huffman_encoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Parameters held for the beta codec (also used as the e_beta member of
 * cram_codec).
 * NOTE(review): offset/nbits meaning is defined by the codec
 * implementation, which is not visible in this header.
 */
typedef struct {
    int32_t offset;
    int32_t nbits;
} cram_beta_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Parameters held for the gamma codec.
 * NOTE(review): offset meaning is defined by the codec implementation,
 * which is not visible in this header.
 */
typedef struct {
    int32_t offset;
} cram_gamma_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Parameters held for the subexp codec.
 * NOTE(review): offset/k meaning is defined by the codec implementation,
 * which is not visible in this header.
 */
typedef struct {
    int32_t offset;
    int32_t k;
} cram_subexp_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 int32_t content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 enum cram_external_type type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 } cram_external_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Decoder-side state for BYTE_ARRAY_LEN, which uses two sub-codecs:
 * one held in len_codec and one in value_codec.
 */
typedef struct {
    struct cram_codec *len_codec;
    struct cram_codec *value_codec;
} cram_byte_array_len_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Parameters held for the byte_array_stop codec (used for both the
 * `byte_array_stop` and `e_byte_array_stop` members of cram_codec).
 * NOTE(review): `stop` is presumably the terminating byte value and
 * content_id the block to read from -- confirm against the implementation.
 */
typedef struct {
    unsigned char stop;
    int32_t content_id;
} cram_byte_array_stop_decoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 enum cram_encoding len_encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 enum cram_encoding val_encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 void *len_dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 void *val_dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 struct cram_codec *len_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 struct cram_codec *val_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 } cram_byte_array_len_encoder;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 * A generic codec structure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 typedef struct cram_codec {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 enum cram_encoding codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 cram_block *out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 void (*free)(struct cram_codec *codec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 int (*decode)(cram_slice *slice, struct cram_codec *codec,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 cram_block *in, char *out, int *out_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 int (*encode)(cram_slice *slice, struct cram_codec *codec,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 char *in, int in_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 int version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 union {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 cram_huffman_decoder huffman;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 cram_external_decoder external;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 cram_beta_decoder beta;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 cram_gamma_decoder gamma;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 cram_subexp_decoder subexp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 cram_byte_array_len_decoder byte_array_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 cram_byte_array_stop_decoder byte_array_stop;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 cram_huffman_encoder e_huffman;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 cram_external_decoder e_external;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 cram_byte_array_stop_decoder e_byte_array_stop;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 cram_byte_array_len_encoder e_byte_array_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 cram_beta_decoder e_beta;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 } cram_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Takes an encoding id and returns a char pointer.
 * NOTE(review): presumably a printable name for the encoding; ownership of
 * the returned string is not visible here -- confirm before freeing or
 * modifying it.
 */
char *cram_encoding2str(enum cram_encoding t);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 enum cram_external_type option,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 int version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 enum cram_external_type option, void *dat,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 int version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 //int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 //void cram_decoder_free(void *codes);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 //#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Read one bit from block `b` into `v`, most significant bit first.
 *
 * b: pointer to a struct with `data` (byte array), `byte` (index into data)
 *    and `bit` (bit position within the current byte) members.
 * v: integer lvalue; shifted left by one and the fetched bit ORed into
 *    its low bit.
 *
 * After the read, b->bit is decremented; when it drops below 0, b->byte is
 * advanced by one and b->bit wraps back to 7 (via the `&= 7` mask).
 *
 * Both arguments are evaluated more than once, so they must not have side
 * effects. No bounds check is made on b->byte.
 */
#define GET_BIT_MSB(b,v) (void)((v) <<= 1, (v) |= (((b)->data[(b)->byte] >> (b)->bit) & 1), (b)->byte += (--(b)->bit < 0), (b)->bit &= 7)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 * Returns the content_id used by this codec, also in id2 if byte_array_len.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 * Returns -1 for the CORE block and -2 for unneeded.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 int cram_codec_to_id(cram_codec *c, int *id2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 #ifdef __cplusplus
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 #endif /* _CRAM_ENCODINGS_H_ */