diff ezBAMQC/src/htslib/cram/cram_codecs.h @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ezBAMQC/src/htslib/cram/cram_codecs.h	Thu Mar 24 17:12:52 2016 -0400
@@ -0,0 +1,165 @@
+/*
+Copyright (c) 2012-2013 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _CRAM_ENCODINGS_H_
+#define _CRAM_ENCODINGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <inttypes.h>
+
+struct cram_codec;
+
+/*
+ * Slow but simple huffman decoder to start with.
+ * Read a bit at a time, keeping track of {length, value}
+ * eg. 1 1 0 1 => {1,1},  {2,3}, {3,6}, {4,13}
+ *
+ * Keep track of this through the huffman code table.
+ * For fast scanning we have an index of where the first code of length X
+ * appears.
+ */
+typedef struct {
+    int32_t symbol;
+    int32_t p; // next code start value, minus index to codes[]
+    int32_t code;
+    int32_t len;
+} cram_huffman_code;
+
+typedef struct {
+    int ncodes;
+    cram_huffman_code *codes;
+} cram_huffman_decoder;
+
+#define MAX_HUFF 128
+typedef struct {
+    cram_huffman_code *codes;
+    int nvals;
+    int val2code[MAX_HUFF+1]; // value to code lookup for small values
+} cram_huffman_encoder;
+
+typedef struct {
+    int32_t offset;
+    int32_t nbits;
+} cram_beta_decoder;
+
+typedef struct {
+    int32_t offset;
+} cram_gamma_decoder;
+
+typedef struct {
+    int32_t offset;
+    int32_t k;
+} cram_subexp_decoder;
+
+typedef struct {
+    int32_t content_id;
+    enum cram_external_type type;
+} cram_external_decoder;
+
+typedef struct {
+    struct cram_codec *len_codec;
+    struct cram_codec *value_codec;
+} cram_byte_array_len_decoder;
+
+typedef struct {
+    unsigned char stop;
+    int32_t content_id;
+} cram_byte_array_stop_decoder;
+
+typedef struct {
+    enum cram_encoding len_encoding;
+    enum cram_encoding val_encoding;
+    void *len_dat;
+    void *val_dat;
+    struct cram_codec *len_codec;
+    struct cram_codec *val_codec;
+} cram_byte_array_len_encoder;
+
+/*
+ * A generic codec structure.
+ */
+typedef struct cram_codec {
+    enum cram_encoding codec;
+    cram_block *out;
+    void (*free)(struct cram_codec *codec);
+    int (*decode)(cram_slice *slice, struct cram_codec *codec,
+		  cram_block *in, char *out, int *out_size);
+    int (*encode)(cram_slice *slice, struct cram_codec *codec,
+		  char *in, int in_size);
+    int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
+		 int version);
+    union {
+	cram_huffman_decoder         huffman;
+	cram_external_decoder        external;
+	cram_beta_decoder            beta;
+	cram_gamma_decoder           gamma;
+	cram_subexp_decoder          subexp;
+	cram_byte_array_len_decoder  byte_array_len;
+	cram_byte_array_stop_decoder byte_array_stop;
+
+	cram_huffman_encoder         e_huffman;
+	cram_external_decoder        e_external;
+	cram_byte_array_stop_decoder e_byte_array_stop;
+	cram_byte_array_len_encoder  e_byte_array_len;
+	cram_beta_decoder            e_beta;
+    };
+} cram_codec;
+
+char *cram_encoding2str(enum cram_encoding t);
+
+cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size,
+			      enum cram_external_type option,
+			      int version);
+cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
+			      enum cram_external_type option, void *dat,
+			      int version);
+
+//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
+//void cram_decoder_free(void *codes);
+
+//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
+
+#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, b->byte += (--b->bit<0), b->bit&=7)
+
+/*
+ * Returns the content_id used by this codec, also in id2 if byte_array_len.
+ * Returns -1 for the CORE block and -2 for unneeded.
+ * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
+ */
+int cram_codec_to_id(cram_codec *c, int *id2);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CRAM_ENCODINGS_H_ */