diff ezBAMQC/src/htslib/cram/cram_encode.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ezBAMQC/src/htslib/cram/cram_encode.c	Thu Mar 24 17:12:52 2016 -0400
@@ -0,0 +1,3068 @@
+/*
+Copyright (c) 2012-2013 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "io_lib_config.h"
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "cram/cram.h"
+#include "cram/os.h"
+#include "cram/md5.h"
+
+#define Z_CRAM_STRAT Z_FILTERED
+//#define Z_CRAM_STRAT Z_RLE
+//#define Z_CRAM_STRAT Z_HUFFMAN_ONLY
+//#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY
+
+static int process_one_read(cram_fd *fd, cram_container *c,
+			    cram_slice *s, cram_record *cr,
+			    bam_seq_t *b, int rnum);
+
+/*
+ * Returns index of val into key.
+ * Basically strchr(key, val)-key;
+ */
+static int sub_idx(char *key, char val) {
+    int i;
+
+    for (i = 0; *key && *key++ != val; i++);
+    return i;
+}
+
+/*
+ * Encodes a compression header block into a generic cram_block structure.
+ *
+ * Returns cram_block ptr on success
+ *         NULL on failure
+ */
+cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,
+					   cram_block_compression_hdr *h) {
+    cram_block *cb  = cram_new_block(COMPRESSION_HEADER, 0);
+    cram_block *map = cram_new_block(COMPRESSION_HEADER, 0);
+    int i, mc;
+
+    if (!cb || !map)
+	return NULL;
+
+    /*
+     * This is a concatenation of several blocks of data:
+     * header + landmarks, preservation map, read encoding map, and the tag
+     * encoding map.
+     * All 4 are variable sized and we need to know how large these are
+     * before creating the compression header itself as this starts with
+     * the total size (stored as a variable length string).
+     */
+
+    // Duplicated from container itself, and removed in 1.1
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	itf8_put_blk(cb, h->ref_seq_id);
+	itf8_put_blk(cb, h->ref_seq_start);
+	itf8_put_blk(cb, h->ref_seq_span);
+	itf8_put_blk(cb, h->num_records);
+	itf8_put_blk(cb, h->num_landmarks);
+	for (i = 0; i < h->num_landmarks; i++) {
+	    itf8_put_blk(cb, h->landmark[i]);
+	}
+    }
+
+    /* Create in-memory preservation map */
+    /* FIXME: should create this when we create the container */
+    {
+	khint_t k;
+	int r;
+
+	if (!(h->preservation_map = kh_init(map)))
+	    return NULL;
+
+	k = kh_put(map, h->preservation_map, "RN", &r);
+	if (-1 == r) return NULL;
+	kh_val(h->preservation_map, k).i = 1;
+
+	if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	    k = kh_put(map, h->preservation_map, "PI", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = 0;
+
+	    k = kh_put(map, h->preservation_map, "UI", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = 1;
+
+	    k = kh_put(map, h->preservation_map, "MI", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = 1;
+
+	} else {
+	    // Technically SM was in 1.0, but wasn't in Java impl.
+	    k = kh_put(map, h->preservation_map, "SM", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = 0;
+
+	    k = kh_put(map, h->preservation_map, "TD", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = 0;
+
+	    k = kh_put(map, h->preservation_map, "AP", &r);
+	    if (-1 == r) return NULL;
+	    kh_val(h->preservation_map, k).i = c->pos_sorted;
+
+	    if (fd->no_ref || fd->embed_ref) {
+		// Reference Required == No
+		k = kh_put(map, h->preservation_map, "RR", &r);
+		if (-1 == r) return NULL;
+		kh_val(h->preservation_map, k).i = 0;
+	    }
+	}
+    }
+
+    /* Encode preservation map; could collapse this and above into one */
+    mc = 0;
+    BLOCK_SIZE(map) = 0;
+    if (h->preservation_map) {
+	khint_t k;
+
+	for (k = kh_begin(h->preservation_map);
+	     k != kh_end(h->preservation_map);
+	     k++) {
+	    const char *key;
+	    khash_t(map) *pmap = h->preservation_map;
+
+
+	    if (!kh_exist(pmap, k))
+		continue;
+
+	    key = kh_key(pmap, k);
+	    BLOCK_APPEND(map, key, 2);
+
+	    switch(CRAM_KEY(key[0], key[1])) {
+	    case CRAM_KEY('M','I'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('U','I'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('P','I'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('A','P'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('R','N'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('R','R'):
+		BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
+		break;
+
+	    case CRAM_KEY('S','M'): {
+		char smat[5], *mp = smat;
+		*mp++ =
+		    (sub_idx("CGTN", h->substitution_matrix[0][0]) << 6) |
+		    (sub_idx("CGTN", h->substitution_matrix[0][1]) << 4) |
+		    (sub_idx("CGTN", h->substitution_matrix[0][2]) << 2) |
+		    (sub_idx("CGTN", h->substitution_matrix[0][3]) << 0);
+		*mp++ =
+		    (sub_idx("AGTN", h->substitution_matrix[1][0]) << 6) |
+		    (sub_idx("AGTN", h->substitution_matrix[1][1]) << 4) |
+		    (sub_idx("AGTN", h->substitution_matrix[1][2]) << 2) |
+		    (sub_idx("AGTN", h->substitution_matrix[1][3]) << 0);
+		*mp++ =
+		    (sub_idx("ACTN", h->substitution_matrix[2][0]) << 6) |
+		    (sub_idx("ACTN", h->substitution_matrix[2][1]) << 4) |
+		    (sub_idx("ACTN", h->substitution_matrix[2][2]) << 2) |
+		    (sub_idx("ACTN", h->substitution_matrix[2][3]) << 0);
+		*mp++ =
+		    (sub_idx("ACGN", h->substitution_matrix[3][0]) << 6) |
+		    (sub_idx("ACGN", h->substitution_matrix[3][1]) << 4) |
+		    (sub_idx("ACGN", h->substitution_matrix[3][2]) << 2) |
+		    (sub_idx("ACGN", h->substitution_matrix[3][3]) << 0);
+		*mp++ =
+		    (sub_idx("ACGT", h->substitution_matrix[4][0]) << 6) |
+		    (sub_idx("ACGT", h->substitution_matrix[4][1]) << 4) |
+		    (sub_idx("ACGT", h->substitution_matrix[4][2]) << 2) |
+		    (sub_idx("ACGT", h->substitution_matrix[4][3]) << 0);
+		BLOCK_APPEND(map, smat, 5);
+		break;
+	    }
+
+	    case CRAM_KEY('T','D'): {
+		itf8_put_blk(map, BLOCK_SIZE(h->TD_blk));
+		BLOCK_APPEND(map,
+			     BLOCK_DATA(h->TD_blk),
+			     BLOCK_SIZE(h->TD_blk));
+		break;
+	    }
+
+	    default:
+		fprintf(stderr, "Unknown preservation key '%.2s'\n", key);
+		break;
+	    }
+
+	    mc++;
+        }
+    }
+    itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
+    itf8_put_blk(cb, mc);    
+    BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
+    
+    /* rec encoding map */
+    mc = 0;
+    BLOCK_SIZE(map) = 0;
+    if (h->codecs[DS_BF]) {
+	if (-1 == h->codecs[DS_BF]->store(h->codecs[DS_BF], map, "BF",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_CF]) {
+	if (-1 == h->codecs[DS_CF]->store(h->codecs[DS_CF], map, "CF",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_RL]) {
+	if (-1 == h->codecs[DS_RL]->store(h->codecs[DS_RL], map, "RL",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_AP]) {
+	if (-1 == h->codecs[DS_AP]->store(h->codecs[DS_AP], map, "AP",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_RG]) {
+	if (-1 == h->codecs[DS_RG]->store(h->codecs[DS_RG], map, "RG",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_MF]) {
+	if (-1 == h->codecs[DS_MF]->store(h->codecs[DS_MF], map, "MF",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_NS]) {
+	if (-1 == h->codecs[DS_NS]->store(h->codecs[DS_NS], map, "NS",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_NP]) {
+	if (-1 == h->codecs[DS_NP]->store(h->codecs[DS_NP], map, "NP",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_TS]) {
+	if (-1 == h->codecs[DS_TS]->store(h->codecs[DS_TS], map, "TS",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_NF]) {
+	if (-1 == h->codecs[DS_NF]->store(h->codecs[DS_NF], map, "NF",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_TC]) {
+	if (-1 == h->codecs[DS_TC]->store(h->codecs[DS_TC], map, "TC",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_TN]) {
+	if (-1 == h->codecs[DS_TN]->store(h->codecs[DS_TN], map, "TN",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_TL]) {
+	if (-1 == h->codecs[DS_TL]->store(h->codecs[DS_TL], map, "TL",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_FN]) {
+	if (-1 == h->codecs[DS_FN]->store(h->codecs[DS_FN], map, "FN",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_FC]) {
+	if (-1 == h->codecs[DS_FC]->store(h->codecs[DS_FC], map, "FC",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_FP]) {
+	if (-1 == h->codecs[DS_FP]->store(h->codecs[DS_FP], map, "FP",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_BS]) {
+	if (-1 == h->codecs[DS_BS]->store(h->codecs[DS_BS], map, "BS",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_IN]) {
+	if (-1 == h->codecs[DS_IN]->store(h->codecs[DS_IN], map, "IN",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_DL]) {
+	if (-1 == h->codecs[DS_DL]->store(h->codecs[DS_DL], map, "DL",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_BA]) {
+	if (-1 == h->codecs[DS_BA]->store(h->codecs[DS_BA], map, "BA",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_BB]) {
+	if (-1 == h->codecs[DS_BB]->store(h->codecs[DS_BB], map, "BB",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_MQ]) {
+	if (-1 == h->codecs[DS_MQ]->store(h->codecs[DS_MQ], map, "MQ",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_RN]) {
+	if (-1 == h->codecs[DS_RN]->store(h->codecs[DS_RN], map, "RN",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_QS]) {
+	if (-1 == h->codecs[DS_QS]->store(h->codecs[DS_QS], map, "QS",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_QQ]) {
+	if (-1 == h->codecs[DS_QQ]->store(h->codecs[DS_QQ], map, "QQ",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_RI]) {
+	if (-1 == h->codecs[DS_RI]->store(h->codecs[DS_RI], map, "RI",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (CRAM_MAJOR_VERS(fd->version) != 1) {
+	if (h->codecs[DS_SC]) {
+	    if (-1 == h->codecs[DS_SC]->store(h->codecs[DS_SC], map, "SC",
+					      fd->version))
+		return NULL;
+	    mc++;
+	}
+	if (h->codecs[DS_RS]) {
+	    if (-1 == h->codecs[DS_RS]->store(h->codecs[DS_RS], map, "RS",
+					      fd->version))
+		return NULL;
+	    mc++;
+	}
+	if (h->codecs[DS_PD]) {
+	    if (-1 == h->codecs[DS_PD]->store(h->codecs[DS_PD], map, "PD",
+					      fd->version))
+		return NULL;
+	    mc++;
+	}
+	if (h->codecs[DS_HC]) {
+	    if (-1 == h->codecs[DS_HC]->store(h->codecs[DS_HC], map, "HC",
+					      fd->version))
+		return NULL;
+	    mc++;
+	}
+    }
+    if (h->codecs[DS_TM]) {
+	if (-1 == h->codecs[DS_TM]->store(h->codecs[DS_TM], map, "TM",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    if (h->codecs[DS_TV]) {
+	if (-1 == h->codecs[DS_TV]->store(h->codecs[DS_TV], map, "TV",
+					  fd->version))
+	    return NULL;
+	mc++;
+    }
+    itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
+    itf8_put_blk(cb, mc);    
+    BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
+
+    /* tag encoding map */
+#if 0
+    mp = map; mc = 0;
+    if (h->tag_encoding_map) {
+        HashItem *hi;
+        HashIter *iter = HashTableIterCreate();
+	if (!iter)
+	    return NULL;
+
+        while ((hi = HashTableIterNext(h->tag_encoding_map, iter))) {
+            cram_map *m = hi->data.p;
+	    int sz;
+
+	    mp += itf8_put(mp, (hi->key[0]<<16)|(hi->key[1]<<8)|hi->key[2]);
+	    if (-1 == (sz = m->codec->store(m->codec, mp, NULL, fd->version)))
+		return NULL;
+	    mp += sz;
+	    mc++;
+        }
+
+        HashTableIterDestroy(iter);
+    }
+#else
+    mc = 0;
+    BLOCK_SIZE(map) = 0;
+    if (c->tags_used) {
+	khint_t k;
+
+#define TAG_ID(a) ((#a[0]<<8)+#a[1])
+
+	for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) {
+	    int key;
+	    if (!kh_exist(c->tags_used, k))
+		continue;
+
+	    mc++;
+	    itf8_put_blk(map, kh_key(c->tags_used, k));
+
+	    // use block content id 4
+	    switch((key = kh_key(c->tags_used, k)) & 0xff) {
+	    case 'Z': case 'H':
+		// string as byte_array_stop
+		if (CRAM_MAJOR_VERS(fd->version) == 1) {
+		    BLOCK_APPEND(map,
+				 "\005" // BYTE_ARRAY_STOP
+				 "\005" // len
+				 "\t"   // stop-byte is also SAM separator
+				 DS_aux_S "\000\000\000",
+				 7);
+		} else {
+		    if (key>>8 == TAG_ID(OQ))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_OQ_S,
+				     4);
+		    else if (key>>8 == TAG_ID(BQ))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_BQ_S,
+				     4);
+		    else if (key>>8 == TAG_ID(BD))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_BD_S,
+				     4);
+		    else if (key>>8 == TAG_ID(BI))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_BI_S,
+				     4);
+		    else if ((key>>8 == TAG_ID(Q2)) ||
+			     (key>>8 == TAG_ID(U2)) ||
+			     (key>>8 == TAG_ID(QT)) ||
+			     (key>>8 == TAG_ID(CQ)))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_oq_S,
+				     4);
+		    else if ((key>>8 == TAG_ID(R2)) ||
+			     (key>>8 == TAG_ID(E2)) ||
+			     (key>>8 == TAG_ID(CS)) ||
+			     (key>>8 == TAG_ID(BC)) ||
+			     (key>>8 == TAG_ID(RT)))
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_os_S,
+				     4);
+		    else
+			BLOCK_APPEND(map,
+				     "\005" // BYTE_ARRAY_STOP
+				     "\002" // len
+				     "\t"   // stop-byte is also SAM separator
+				     DS_aux_oz_S,
+				     4);
+		}
+		break;
+
+	    case 'A': case 'c': case 'C':
+		// byte array len, 1 byte
+		BLOCK_APPEND(map,
+			     "\004" // BYTE_ARRAY_LEN
+			     "\011" // length
+			     "\003" // HUFFMAN (len)
+			     "\004" // huffman-len
+			     "\001" // 1 symbol
+			     "\001" // symbol=1 byte value
+			     "\001" // 1 length
+			     "\000" // length=0
+			     "\001" // EXTERNAL (val)
+			     "\001" // external-len
+			     DS_aux_S,// content-id
+			     11);
+		break;
+
+	    case 's': case 'S':
+		// byte array len, 2 byte
+		BLOCK_APPEND(map,
+			     "\004" // BYTE_ARRAY_LEN
+			     "\011" // length
+			     "\003" // HUFFMAN (len)
+			     "\004" // huffman-len
+			     "\001" // 1 symbol
+			     "\002" // symbol=2 byte value
+			     "\001" // 1 length
+			     "\000" // length=0
+			     "\001" // EXTERNAL (val)
+			     "\001" // external-len
+			     DS_aux_S,// content-id
+			     11);
+		break;
+
+	    case 'i': case 'I': case 'f':
+		// byte array len, 4 byte
+		BLOCK_APPEND(map,
+			     "\004" // BYTE_ARRAY_LEN
+			     "\011" // length
+			     "\003" // HUFFMAN (len)
+			     "\004" // huffman-len
+			     "\001" // 1 symbol
+			     "\004" // symbol=4 byte value
+			     "\001" // 1 length
+			     "\000" // length=0
+			     "\001" // EXTERNAL (val)
+			     "\001" // external-len
+			     DS_aux_S,// content-id
+			     11);
+		break;
+
+	    case 'B':
+		// Byte array of variable size, but we generate our tag
+		// byte stream at the wrong stage (during reading and not
+		// after slice header construction). So we use
+		// BYTE_ARRAY_LEN with the length codec being external
+		// too.
+		if ((key>>8 == TAG_ID(FZ)) || (key>>8 == TAG_ID(ZM)))
+		    BLOCK_APPEND(map,
+				 "\004" // BYTE_ARRAY_LEN
+				 "\006" // length
+				 "\001" // EXTERNAL (len)
+				 "\001" // external-len
+				 DS_aux_FZ_S // content-id
+				 "\001" // EXTERNAL (val)
+				 "\001" // external-len
+				 DS_aux_FZ_S,// content-id
+				 8);
+		else
+		    BLOCK_APPEND(map,
+				 "\004" // BYTE_ARRAY_LEN
+				 "\006" // length
+				 "\001" // EXTERNAL (len)
+				 "\001" // external-len
+				 DS_aux_S // content-id
+				 "\001" // EXTERNAL (val)
+				 "\001" // external-len
+				 DS_aux_S,// content-id
+				 8);
+		break;
+
+	    default:
+		fprintf(stderr, "Unsupported SAM aux type '%c'\n",
+			kh_key(c->tags_used, k) & 0xff);
+	    }
+	    //mp += m->codec->store(m->codec, mp, NULL, fd->version);
+	}
+    }
+#endif
+    itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
+    itf8_put_blk(cb, mc);    
+    BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
+
+    if (fd->verbose)
+	fprintf(stderr, "Wrote compression block header in %d bytes\n",
+		(int)BLOCK_SIZE(cb));
+
+    BLOCK_UPLEN(cb);
+
+    cram_free_block(map);
+
+    return cb;
+}
+
+
+/*
+ * Encodes a slice compression header. 
+ *
+ * Returns cram_block on success
+ *         NULL on failure
+ */
+cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s) {
+    char *buf;
+    char *cp;
+    cram_block *b = cram_new_block(MAPPED_SLICE, 0);
+    int j;
+
+    if (!b)
+	return NULL;
+
+    if (NULL == (cp = buf = malloc(16+5*(8+s->hdr->num_blocks)))) {
+	cram_free_block(b);
+	return NULL;
+    }
+
+    cp += itf8_put(cp, s->hdr->ref_seq_id);
+    cp += itf8_put(cp, s->hdr->ref_seq_start);
+    cp += itf8_put(cp, s->hdr->ref_seq_span);
+    cp += itf8_put(cp, s->hdr->num_records);
+    if (CRAM_MAJOR_VERS(fd->version) == 2)
+	cp += itf8_put(cp, s->hdr->record_counter);
+    else if (CRAM_MAJOR_VERS(fd->version) >= 3)
+	cp += ltf8_put(cp, s->hdr->record_counter);
+    cp += itf8_put(cp, s->hdr->num_blocks);
+    cp += itf8_put(cp, s->hdr->num_content_ids);
+    for (j = 0; j < s->hdr->num_content_ids; j++) {
+	cp += itf8_put(cp, s->hdr->block_content_ids[j]);
+    }
+    if (s->hdr->content_type == MAPPED_SLICE)
+	cp += itf8_put(cp, s->hdr->ref_base_id);
+
+    if (CRAM_MAJOR_VERS(fd->version) != 1) {
+	memcpy(cp, s->hdr->md5, 16); cp += 16;
+    }
+    
+    assert(cp-buf <= 16+5*(8+s->hdr->num_blocks));
+
+    b->data = (unsigned char *)buf;
+    b->comp_size = b->uncomp_size = cp-buf;
+
+    return b;
+}
+
+
+/*
+ * Encodes a single read.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+static int cram_encode_slice_read(cram_fd *fd,
+				  cram_container *c,
+				  cram_block_compression_hdr *h,
+				  cram_slice *s,
+				  cram_record *cr,
+				  int *last_pos) {
+    int r = 0;
+    int32_t i32;
+    unsigned char uc;
+
+    //fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name);
+
+    //printf("BF=0x%x\n", cr->flags);
+    //	    bf = cram_flag_swap[cr->flags];
+    i32 = fd->cram_flag_swap[cr->flags & 0xfff];
+    r |= h->codecs[DS_BF]->encode(s, h->codecs[DS_BF], (char *)&i32, 1);
+
+    i32 = cr->cram_flags;
+    r |= h->codecs[DS_CF]->encode(s, h->codecs[DS_CF], (char *)&i32, 1);
+
+    if (CRAM_MAJOR_VERS(fd->version) != 1 && s->hdr->ref_seq_id == -2)
+	r |= h->codecs[DS_RI]->encode(s, h->codecs[DS_RI], (char *)&cr->ref_id, 1);
+
+    r |= h->codecs[DS_RL]->encode(s, h->codecs[DS_RL], (char *)&cr->len, 1);
+
+    if (c->pos_sorted) {
+	i32 = cr->apos - *last_pos;
+	r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1);
+	*last_pos = cr->apos;
+    } else {
+	i32 = cr->apos;
+	r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1);
+    }
+
+    r |= h->codecs[DS_RG]->encode(s, h->codecs[DS_RG], (char *)&cr->rg, 1);
+
+    if (c->comp_hdr->read_names_included) {
+	// RN codec: Already stored in block[3].
+    }
+
+    if (cr->cram_flags & CRAM_FLAG_DETACHED) {
+	i32 = cr->mate_flags;
+	r |= h->codecs[DS_MF]->encode(s, h->codecs[DS_MF], (char *)&i32, 1);
+
+	if (!c->comp_hdr->read_names_included) {
+	    // RN codec: Already stored in block[3].
+	}
+
+	r |= h->codecs[DS_NS]->encode(s, h->codecs[DS_NS],
+				      (char *)&cr->mate_ref_id, 1);
+
+	r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP],
+				      (char *)&cr->mate_pos, 1);
+
+	r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS],
+				      (char *)&cr->tlen, 1);
+    } else if (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) {
+	r |= h->codecs[DS_NF]->encode(s, h->codecs[DS_NF],
+				      (char *)&cr->mate_line, 1);
+    }
+
+    /* Aux tags */
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	int j;
+	uc = cr->ntags;
+	r |= h->codecs[DS_TC]->encode(s, h->codecs[DS_TC], (char *)&uc, 1);
+
+	for (j = 0; j < cr->ntags; j++) {
+	    uint32_t i32 = s->TN[cr->TN_idx + j]; // id
+	    r |= h->codecs[DS_TN]->encode(s, h->codecs[DS_TN], (char *)&i32, 1);
+	}
+    } else {
+	r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1);
+    }
+
+    // qual
+    // QS codec : Already stored in block[2].
+
+    // features (diffs)
+    if (!(cr->flags & BAM_FUNMAP)) {
+	int prev_pos = 0, j;
+
+	r |= h->codecs[DS_FN]->encode(s, h->codecs[DS_FN],
+				      (char *)&cr->nfeature, 1);
+	for (j = 0; j < cr->nfeature; j++) {
+	    cram_feature *f = &s->features[cr->feature + j];
+
+	    uc = f->X.code;
+	    r |= h->codecs[DS_FC]->encode(s, h->codecs[DS_FC], (char *)&uc, 1);
+	    i32 = f->X.pos - prev_pos;
+	    r |= h->codecs[DS_FP]->encode(s, h->codecs[DS_FP], (char *)&i32, 1);
+	    prev_pos = f->X.pos;
+
+	    switch(f->X.code) {
+		//char *seq;
+
+	    case 'X':
+		//fprintf(stderr, "    FC=%c FP=%d base=%d\n", f->X.code, i32, f->X.base);
+		
+		uc = f->X.base;
+		r |= h->codecs[DS_BS]->encode(s, h->codecs[DS_BS],
+					      (char *)&uc, 1);
+		break;
+	    case 'S':
+		// Already done
+//		r |= h->codecs[DS_SC]->encode(s, h->codecs[DS_SC],
+//					      BLOCK_DATA(s->soft_blk) + f->S.seq_idx,
+//					      f->S.len);
+
+//		if (IS_CRAM_3_VERS(fd)) {
+//		    r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], 
+//						  BLOCK_DATA(s->seqs_blk) + f->S.seq_idx,
+//						  f->S.len);
+//		}
+		break;
+	    case 'I':
+		//seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx;
+		//r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN],
+		//			     seq, f->S.len);
+//		if (IS_CRAM_3_VERS(fd)) {
+//		    r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], 
+//						  BLOCK_DATA(s->seqs_blk) + f->I.seq_idx,
+//						  f->I.len);
+//		}
+		break;
+	    case 'i':
+		uc = f->i.base;
+		r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA],
+					      (char *)&uc, 1);
+		//seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx;
+		//r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN], 
+		//			     seq, 1);
+		break;
+	    case 'D':
+		i32 = f->D.len;
+		r |= h->codecs[DS_DL]->encode(s, h->codecs[DS_DL],
+					      (char *)&i32, 1);
+		break;
+
+	    case 'B':
+		//		    // Used when we try to store a non ACGTN base or an N
+		//		    // that aligns against a non ACGTN reference
+
+		uc  = f->B.base;
+		r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA],
+					      (char *)&uc, 1);
+
+		//                  Already added
+		//		    uc  = f->B.qual;
+		//		    r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], 
+		//					     (char *)&uc, 1);
+		break;
+
+	    case 'b':
+		// string of bases
+		r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB], 
+					      (char *)BLOCK_DATA(s->seqs_blk)
+					              + f->b.seq_idx,
+					      f->b.len);
+		break;
+
+	    case 'Q':
+		//                  Already added
+		//		    uc  = f->B.qual;
+		//		    r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS], 
+		//					     (char *)&uc, 1);
+		break;
+
+	    case 'N':
+		i32 = f->N.len;
+		r |= h->codecs[DS_RS]->encode(s, h->codecs[DS_RS],
+					      (char *)&i32, 1);
+		break;
+		    
+	    case 'P':
+		i32 = f->P.len;
+		r |= h->codecs[DS_PD]->encode(s, h->codecs[DS_PD],
+					      (char *)&i32, 1);
+		break;
+		    
+	    case 'H':
+		i32 = f->H.len;
+		r |= h->codecs[DS_HC]->encode(s, h->codecs[DS_HC],
+					      (char *)&i32, 1);
+		break;
+		    
+
+	    default:
+		fprintf(stderr, "unhandled feature code %c\n",
+			f->X.code);
+		return -1;
+	    }
+	}
+
+	r |= h->codecs[DS_MQ]->encode(s, h->codecs[DS_MQ],
+				      (char *)&cr->mqual, 1);
+    } else {
+	char *seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq;
+	r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len);
+    }
+
+    return r ? -1 : 0;
+}
+
+
+/*
+ * Applies various compression methods to specific blocks, depending on
+ * known observations of how data series compress.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+static int cram_compress_slice(cram_fd *fd, cram_slice *s) {
+    int level = fd->level, i;
+    int method = 1<<GZIP | 1<<GZIP_RLE, methodF = method;
+
+    /* Compress the CORE Block too, with minimal zlib level */
+    if (level > 5 && s->block[0]->uncomp_size > 500)
+	cram_compress_block(fd, s->block[0], NULL, GZIP, 1);
+ 
+    if (fd->use_bz2)
+	method |= 1<<BZIP2;
+
+    if (fd->use_rans)
+	method |= (1<<RANS0) | (1<<RANS1);
+
+    if (fd->use_lzma)
+	method |= (1<<LZMA);
+
+    /* Faster method for data series we only need entropy encoding on */
+    methodF = method & ~(1<<GZIP | 1<<BZIP2 | 1<<LZMA);
+    if (level >= 6)
+	methodF = method;
+    
+
+    /* Specific compression methods for certain block types */
+    if (cram_compress_block(fd, s->block[DS_IN], fd->m[DS_IN], //IN (seq)
+			    method, level))
+	return -1;
+
+    if (fd->level == 0) {
+	/* Do nothing */
+    } else if (fd->level == 1) {
+	if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
+				methodF, 1))
+	    return -1;
+	for (i = DS_aux; i <= DS_aux_oz; i++) {
+	    if (s->block[i])
+		if (cram_compress_block(fd, s->block[i], fd->m[i],
+					method, 1))
+		    return -1;
+	}
+    } else if (fd->level < 3) {
+	if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
+				method, 1))
+	    return -1;
+	if (cram_compress_block(fd, s->block[DS_BA], fd->m[DS_BA],
+				method, 1))
+	    return -1;
+	if (s->block[DS_BB])
+	    if (cram_compress_block(fd, s->block[DS_BB], fd->m[DS_BB],
+				    method, 1))
+	    return -1;
+	for (i = DS_aux; i <= DS_aux_oz; i++) {
+	    if (s->block[i])
+		if (cram_compress_block(fd, s->block[i], fd->m[i],
+					method, level))
+		    return -1;
+	}
+    } else {
+	if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
+				method, level))
+	    return -1;
+	if (cram_compress_block(fd, s->block[DS_BA], fd->m[DS_BA],
+				method, level))
+	    return -1;
+	if (s->block[DS_BB])
+	    if (cram_compress_block(fd, s->block[DS_BB], fd->m[DS_BB],
+				    method, level))
+	    return -1;
+	for (i = DS_aux; i <= DS_aux_oz; i++) {
+	    if (s->block[i])
+		if (cram_compress_block(fd, s->block[i], fd->m[i],
+					method, level))
+		    return -1;
+	}
+    }
+
+    // NAME: best is generally xz, bzip2, zlib then rans1
+    // It benefits well from a little bit extra compression level.
+    if (cram_compress_block(fd, s->block[DS_RN], fd->m[DS_RN],
+			    method & ~(1<<RANS0 | 1<<GZIP_RLE),
+			    MIN(9,level)))
+	return -1;
+
+    // NS shows strong local correlation as rearrangements are localised
+    if (s->block[DS_NS] != s->block[0])
+	if (cram_compress_block(fd, s->block[DS_NS], fd->m[DS_NS],
+				method, level))
+	    return -1;
+
+
+    /*
+     * Minimal compression of any block still uncompressed, bar CORE
+     */
+    {
+	int i;
+	for (i = 1; i < DS_END; i++) {
+	    if (!s->block[i] || s->block[i] == s->block[0])
+		continue;
+
+	    // fast methods only
+	    if (s->block[i]->method == RAW) {
+		cram_compress_block(fd, s->block[i], fd->m[i],
+				    methodF, level);
+	    }
+	}
+    }
+
+    return 0;
+}
+
+/*
+ * Encodes a single slice from a container
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+static int cram_encode_slice(cram_fd *fd, cram_container *c,
+			     cram_block_compression_hdr *h, cram_slice *s) {
+    int rec, r = 0, last_pos;
+    int embed_ref;
+    enum cram_DS_ID id;
+
+    embed_ref = fd->embed_ref && s->hdr->ref_seq_id != -1 ? 1 : 0;
+
+    /*
+     * Slice external blocks:
+     * ID 0 => base calls (insertions, soft-clip)
+     * ID 1 => qualities
+     * ID 2 => names
+     * ID 3 => TS (insert size), NP (next frag)
+     * ID 4 => tag values
+     * ID 6 => tag IDs (TN), if CRAM_V1.0
+     * ID 7 => TD tag dictionary, if !CRAM_V1.0
+     */
+
+    /* Create cram slice header */
+    s->hdr->ref_base_id = embed_ref ? DS_ref : -1;
+    s->hdr->record_counter = c->num_records + c->record_counter;
+    c->num_records += s->hdr->num_records;
+
+    s->block = calloc(DS_END, sizeof(s->block[0]));
+    s->hdr->block_content_ids = malloc(DS_END * sizeof(int32_t));
+    if (!s->block || !s->hdr->block_content_ids)
+	return -1;
+
+    // Create first fixed blocks, always external.
+    // CORE
+    if (!(s->block[0] = cram_new_block(CORE, 0)))
+	return -1;
+
+    // TN block for CRAM v1
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	if (h->codecs[DS_TN]->codec == E_EXTERNAL) {
+	    if (!(s->block[DS_TN] = cram_new_block(EXTERNAL,DS_TN))) return -1;
+	    h->codecs[DS_TN]->external.content_id = DS_TN;
+	} else {
+	    s->block[DS_TN] = s->block[0];
+	}
+	s->block[DS_TN] = s->block[DS_TN];
+    }
+
+    // Embedded reference
+    if (embed_ref) {
+	if (!(s->block[DS_ref] = cram_new_block(EXTERNAL, DS_ref)))
+	    return -1;
+	s->ref_id = DS_ref; // needed?
+	BLOCK_APPEND(s->block[DS_ref],
+		     c->ref + c->first_base - c->ref_start,
+		     c->last_base - c->first_base + 1);
+    }
+
+    /*
+     * All the data-series blocks if appropriate. 
+     */
+    for (id = DS_BF; id < DS_TN; id++) {
+	if (h->codecs[id] && (h->codecs[id]->codec == E_EXTERNAL ||
+			      h->codecs[id]->codec == E_BYTE_ARRAY_STOP ||
+			      h->codecs[id]->codec == E_BYTE_ARRAY_LEN)) {
+	    switch (h->codecs[id]->codec) {
+	    case E_EXTERNAL:
+		if (!(s->block[id] = cram_new_block(EXTERNAL, id)))
+		    return -1;
+		h->codecs[id]->external.content_id = id;
+		break;
+
+	    case E_BYTE_ARRAY_STOP:
+		if (!(s->block[id] = cram_new_block(EXTERNAL, id)))
+		    return -1;
+		h->codecs[id]->byte_array_stop.content_id = id;
+		break;
+
+	    case E_BYTE_ARRAY_LEN: {
+		cram_codec *cc;
+
+		cc = h->codecs[id]->e_byte_array_len.len_codec;
+		if (cc->codec == E_EXTERNAL) {
+		    int eid = cc->external.content_id;
+		    if (!(s->block[eid] = cram_new_block(EXTERNAL, eid)))
+			return -1;
+		    cc->external.content_id = eid;
+		    cc->out = s->block[eid];
+		}
+
+		cc = h->codecs[id]->e_byte_array_len.val_codec;
+		if (cc->codec == E_EXTERNAL) {
+		    int eid = cc->external.content_id;
+		    if (!s->block[eid])
+			if (!(s->block[eid] = cram_new_block(EXTERNAL, eid)))
+			    return -1;
+		    cc->external.content_id = eid;
+		    cc->out = s->block[eid];
+		}
+		break;
+	    }
+	    default:
+		break;
+	    }
+	} else {
+	    if (!(id == DS_BB && !h->codecs[DS_BB]))
+		s->block[id] = s->block[0];
+	}
+	if (h->codecs[id])
+	    h->codecs[id]->out = s->block[id];
+    }
+
+    /* Encode reads */
+    last_pos = s->hdr->ref_seq_start;
+    for (rec = 0; rec < s->hdr->num_records; rec++) {
+	cram_record *cr = &s->crecs[rec];
+	if (cram_encode_slice_read(fd, c, h, s, cr, &last_pos) == -1)
+	    return -1;
+    }
+
+    s->block[0]->uncomp_size = s->block[0]->byte + (s->block[0]->bit < 7);
+    s->block[0]->comp_size = s->block[0]->uncomp_size;
+
+    // Make sure the fixed blocks point to the correct sources
+    s->block[DS_IN] = s->base_blk; s->base_blk = NULL;
+    s->block[DS_QS] = s->qual_blk; s->qual_blk = NULL;
+    s->block[DS_RN] = s->name_blk; s->name_blk = NULL;
+    s->block[DS_SC] = s->soft_blk; s->soft_blk = NULL;
+    s->block[DS_aux]= s->aux_blk;  s->aux_blk  = NULL;
+    s->block[DS_aux_OQ]= s->aux_OQ_blk;  s->aux_OQ_blk  = NULL;
+    s->block[DS_aux_BQ]= s->aux_BQ_blk;  s->aux_BQ_blk  = NULL;
+    s->block[DS_aux_BD]= s->aux_BD_blk;  s->aux_BD_blk  = NULL;
+    s->block[DS_aux_BI]= s->aux_BI_blk;  s->aux_BI_blk  = NULL;
+    s->block[DS_aux_FZ]= s->aux_FZ_blk;  s->aux_FZ_blk  = NULL;
+    s->block[DS_aux_oq]= s->aux_oq_blk;  s->aux_oq_blk  = NULL;
+    s->block[DS_aux_os]= s->aux_os_blk;  s->aux_os_blk  = NULL;
+    s->block[DS_aux_oz]= s->aux_oz_blk;  s->aux_oz_blk  = NULL;
+
+    // Ensure block sizes are up to date.
+    for (id = 1; id < DS_END; id++) {
+	if (!s->block[id] || s->block[id] == s->block[0])
+	    continue;
+
+	if (s->block[id]->uncomp_size == 0)
+	    BLOCK_UPLEN(s->block[id]);
+    }
+
+    // Compress it all
+    if (cram_compress_slice(fd, s) == -1)
+	return -1;
+
+    // Collapse empty blocks and create hdr_block
+    {
+	int i, j;
+	for (i = j = 1; i < DS_END; i++) {
+	    if (!s->block[i] || s->block[i] == s->block[0])
+		continue;
+	    if (s->block[i]->uncomp_size == 0) {
+		cram_free_block(s->block[i]);
+		s->block[i] = NULL;
+		continue;
+	    }
+	    s->block[j] = s->block[i];
+	    s->hdr->block_content_ids[j-1] = s->block[i]->content_id;
+	    j++;
+	}
+	s->hdr->num_content_ids = j-1;
+	s->hdr->num_blocks = j;
+
+	if (!(s->hdr_block = cram_encode_slice_header(fd, s)))
+	    return -1;
+    }
+
+    return r ? -1 : 0;
+}
+
+/*
+ * Encodes all slices in a container into blocks.
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_encode_container(cram_fd *fd, cram_container *c) {
+    int i, j, slice_offset;
+    cram_block_compression_hdr *h = c->comp_hdr;
+    cram_block *c_hdr;
+    int multi_ref = 0;
+    int r1, r2, sn, nref;
+    spare_bams *spares;
+
+    /* Cache references up-front if we have unsorted access patterns */
+    pthread_mutex_lock(&fd->ref_lock);
+    nref = fd->refs->nref;
+    pthread_mutex_unlock(&fd->ref_lock);
+
+    if (!fd->no_ref && c->refs_used) {
+	for (i = 0; i < nref; i++) {
+	    if (c->refs_used[i])
+		cram_get_ref(fd, i, 1, 0);
+	}
+    }
+
+    /* To create M5 strings */
+    /* Fetch reference sequence */
+    if (!fd->no_ref) {
+	bam_seq_t *b = c->bams[0];
+	char *ref;
+
+	ref = cram_get_ref(fd, bam_ref(b), 1, 0);
+	if (!ref && bam_ref(b) >= 0) {
+	    fprintf(stderr, "Failed to load reference #%d\n", bam_ref(b));
+	    return -1;
+	}
+	if ((c->ref_id = bam_ref(b)) >= 0) {
+	    c->ref_seq_id = c->ref_id;
+	    c->ref       = fd->refs->ref_id[c->ref_seq_id]->seq;
+	    c->ref_start = 1;
+	    c->ref_end   = fd->refs->ref_id[c->ref_seq_id]->length;
+	} else {
+	    c->ref_seq_id = c->ref_id; // FIXME remove one var!
+	}
+    } else {
+	c->ref_id = bam_ref(c->bams[0]);
+	cram_ref_incr(fd->refs, c->ref_id);
+	c->ref_seq_id = c->ref_id;
+    }
+
+    /* Turn bams into cram_records and gather basic stats */
+    for (r1 = sn = 0; r1 < c->curr_c_rec; sn++) {
+	cram_slice *s = c->slices[sn];
+	int first_base = INT_MAX, last_base = INT_MIN;
+
+	assert(sn < c->curr_slice);
+
+	/* FIXME: we could create our slice objects here too instead of
+	 * in cram_put_bam_seq. It's more natural here and also this is
+	 * bit is threaded so it's less work in the main thread.
+	 */
+
+	for (r2 = 0; r1 < c->curr_c_rec && r2 < c->max_rec; r1++, r2++) {
+	    cram_record *cr = &s->crecs[r2];
+	    bam_seq_t *b = c->bams[r1];
+
+	    /* If multi-ref we need to cope with changing reference per seq */
+	    if (c->multi_seq && !fd->no_ref) {
+		if (bam_ref(b) != c->ref_seq_id && bam_ref(b) >= 0) {
+		    if (c->ref_seq_id >= 0)
+			cram_ref_decr(fd->refs, c->ref_seq_id);
+
+		    if (!cram_get_ref(fd, bam_ref(b), 1, 0)) {
+			fprintf(stderr, "Failed to load reference #%d\n",
+				bam_ref(b));
+			return -1;
+		    }
+
+		    c->ref_seq_id = bam_ref(b); // overwritten later by -2
+		    assert(fd->refs->ref_id[c->ref_seq_id]->seq);
+		    c->ref       = fd->refs->ref_id[c->ref_seq_id]->seq;
+		    c->ref_start = 1;
+		    c->ref_end   = fd->refs->ref_id[c->ref_seq_id]->length;
+		}
+	    }
+
+	    process_one_read(fd, c, s, cr, b, r2);
+
+	    if (first_base > cr->apos)
+		first_base = cr->apos;
+
+	    if (last_base < cr->aend)
+		last_base = cr->aend;
+	}
+
+	if (c->multi_seq) {
+	    s->hdr->ref_seq_id    = -2;
+	    s->hdr->ref_seq_start = 0;
+	    s->hdr->ref_seq_span  = 0;
+	} else {
+	    s->hdr->ref_seq_id    = c->ref_id;
+	    s->hdr->ref_seq_start = first_base;
+	    s->hdr->ref_seq_span  = last_base - first_base + 1;
+	}
+	s->hdr->num_records = r2;
+    }
+
+    if (c->multi_seq && !fd->no_ref) {
+	if (c->ref_seq_id >= 0)
+	    cram_ref_decr(fd->refs, c->ref_seq_id);
+    }
+
+    /* Link our bams[] array onto the spare bam list for reuse */
+    spares = malloc(sizeof(*spares));
+    pthread_mutex_lock(&fd->bam_list_lock);
+    spares->bams = c->bams;
+    spares->next = fd->bl;
+    fd->bl = spares;
+    pthread_mutex_unlock(&fd->bam_list_lock);
+    c->bams = NULL;
+
+    /* Detect if a multi-seq container */
+    cram_stats_encoding(fd, c->stats[DS_RI]);
+    multi_ref = c->stats[DS_RI]->nvals > 1;
+
+    if (multi_ref) {
+	if (fd->verbose)
+	    fprintf(stderr, "Multi-ref container\n");
+	c->ref_seq_id = -2;
+	c->ref_seq_start = 0;
+	c->ref_seq_span = 0;
+    }
+
+
+    /* Compute MD5s */
+    for (i = 0; i < c->curr_slice; i++) {
+	cram_slice *s = c->slices[i];
+	
+	if (CRAM_MAJOR_VERS(fd->version) != 1) {
+	    if (s->hdr->ref_seq_id >= 0 && c->multi_seq == 0 && !fd->no_ref) {
+		MD5_CTX md5;
+		MD5_Init(&md5);
+		MD5_Update(&md5,
+			   c->ref + s->hdr->ref_seq_start - c->ref_start,
+			   s->hdr->ref_seq_span);
+		MD5_Final(s->hdr->md5, &md5);
+	    } else {
+		memset(s->hdr->md5, 0, 16);
+	    }
+	}
+    }
+
+    c->num_records = 0;
+    c->num_blocks = 0;
+    c->length = 0;
+
+    //fprintf(stderr, "=== BF ===\n");
+    h->codecs[DS_BF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BF]),
+					 c->stats[DS_BF], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== CF ===\n");
+    h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]),
+					 c->stats[DS_CF], E_INT, NULL,
+					 fd->version);
+//    fprintf(stderr, "=== RN ===\n");
+//    h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]),
+//				    c->stats[DS_RN], E_BYTE_ARRAY, NULL,
+//				    fd->version);
+
+    //fprintf(stderr, "=== AP ===\n");
+    if (c->pos_sorted) {
+	h->codecs[DS_AP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_AP]),
+					     c->stats[DS_AP], E_INT, NULL,
+					     fd->version);
+    } else {
+	int p[2] = {0, c->max_apos};
+	h->codecs[DS_AP] = cram_encoder_init(E_BETA, NULL, E_INT, p,
+					     fd->version);
+    }
+
+    //fprintf(stderr, "=== RG ===\n");
+    h->codecs[DS_RG] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RG]),
+					 c->stats[DS_RG], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== MQ ===\n");
+    h->codecs[DS_MQ] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MQ]),
+					 c->stats[DS_MQ], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== NS ===\n");
+    h->codecs[DS_NS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NS]),
+					 c->stats[DS_NS], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== MF ===\n");
+    h->codecs[DS_MF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MF]),
+					 c->stats[DS_MF], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== TS ===\n");
+    h->codecs[DS_TS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TS]),
+					 c->stats[DS_TS], E_INT, NULL,
+					 fd->version);
+    //fprintf(stderr, "=== NP ===\n");
+    h->codecs[DS_NP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NP]),
+					 c->stats[DS_NP], E_INT, NULL,
+					 fd->version);
+    //fprintf(stderr, "=== NF ===\n");
+    h->codecs[DS_NF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NF]),
+					 c->stats[DS_NF], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== RL ===\n");
+    h->codecs[DS_RL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RL]),
+					 c->stats[DS_RL], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== FN ===\n");
+    h->codecs[DS_FN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FN]),
+					 c->stats[DS_FN], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== FC ===\n");
+    h->codecs[DS_FC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FC]),
+					 c->stats[DS_FC], E_BYTE, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== FP ===\n");
+    h->codecs[DS_FP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FP]),
+					 c->stats[DS_FP], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== DL ===\n");
+    h->codecs[DS_DL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_DL]),
+					 c->stats[DS_DL], E_INT, NULL,
+					 fd->version);
+
+    //fprintf(stderr, "=== BA ===\n");
+    h->codecs[DS_BA] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BA]),
+					 c->stats[DS_BA], E_BYTE, NULL,
+					 fd->version);
+
+    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
+	cram_byte_array_len_encoder e;
+
+	e.len_encoding = E_EXTERNAL;
+	e.len_dat = (void *)DS_BB_len;
+	//e.len_dat = (void *)DS_BB;
+
+	e.val_encoding = E_EXTERNAL;
+	e.val_dat = (void *)DS_BB;
+
+	h->codecs[DS_BB] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL,
+					     E_BYTE_ARRAY, (void *)&e,
+					     fd->version);
+    } else {
+	h->codecs[DS_BB] = NULL;
+    }
+
+    //fprintf(stderr, "=== BS ===\n");
+    h->codecs[DS_BS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BS]),
+					 c->stats[DS_BS], E_BYTE, NULL,
+					 fd->version);
+
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	h->codecs[DS_TL] = NULL;
+	h->codecs[DS_RI] = NULL;
+	h->codecs[DS_RS] = NULL;
+	h->codecs[DS_PD] = NULL;
+	h->codecs[DS_HC] = NULL;
+	h->codecs[DS_SC] = NULL;
+
+	//fprintf(stderr, "=== TC ===\n");
+	h->codecs[DS_TC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TC]),
+					     c->stats[DS_TC], E_BYTE, NULL,
+					     fd->version);
+
+    //fprintf(stderr, "=== TN ===\n");
+	h->codecs[DS_TN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TN]),
+					     c->stats[DS_TN], E_INT, NULL,
+					     fd->version);
+    } else {
+	h->codecs[DS_TC] = NULL;
+	h->codecs[DS_TN] = NULL;
+
+	//fprintf(stderr, "=== TL ===\n");
+	h->codecs[DS_TL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TL]),
+					     c->stats[DS_TL], E_INT, NULL,
+					     fd->version);
+
+
+	//fprintf(stderr, "=== RI ===\n");
+	h->codecs[DS_RI] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RI]),
+					     c->stats[DS_RI], E_INT, NULL,
+					     fd->version);
+
+	//fprintf(stderr, "=== RS ===\n");
+	h->codecs[DS_RS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RS]),
+					     c->stats[DS_RS], E_INT, NULL,
+					     fd->version);
+
+	//fprintf(stderr, "=== PD ===\n");
+	h->codecs[DS_PD] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_PD]),
+					     c->stats[DS_PD], E_INT, NULL,
+					     fd->version);
+
+	//fprintf(stderr, "=== HC ===\n");
+	h->codecs[DS_HC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_HC]),
+					     c->stats[DS_HC], E_INT, NULL,
+					     fd->version);
+
+	//fprintf(stderr, "=== SC ===\n");
+	if (1) {
+	    int i2[2] = {0, DS_SC};
+
+	    h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
+						 E_BYTE_ARRAY, (void *)i2,
+						 fd->version);
+	} else {
+	    // Appears to be no practical benefit to using this method,
+	    // but it may work better if we start mixing SC, IN and BB
+	    // elements into the same external block.
+	    cram_byte_array_len_encoder e;
+
+	    e.len_encoding = E_EXTERNAL;
+	    e.len_dat = (void *)DS_SC_len;
+
+	    e.val_encoding = E_EXTERNAL;
+	    e.val_dat = (void *)DS_SC;
+
+	    h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL,
+						 E_BYTE_ARRAY, (void *)&e,
+						 fd->version);
+	}
+    }
+    
+    //fprintf(stderr, "=== IN ===\n");
+    {
+	int i2[2] = {0, DS_IN};
+	h->codecs[DS_IN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
+					     E_BYTE_ARRAY, (void *)i2,
+					     fd->version);
+    }
+
+    h->codecs[DS_QS] = cram_encoder_init(E_EXTERNAL, NULL, E_BYTE,
+					 (void *)DS_QS,
+					 fd->version);
+    {
+	int i2[2] = {0, DS_RN};
+	h->codecs[DS_RN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
+					     E_BYTE_ARRAY, (void *)i2,
+					     fd->version);
+    }
+
+
+    /* Encode slices */
+    for (i = 0; i < c->curr_slice; i++) {
+	if (fd->verbose)
+	    fprintf(stderr, "Encode slice %d\n", i);
+	if (cram_encode_slice(fd, c, h, c->slices[i]) != 0)
+	    return -1;
+    }
+
+    /* Create compression header */
+    {
+	h->ref_seq_id    = c->ref_seq_id;
+	h->ref_seq_start = c->ref_seq_start;
+	h->ref_seq_span  = c->ref_seq_span;
+	h->num_records   = c->num_records;
+	
+	h->mapped_qs_included = 0;   // fixme
+	h->unmapped_qs_included = 0; // fixme
+	// h->...  fixme
+	memcpy(h->substitution_matrix, CRAM_SUBST_MATRIX, 20);
+
+	if (!(c_hdr = cram_encode_compression_header(fd, c, h)))
+	    return -1;
+    }
+
+    /* Compute landmarks */
+    /* Fill out slice landmarks */
+    c->num_landmarks = c->curr_slice;
+    c->landmark = malloc(c->num_landmarks * sizeof(*c->landmark));
+    if (!c->landmark)
+	return -1;
+
+    /*
+     * Slice offset starts after the first block, so we need to simulate
+     * writing it to work out the correct offset
+     */
+    {
+	slice_offset = c_hdr->method == RAW
+	    ? c_hdr->uncomp_size
+	    : c_hdr->comp_size;
+	slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
+	    itf8_size(c_hdr->content_id) +
+	    itf8_size(c_hdr->comp_size) +
+	    itf8_size(c_hdr->uncomp_size);
+    }
+
+    c->ref_seq_id    = c->slices[0]->hdr->ref_seq_id;
+    c->ref_seq_start = c->slices[0]->hdr->ref_seq_start;
+    c->ref_seq_span  = c->slices[0]->hdr->ref_seq_span;
+    for (i = 0; i < c->curr_slice; i++) {
+	cram_slice *s = c->slices[i];
+	
+	c->num_blocks += s->hdr->num_blocks + 2;
+	c->landmark[i] = slice_offset;
+
+	if (s->hdr->ref_seq_start + s->hdr->ref_seq_span >
+	    c->ref_seq_start + c->ref_seq_span) {
+	    c->ref_seq_span = s->hdr->ref_seq_start + s->hdr->ref_seq_span
+		- c->ref_seq_start;
+	}
+	
+	slice_offset += s->hdr_block->method == RAW
+	    ? s->hdr_block->uncomp_size
+	    : s->hdr_block->comp_size;
+
+	slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
+	    itf8_size(s->hdr_block->content_id) +
+	    itf8_size(s->hdr_block->comp_size) +
+	    itf8_size(s->hdr_block->uncomp_size);
+
+	for (j = 0; j < s->hdr->num_blocks; j++) {
+	    slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
+		itf8_size(s->block[j]->content_id) +
+		itf8_size(s->block[j]->comp_size) +
+		itf8_size(s->block[j]->uncomp_size);
+
+	    slice_offset += s->block[j]->method == RAW
+		? s->block[j]->uncomp_size
+		: s->block[j]->comp_size;
+	}
+    }
+    c->length += slice_offset; // just past the final slice
+
+    c->comp_hdr_block = c_hdr;
+
+    if (c->ref_seq_id >= 0) {
+	cram_ref_decr(fd->refs, c->ref_seq_id);
+    }
+
+    /* Cache references up-front if we have unsorted access patterns */
+    if (!fd->no_ref && c->refs_used) {
+	for (i = 0; i < fd->refs->nref; i++) {
+	    if (c->refs_used[i])
+		cram_ref_decr(fd->refs, i);
+	}
+    }
+
+    return 0;
+}
+
+
+/*
+ * Adds a feature code to a read within a slice. For purposes of minimising
+ * memory allocations and fragmentation we have one array of features for all
+ * reads within the slice. We return the index into this array for this new
+ * feature.
+ *
+ * Returns feature index on success
+ *         -1 on failure.
+ */
+static int cram_add_feature(cram_container *c, cram_slice *s,
+			    cram_record *r, cram_feature *f) {
+    if (s->nfeatures >= s->afeatures) {
+	s->afeatures = s->afeatures ? s->afeatures*2 : 1024;
+	s->features = realloc(s->features, s->afeatures*sizeof(*s->features));
+	if (!s->features)
+	    return -1;
+    }
+
+    if (!r->nfeature++) {
+	r->feature = s->nfeatures;
+	cram_stats_add(c->stats[DS_FP], f->X.pos);
+    } else {
+	cram_stats_add(c->stats[DS_FP],
+		       f->X.pos - s->features[r->feature + r->nfeature-2].X.pos);
+    }
+    cram_stats_add(c->stats[DS_FC], f->X.code);
+
+    s->features[s->nfeatures++] = *f;
+
+    return 0;
+}
+
+static int cram_add_substitution(cram_fd *fd, cram_container *c,
+				 cram_slice *s, cram_record *r,
+				 int pos, char base, char qual, char ref) {
+    cram_feature f;
+
+    // seq=ACGTN vs ref=ACGT or seq=ACGT vs ref=ACGTN
+    if (fd->L2[(uc)base]<4 || (fd->L2[(uc)base]<5 && fd->L2[(uc)ref]<4)) {
+	f.X.pos = pos+1;
+	f.X.code = 'X';
+	f.X.base = fd->cram_sub_matrix[ref&0x1f][base&0x1f];
+	cram_stats_add(c->stats[DS_BS], f.X.base);
+    } else {
+	f.B.pos = pos+1;
+	f.B.code = 'B';
+	f.B.base = base;
+	f.B.qual = qual;
+	cram_stats_add(c->stats[DS_BA], f.B.base);
+	cram_stats_add(c->stats[DS_QS], f.B.qual);
+	BLOCK_APPEND_CHAR(s->qual_blk, qual);
+    }
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_bases(cram_fd *fd, cram_container *c,
+			  cram_slice *s, cram_record *r,
+			  int pos, int len, char *base) {
+    cram_feature f;
+
+    f.b.pos = pos+1;
+    f.b.code = 'b';
+    f.b.seq_idx = base - (char *)BLOCK_DATA(s->seqs_blk);
+    f.b.len = len;
+
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_base(cram_fd *fd, cram_container *c,
+			 cram_slice *s, cram_record *r,
+			 int pos, char base, char qual) {
+    cram_feature f;
+    f.B.pos = pos+1;
+    f.B.code = 'B';
+    f.B.base = base;
+    f.B.qual = qual;
+    cram_stats_add(c->stats[DS_BA], base);
+    cram_stats_add(c->stats[DS_QS], qual);
+    BLOCK_APPEND_CHAR(s->qual_blk, qual);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_quality(cram_fd *fd, cram_container *c,
+			    cram_slice *s, cram_record *r,
+			    int pos, char qual) {
+    cram_feature f;
+    f.Q.pos = pos+1;
+    f.Q.code = 'Q';
+    f.Q.qual = qual;
+    cram_stats_add(c->stats[DS_QS], qual);
+    BLOCK_APPEND_CHAR(s->qual_blk, qual);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_deletion(cram_container *c, cram_slice *s, cram_record *r,
+			     int pos, int len, char *base) {
+    cram_feature f;
+    f.D.pos = pos+1;
+    f.D.code = 'D';
+    f.D.len = len;
+    cram_stats_add(c->stats[DS_DL], len);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_softclip(cram_container *c, cram_slice *s, cram_record *r,
+			     int pos, int len, char *base, int version) {
+    cram_feature f;
+    f.S.pos = pos+1;
+    f.S.code = 'S';
+    f.S.len = len;
+    switch (CRAM_MAJOR_VERS(version)) {
+    case 1:
+	f.S.seq_idx = BLOCK_SIZE(s->base_blk);
+	BLOCK_APPEND(s->base_blk, base, len);
+	BLOCK_APPEND_CHAR(s->base_blk, '\0');
+	break;
+
+    case 2:
+    default:
+	f.S.seq_idx = BLOCK_SIZE(s->soft_blk);
+	if (base) {
+	    BLOCK_APPEND(s->soft_blk, base, len);
+	} else {
+	    int i;
+	    for (i = 0; i < len; i++)
+		BLOCK_APPEND_CHAR(s->soft_blk, 'N');
+	}
+	BLOCK_APPEND_CHAR(s->soft_blk, '\0');
+	break;
+
+//    default:
+//	// v3.0 onwards uses BB data-series
+//	f.S.seq_idx = BLOCK_SIZE(s->soft_blk);
+    }
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_hardclip(cram_container *c, cram_slice *s, cram_record *r,
+			     int pos, int len, char *base) {
+    cram_feature f;
+    f.S.pos = pos+1;
+    f.S.code = 'H';
+    f.S.len = len;
+    cram_stats_add(c->stats[DS_HC], len);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_skip(cram_container *c, cram_slice *s, cram_record *r,
+			     int pos, int len, char *base) {
+    cram_feature f;
+    f.S.pos = pos+1;
+    f.S.code = 'N';
+    f.S.len = len;
+    cram_stats_add(c->stats[DS_RS], len);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_pad(cram_container *c, cram_slice *s, cram_record *r,
+			     int pos, int len, char *base) {
+    cram_feature f;
+    f.S.pos = pos+1;
+    f.S.code = 'P';
+    f.S.len = len;
+    cram_stats_add(c->stats[DS_PD], len);
+    return cram_add_feature(c, s, r, &f);
+}
+
+static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r,
+			      int pos, int len, char *base) {
+    cram_feature f;
+    f.I.pos = pos+1;
+    if (len == 1) {
+	char b = base ? *base : 'N';
+	f.i.code = 'i';
+	f.i.base = b;
+	cram_stats_add(c->stats[DS_BA], b);
+    } else {
+	f.I.code = 'I';
+	f.I.len = len;
+	f.S.seq_idx = BLOCK_SIZE(s->base_blk);
+	if (base) {
+	    BLOCK_APPEND(s->base_blk, base, len);
+	} else {
+	    int i;
+	    for (i = 0; i < len; i++)
+		BLOCK_APPEND_CHAR(s->base_blk, 'N');
+	}
+	BLOCK_APPEND_CHAR(s->base_blk, '\0');
+    }
+    return cram_add_feature(c, s, r, &f);
+}
+
+/*
+ * Encodes auxiliary data.
+ * Returns the read-group parsed out of the BAM aux fields on success
+ *         NULL on failure or no rg present (FIXME)
+ */
+static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c,
+				 cram_slice *s, cram_record *cr) {
+    char *aux, *tmp, *rg = NULL;
+    int aux_size = bam_blk_size(b) -
+	((char *)bam_aux(b) - (char *)&bam_ref(b));
+	
+    /* Worst case is 1 nul char on every ??:Z: string, so +33% */
+    BLOCK_GROW(s->aux_blk, aux_size*1.34+1);
+    tmp = (char *)BLOCK_END(s->aux_blk);
+
+    aux = (char *)bam_aux(b);
+    cr->TN_idx = s->nTN;
+
+    while (aux[0] != 0) {
+	int32_t i32;
+	int r;
+
+	if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') {
+	    rg = &aux[3];
+	    while (*aux++);
+	    continue;
+	}
+	if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') {
+	    while (*aux++);
+	    continue;
+	}
+	if (aux[0] == 'N' && aux[1] == 'M') {
+	    switch(aux[2]) {
+	    case 'A': case 'C': case 'c': aux+=4; break;
+	    case 'I': case 'i': case 'f': aux+=7; break;
+	    default:
+		fprintf(stderr, "Unhandled type code for NM tag\n");
+		return NULL;
+	    }
+	    continue;
+	}
+
+	cr->ntags++;
+
+	i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
+	kh_put(s_i2i, c->tags_used, i32, &r);
+	if (-1 == r)
+	    return NULL;
+
+	if (s->nTN >= s->aTN) {
+	    s->aTN = s->aTN ? s->aTN*2 : 1024;
+	    if (!(s->TN = realloc(s->TN, s->aTN * sizeof(*s->TN))))
+		return NULL;
+	}
+	s->TN[s->nTN++] = i32;
+	cram_stats_add(c->stats[DS_TN], i32);
+
+	switch(aux[2]) {
+	case 'A': case 'C': case 'c':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++;
+	    break;
+
+	case 'S': case 's':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'I': case 'i': case 'f':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'd':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'Z': case 'H':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t'; // stop byte
+	    break;
+
+	case 'B': {
+	    int type = aux[3], blen;
+	    uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
+					(((unsigned char *)aux)[5]<< 8) +
+					(((unsigned char *)aux)[6]<<16) +
+					(((unsigned char *)aux)[7]<<24));
+	    // skip TN field
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+
+	    // We use BYTE_ARRAY_LEN with external length, so store that first
+	    switch (type) {
+	    case 'c': case 'C':
+		blen = count;
+		break;
+	    case 's': case 'S':
+		blen = 2*count;
+		break;
+	    case 'i': case 'I': case 'f':
+		blen = 4*count;
+		break;
+	    default:
+		fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
+			type);
+		return NULL;
+		    
+	    }
+
+	    tmp += itf8_put(tmp, blen+5);
+
+	    *tmp++=*aux++; // sub-type & length
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+
+	    // The tag data itself
+	    memcpy(tmp, aux, blen); tmp += blen; aux += blen;
+
+	    //cram_stats_add(c->aux_B_stats, blen);
+	    break;
+	}
+	default:
+	    fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
+	    return NULL;
+	}
+    }
+    cram_stats_add(c->stats[DS_TC], cr->ntags);
+
+    cr->aux = BLOCK_SIZE(s->aux_blk);
+    cr->aux_size = (uc *)tmp - (BLOCK_DATA(s->aux_blk) + cr->aux);
+    BLOCK_SIZE(s->aux_blk) = (uc *)tmp - BLOCK_DATA(s->aux_blk);
+    assert(s->aux_blk->byte <= s->aux_blk->alloc);
+
+    return rg;
+}
+
+/*
+ * Encodes auxiliary data. Largely duplicated from above, but done so to
+ * keep it simple and avoid a myriad of version ifs.
+ *
+ * Returns the read-group parsed out of the BAM aux fields on success
+ *         NULL on failure or no rg present (FIXME)
+ */
+static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
+			     cram_slice *s, cram_record *cr) {
+    char *aux, *orig, *tmp, *rg = NULL;
+    int aux_size = bam_get_l_aux(b);
+    cram_block *td_b = c->comp_hdr->TD_blk;
+    int TD_blk_size = BLOCK_SIZE(td_b), new;
+    char *key;
+    khint_t k;
+
+
+    /* Worst case is 1 nul char on every ??:Z: string, so +33% */
+    BLOCK_GROW(s->aux_blk, aux_size*1.34+1);
+    tmp = (char *)BLOCK_END(s->aux_blk);
+
+
+    orig = aux = (char *)bam_aux(b);
+
+    // Copy aux keys to td_b and aux values to s->aux_blk
+    while (aux - orig < aux_size && aux[0] != 0) {
+	uint32_t i32;
+	int r;
+
+	if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') {
+	    rg = &aux[3];
+	    while (*aux++);
+	    continue;
+	}
+	if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') {
+	    while (*aux++);
+	    continue;
+	}
+	if (aux[0] == 'N' && aux[1] == 'M') {
+	    switch(aux[2]) {
+	    case 'A': case 'C': case 'c': aux+=4; break;
+	    case 'S': case 's':           aux+=5; break;
+	    case 'I': case 'i': case 'f': aux+=7; break;
+	    default:
+		fprintf(stderr, "Unhandled type code for NM tag\n");
+		return NULL;
+	    }
+	    continue;
+	}
+
+	BLOCK_APPEND(td_b, aux, 3);
+
+	i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
+	kh_put(s_i2i, c->tags_used, i32, &r);
+	if (-1 == r)
+	    return NULL;
+
+	// BQ:Z
+	if (aux[0] == 'B' && aux[1] == 'Q' && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_BQ_blk)
+		if (!(s->aux_BQ_blk = cram_new_block(EXTERNAL, DS_aux_BQ)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_BQ_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_BQ_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_BQ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BQ_blk);
+	    continue;
+	}
+
+	// BD:Z
+	if (aux[0] == 'B' && aux[1]=='D' && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_BD_blk)
+		if (!(s->aux_BD_blk = cram_new_block(EXTERNAL, DS_aux_BD)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_BD_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_BD_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_BD_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BD_blk);
+	    continue;
+	}
+
+	// BI:Z
+	if (aux[0] == 'B' && aux[1]=='I' && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_BI_blk)
+		if (!(s->aux_BI_blk = cram_new_block(EXTERNAL, DS_aux_BI)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_BI_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_BI_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_BI_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BI_blk);
+	    continue;
+	}
+
+	// OQ:Z:
+	if (aux[0] == 'O' && aux[1] == 'Q' && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_OQ_blk)
+		if (!(s->aux_OQ_blk = cram_new_block(EXTERNAL, DS_aux_OQ)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_OQ_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_OQ_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_OQ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_OQ_blk);
+	    continue;
+	}
+
+	// FZ:B or ZM:B
+	if ((aux[0] == 'F' && aux[1] == 'Z' && aux[2] == 'B') ||
+	    (aux[0] == 'Z' && aux[1] == 'M' && aux[2] == 'B')) {
+	    int type = aux[3], blen;
+	    uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
+					(((unsigned char *)aux)[5]<< 8) +
+					(((unsigned char *)aux)[6]<<16) +
+					(((unsigned char *)aux)[7]<<24));
+	    char *tmp;
+	    if (!s->aux_FZ_blk)
+		if (!(s->aux_FZ_blk = cram_new_block(EXTERNAL, DS_aux_FZ)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_FZ_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_FZ_blk);
+
+	    // skip TN field
+	    aux+=3;
+
+	    // We use BYTE_ARRAY_LEN with external length, so store that first
+	    switch (type) {
+	    case 'c': case 'C':
+		blen = count;
+		break;
+	    case 's': case 'S':
+		blen = 2*count;
+		break;
+	    case 'i': case 'I': case 'f':
+		blen = 4*count;
+		break;
+	    default:
+		fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
+			type);
+		return NULL;
+		    
+	    }
+
+	    blen += 5; // sub-type & length
+	    tmp += itf8_put(tmp, blen);
+
+	    // The tag data itself
+	    memcpy(tmp, aux, blen); tmp += blen; aux += blen;
+
+	    BLOCK_SIZE(s->aux_FZ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_FZ_blk);
+	    continue;
+	}
+
+	// Other quality data - {Q2,E2,U2,CQ}:Z and similar
+	if (((aux[0] == 'Q' && aux[1] == '2') ||
+	     (aux[0] == 'U' && aux[1] == '2') ||
+	     (aux[0] == 'Q' && aux[1] == 'T') ||
+	     (aux[0] == 'C' && aux[1] == 'Q')) && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_oq_blk)
+		if (!(s->aux_oq_blk = cram_new_block(EXTERNAL, DS_aux_oq)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_oq_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_oq_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_oq_blk) = (uc *)tmp - BLOCK_DATA(s->aux_oq_blk);
+	    continue;
+	}
+
+	// Other sequence data - {R2,E2,CS,BC,RT}:Z and similar
+	if (((aux[0] == 'R' && aux[1] == '2') ||
+	     (aux[0] == 'E' && aux[1] == '2') ||
+	     (aux[0] == 'C' && aux[1] == 'S') ||
+	     (aux[0] == 'B' && aux[1] == 'C') ||
+	     (aux[0] == 'R' && aux[1] == 'T')) && aux[2] == 'Z') {
+	    char *tmp;
+	    if (!s->aux_os_blk)
+		if (!(s->aux_os_blk = cram_new_block(EXTERNAL, DS_aux_os)))
+		    return NULL;
+	    BLOCK_GROW(s->aux_os_blk, aux_size*1.34+1);
+	    tmp = (char *)BLOCK_END(s->aux_os_blk);
+	    aux += 3;
+	    while ((*tmp++=*aux++));
+	    *tmp++ = '\t';
+	    BLOCK_SIZE(s->aux_os_blk) = (uc *)tmp - BLOCK_DATA(s->aux_os_blk);
+	    continue;
+	}
+
+
+	switch(aux[2]) {
+	case 'A': case 'C': case 'c':
+	    aux+=3;
+	    *tmp++=*aux++;
+	    break;
+
+	case 'S': case 's':
+	    aux+=3;
+	    *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'I': case 'i': case 'f':
+	    aux+=3;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'd':
+	    aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
+	    break;
+
+	case 'Z': case 'H':
+	    {
+		char *tmp;
+		if (!s->aux_oz_blk)
+		    if (!(s->aux_oz_blk = cram_new_block(EXTERNAL, DS_aux_oz)))
+			return NULL;
+		BLOCK_GROW(s->aux_oz_blk, aux_size*1.34+1);
+		tmp = (char *)BLOCK_END(s->aux_oz_blk);
+		aux += 3;
+		while ((*tmp++=*aux++));
+		*tmp++ = '\t';
+		BLOCK_SIZE(s->aux_oz_blk) = (uc *)tmp -
+		    BLOCK_DATA(s->aux_oz_blk);
+	    }
+	    break;
+
+	case 'B': {
+	    int type = aux[3], blen;
+	    uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
+					(((unsigned char *)aux)[5]<< 8) +
+					(((unsigned char *)aux)[6]<<16) +
+					(((unsigned char *)aux)[7]<<24));
+	    // skip TN field
+	    aux+=3;
+
+	    // We use BYTE_ARRAY_LEN with external length, so store that first
+	    switch (type) {
+	    case 'c': case 'C':
+		blen = count;
+		break;
+	    case 's': case 'S':
+		blen = 2*count;
+		break;
+	    case 'i': case 'I': case 'f':
+		blen = 4*count;
+		break;
+	    default:
+		fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
+			type);
+		return NULL;
+		    
+	    }
+
+	    blen += 5; // sub-type & length
+	    tmp += itf8_put(tmp, blen);
+
+	    // The tag data itself
+	    memcpy(tmp, aux, blen); tmp += blen; aux += blen;
+
+	    //cram_stats_add(c->aux_B_stats, blen);
+	    break;
+	}
+	default:
+	    fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
+	    return NULL;
+	}
+    }
+
+    // FIXME: sort BLOCK_DATA(td_b) by char[3] triples
+    
+    // And and increment TD hash entry
+    BLOCK_APPEND_CHAR(td_b, 0);
+
+    // Duplicate key as BLOCK_DATA() can be realloced to a new pointer.
+    key = string_ndup(c->comp_hdr->TD_keys, 
+		      (char *)BLOCK_DATA(td_b) + TD_blk_size,
+		      BLOCK_SIZE(td_b) - TD_blk_size);
+    k = kh_put(m_s2i, c->comp_hdr->TD_hash, key, &new);
+    if (new < 0) {
+	return NULL;
+    } else if (new == 0) {
+	BLOCK_SIZE(td_b) = TD_blk_size;
+    } else {
+	kh_val(c->comp_hdr->TD_hash, k) = c->comp_hdr->nTL;
+	c->comp_hdr->nTL++;
+    }
+
+    cr->TL = kh_val(c->comp_hdr->TD_hash, k);
+    cram_stats_add(c->stats[DS_TL], cr->TL);
+
+    cr->aux = BLOCK_SIZE(s->aux_blk);
+    cr->aux_size = (uc *)tmp - (BLOCK_DATA(s->aux_blk) + cr->aux);
+    BLOCK_SIZE(s->aux_blk) = (uc *)tmp - BLOCK_DATA(s->aux_blk);
+    assert(s->aux_blk->byte <= s->aux_blk->alloc);
+
+    return rg;
+}
+
+
+/*
+ * Handles creation of a new container or new slice, flushing any
+ * existing containers when appropriate. 
+ *
+ * Really this is next slice, which may or may not lead to a new container.
+ *
+ * Returns cram_container pointer on success
+ *         NULL on failure.
+ */
+static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) {
+    cram_container *c = fd->ctr;
+    cram_slice *s;
+    int i;
+
+    /* First occurence */
+    if (c->curr_ref == -2)
+	c->curr_ref = bam_ref(b);
+
+    if (c->slice) {
+	s = c->slice;
+	if (c->multi_seq) {
+	    s->hdr->ref_seq_id    = -2;
+	    s->hdr->ref_seq_start = 0;
+	    s->hdr->ref_seq_span  = 0;
+	} else {
+	    s->hdr->ref_seq_id    = c->curr_ref;
+	    s->hdr->ref_seq_start = c->first_base;
+	    s->hdr->ref_seq_span  = c->last_base - c->first_base + 1;
+	}
+	s->hdr->num_records   = c->curr_rec;
+
+	if (c->curr_slice == 0) {
+	    if (c->ref_seq_id != s->hdr->ref_seq_id)
+		c->ref_seq_id  = s->hdr->ref_seq_id;
+	    c->ref_seq_start = c->first_base;
+	}
+
+	c->curr_slice++;
+    }
+
+    /* Flush container */
+    if (c->curr_slice == c->max_slice ||
+	(bam_ref(b) != c->curr_ref && !c->multi_seq)) {
+	c->ref_seq_span = fd->last_base - c->ref_seq_start + 1;
+	if (fd->verbose)
+	    fprintf(stderr, "Flush container %d/%d..%d\n",
+		    c->ref_seq_id, c->ref_seq_start,
+		    c->ref_seq_start + c->ref_seq_span -1);
+
+	/* Encode slices */
+	if (fd->pool) {
+	    if (-1 == cram_flush_container_mt(fd, c))
+		return NULL;
+	} else {
+	    if (-1 == cram_flush_container(fd, c))
+		return NULL;
+
+	    // Move to sep func, as we need cram_flush_container for
+	    // the closing phase to flush the partial container.
+	    for (i = 0; i < c->max_slice; i++) {
+		cram_free_slice(c->slices[i]);
+		c->slices[i] = NULL;
+	    }
+
+	    c->slice = NULL;
+	    c->curr_slice = 0;
+
+	    /* Easy approach for purposes of freeing stats */
+	    cram_free_container(c);
+	}
+
+	c = fd->ctr = cram_new_container(fd->seqs_per_slice,
+					 fd->slices_per_container);
+	if (!c)
+	    return NULL;
+	c->record_counter = fd->record_counter;
+	c->curr_ref = bam_ref(b);
+    }
+
+    c->last_pos = c->first_base = c->last_base = bam_pos(b)+1;
+
+    /* New slice */
+    c->slice = c->slices[c->curr_slice] =
+	cram_new_slice(MAPPED_SLICE, c->max_rec);
+    if (!c->slice)
+	return NULL;
+
+    if (c->multi_seq) {
+	c->slice->hdr->ref_seq_id = -2;
+	c->slice->hdr->ref_seq_start = 0;
+	c->slice->last_apos = 1;
+    } else {
+	c->slice->hdr->ref_seq_id = bam_ref(b);
+	// wrong for unsorted data, will fix during encoding.
+	c->slice->hdr->ref_seq_start = bam_pos(b)+1;
+	c->slice->last_apos = bam_pos(b)+1;
+    }
+
+    c->curr_rec = 0;
+
+    return c;
+}
+
+/*
+ * Converts a single bam record into a cram record.
+ * Possibly used within a thread.
+ *
+ * Returns 0 on success;
+ *        -1 on failure
+ */
+static int process_one_read(cram_fd *fd, cram_container *c,
+			    cram_slice *s, cram_record *cr,
+			    bam_seq_t *b, int rnum) {
+    int i, fake_qual = -1;
+    char *cp, *rg;
+    char *ref, *seq, *qual;
+
+    // FIXME: multi-ref containers
+
+    ref = c->ref;
+    cr->len         = bam_seq_len(b); cram_stats_add(c->stats[DS_RL], cr->len);
+
+    //fprintf(stderr, "%s => %d\n", rg ? rg : "\"\"", cr->rg);
+
+    // Fields to resolve later
+    //cr->mate_line;    // index to another cram_record
+    //cr->mate_flags;   // MF
+    //cr->ntags;        // TC
+    cr->ntags      = 0; //cram_stats_add(c->stats[DS_TC], cr->ntags);
+    if (CRAM_MAJOR_VERS(fd->version) == 1)
+	rg = cram_encode_aux_1_0(fd, b, c, s, cr);
+    else
+	rg = cram_encode_aux(fd, b, c, s, cr);
+
+    //cr->aux_size = b->blk_size - ((char *)bam_aux(b) - (char *)&bam_ref(b));
+    //cr->aux = DSTRING_LEN(s->aux_ds);
+    //dstring_nappend(s->aux_ds, bam_aux(b), cr->aux_size);
+
+    /* Read group, identified earlier */
+    if (rg) {
+	SAM_RG *brg = sam_hdr_find_rg(fd->header, rg);
+	cr->rg = brg ? brg->id : -1;
+    } else if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	SAM_RG *brg = sam_hdr_find_rg(fd->header, "UNKNOWN");
+	assert(brg);
+    } else {
+	cr->rg = -1;
+    }
+    cram_stats_add(c->stats[DS_RG], cr->rg);
+
+    
+    cr->ref_id      = bam_ref(b);  cram_stats_add(c->stats[DS_RI], cr->ref_id);
+    cr->flags       = bam_flag(b);
+    if (bam_cigar_len(b) == 0)
+	cr->flags |= BAM_FUNMAP;
+    cram_stats_add(c->stats[DS_BF], fd->cram_flag_swap[cr->flags & 0xfff]);
+
+    // Non reference based encoding means storing the bases verbatim as features, which in
+    // turn means every base also has a quality already stored.
+    if (!fd->no_ref || CRAM_MAJOR_VERS(fd->version) >= 3)
+	cr->cram_flags = CRAM_FLAG_PRESERVE_QUAL_SCORES;
+    else
+	cr->cram_flags = 0;
+    //cram_stats_add(c->stats[DS_CF], cr->cram_flags);
+
+    c->num_bases   += cr->len;
+    cr->apos        = bam_pos(b)+1;
+    if (c->pos_sorted) {
+	if (cr->apos < s->last_apos) {
+	    c->pos_sorted = 0;
+	} else {
+	    cram_stats_add(c->stats[DS_AP], cr->apos - s->last_apos);
+	    s->last_apos = cr->apos;
+	}
+    } else {
+	//cram_stats_add(c->stats[DS_AP], cr->apos);
+    }
+    c->max_apos += (cr->apos > c->max_apos) * (cr->apos - c->max_apos);
+
+    cr->name        = BLOCK_SIZE(s->name_blk);
+    cr->name_len    = bam_name_len(b);
+    cram_stats_add(c->stats[DS_RN], cr->name_len);
+
+    BLOCK_APPEND(s->name_blk, bam_name(b), bam_name_len(b));
+
+
+    /*
+     * This seqs_ds is largely pointless and it could reuse the same memory
+     * over and over.
+     * s->base_blk is what we need for encoding.
+     */
+    cr->seq         = BLOCK_SIZE(s->seqs_blk);
+    cr->qual        = BLOCK_SIZE(s->qual_blk);
+    BLOCK_GROW(s->seqs_blk, cr->len+1);
+    BLOCK_GROW(s->qual_blk, cr->len);
+    seq = cp = (char *)BLOCK_END(s->seqs_blk);
+
+    *seq = 0;
+#ifdef ALLOW_UAC
+    {
+	// Convert seq 2 bases at a time for speed.
+	static const uint16_t code2base[256] = {
+	    15677, 16701, 17213, 19773, 18237, 21053, 21309, 22077,
+	    21565, 22333, 22845, 18493, 19261, 17469, 16957, 20029,
+	    15681, 16705, 17217, 19777, 18241, 21057, 21313, 22081,
+	    21569, 22337, 22849, 18497, 19265, 17473, 16961, 20033,
+	    15683, 16707, 17219, 19779, 18243, 21059, 21315, 22083,
+	    21571, 22339, 22851, 18499, 19267, 17475, 16963, 20035,
+	    15693, 16717, 17229, 19789, 18253, 21069, 21325, 22093,
+	    21581, 22349, 22861, 18509, 19277, 17485, 16973, 20045,
+	    15687, 16711, 17223, 19783, 18247, 21063, 21319, 22087,
+	    21575, 22343, 22855, 18503, 19271, 17479, 16967, 20039,
+	    15698, 16722, 17234, 19794, 18258, 21074, 21330, 22098,
+	    21586, 22354, 22866, 18514, 19282, 17490, 16978, 20050,
+	    15699, 16723, 17235, 19795, 18259, 21075, 21331, 22099,
+	    21587, 22355, 22867, 18515, 19283, 17491, 16979, 20051,
+	    15702, 16726, 17238, 19798, 18262, 21078, 21334, 22102,
+	    21590, 22358, 22870, 18518, 19286, 17494, 16982, 20054,
+	    15700, 16724, 17236, 19796, 18260, 21076, 21332, 22100,
+	    21588, 22356, 22868, 18516, 19284, 17492, 16980, 20052,
+	    15703, 16727, 17239, 19799, 18263, 21079, 21335, 22103,
+	    21591, 22359, 22871, 18519, 19287, 17495, 16983, 20055,
+	    15705, 16729, 17241, 19801, 18265, 21081, 21337, 22105,
+	    21593, 22361, 22873, 18521, 19289, 17497, 16985, 20057,
+	    15688, 16712, 17224, 19784, 18248, 21064, 21320, 22088,
+	    21576, 22344, 22856, 18504, 19272, 17480, 16968, 20040,
+	    15691, 16715, 17227, 19787, 18251, 21067, 21323, 22091,
+	    21579, 22347, 22859, 18507, 19275, 17483, 16971, 20043,
+	    15684, 16708, 17220, 19780, 18244, 21060, 21316, 22084,
+	    21572, 22340, 22852, 18500, 19268, 17476, 16964, 20036,
+	    15682, 16706, 17218, 19778, 18242, 21058, 21314, 22082,
+	    21570, 22338, 22850, 18498, 19266, 17474, 16962, 20034,
+	    15694, 16718, 17230, 19790, 18254, 21070, 21326, 22094,
+	    21582, 22350, 22862, 18510, 19278, 17486, 16974, 20046
+	};
+
+	int l2 = cr->len / 2;
+	unsigned char *from = (unsigned char *)bam_seq(b);
+	uint16_t *cpi = (uint16_t *)cp;
+	cp[0] = 0;
+	for (i = 0; i < l2; i++)
+	    cpi[i] = le_int2(code2base[from[i]]);
+	if ((i *= 2) < cr->len)
+	    cp[i] = seq_nt16_str[bam_seqi(bam_seq(b), i)];
+    }
+#else
+    for (i = 0; i < cr->len; i++)
+	cp[i] = seq_nt16_str[bam_seqi(bam_seq(b), i)];
+#endif
+    BLOCK_SIZE(s->seqs_blk) += cr->len;
+
+    qual = cp = (char *)bam_qual(b);
+
+    /* Copy and parse */
+    if (!(cr->flags & BAM_FUNMAP)) {
+	int32_t *cig_to, *cig_from;
+	int apos = cr->apos-1, spos = 0;
+
+	cr->cigar       = s->ncigar;
+	cr->ncigar      = bam_cigar_len(b);
+	while (cr->cigar + cr->ncigar >= s->cigar_alloc) {
+	    s->cigar_alloc = s->cigar_alloc ? s->cigar_alloc*2 : 1024;
+	    s->cigar = realloc(s->cigar, s->cigar_alloc * sizeof(*s->cigar));
+	    if (!s->cigar)
+		return -1;
+	}
+
+	cig_to = (int32_t *)s->cigar;
+	cig_from = (int32_t *)bam_cigar(b);
+
+	cr->feature = 0;
+	cr->nfeature = 0;
+	for (i = 0; i < cr->ncigar; i++) {
+	    enum cigar_op cig_op = cig_from[i] & BAM_CIGAR_MASK;
+	    int cig_len = cig_from[i] >> BAM_CIGAR_SHIFT;
+	    cig_to[i] = cig_from[i];
+
+	    /* Can also generate events from here for CRAM diffs */
+
+	    switch (cig_op) {
+		int l;
+
+		// Don't trust = and X ops to be correct.
+	    case BAM_CMATCH:
+	    case BAM_CBASE_MATCH:
+	    case BAM_CBASE_MISMATCH:
+		//fprintf(stderr, "\nBAM_CMATCH\nR: %.*s\nS: %.*s\n",
+		//	cig_len, &ref[apos], cig_len, &seq[spos]);
+		l = 0;
+		if (!fd->no_ref && cr->len) {
+		    int end = cig_len+apos < c->ref_end
+			? cig_len : c->ref_end - apos;
+		    char *sp = &seq[spos];
+		    char *rp = &ref[apos];
+		    char *qp = &qual[spos];
+		    for (l = 0; l < end; l++) {
+			if (rp[l] != sp[l]) {
+			    if (!sp[l])
+				break;
+			    if (0 && CRAM_MAJOR_VERS(fd->version) >= 3) {
+				// Disabled for the time being as it doesn't
+				// seem to gain us much.
+				int ol=l;
+				while (l<end && rp[l] != sp[l])
+				    l++;
+				if (l-ol > 1) {
+				    if (cram_add_bases(fd, c, s, cr, spos+ol,
+						       l-ol, &seq[spos+ol]))
+					return -1;
+				    l--;
+				} else {
+				    l = ol;
+				    if (cram_add_substitution(fd, c, s, cr,
+							      spos+l, sp[l],
+							      qp[l], rp[l]))
+					return -1;
+				}
+			    } else {
+				if (cram_add_substitution(fd, c, s, cr, spos+l,
+							  sp[l], qp[l], rp[l]))
+				    return -1;
+			    }
+			}
+		    }
+		    spos += l;
+		    apos += l;
+		}
+
+		if (l < cig_len && cr->len) {
+		    if (fd->no_ref) {
+			if (CRAM_MAJOR_VERS(fd->version) == 3) {
+			    if (cram_add_bases(fd, c, s, cr, spos,
+					       cig_len-l, &seq[spos]))
+				return -1;
+			    spos += cig_len-l;
+			} else {
+			    for (; l < cig_len && seq[spos]; l++, spos++) {
+				if (cram_add_base(fd, c, s, cr, spos,
+						  seq[spos], qual[spos]))
+				    return -1;
+			    }
+			}
+		    } else {
+			/* off end of sequence or non-ref based output */
+			for (; l < cig_len && seq[spos]; l++, spos++) {
+			    if (cram_add_base(fd, c, s, cr, spos,
+					      seq[spos], qual[spos]))
+				return -1;
+			}
+		    }
+		    apos += cig_len;
+		} else if (!cr->len) {
+		    /* Seq "*" */
+		    apos += cig_len;
+		    spos += cig_len;
+		}
+		break;
+		
+	    case BAM_CDEL:
+		if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos]))
+		    return -1;
+		apos += cig_len;
+		break;
+
+	    case BAM_CREF_SKIP:
+		if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos]))
+		    return -1;
+		apos += cig_len;
+		break;
+
+	    case BAM_CINS:
+		if (cram_add_insertion(c, s, cr, spos, cig_len,
+				       cr->len ? &seq[spos] : NULL))
+		    return -1;
+		if (fd->no_ref && cr->len) {
+		    for (l = 0; l < cig_len; l++, spos++) {
+			cram_add_quality(fd, c, s, cr, spos, qual[spos]);
+		    }
+		} else {
+		    spos += cig_len;
+		}
+		break;
+
+	    case BAM_CSOFT_CLIP:
+		if (cram_add_softclip(c, s, cr, spos, cig_len,
+				      cr->len ? &seq[spos] : NULL,
+				      fd->version))
+		    return -1;
+		if (fd->no_ref &&
+		    !(cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
+		    if (cr->len) {
+			for (l = 0; l < cig_len; l++, spos++) {
+			    cram_add_quality(fd, c, s, cr, spos, qual[spos]);
+			}
+		    } else {
+			for (l = 0; l < cig_len; l++, spos++) {
+			    cram_add_quality(fd, c, s, cr, spos, -1);
+			}
+		    }
+		} else {
+		    spos += cig_len;
+		}
+		break;
+
+	    case BAM_CHARD_CLIP:
+		if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos]))
+		    return -1;
+		break;
+	
+	    case BAM_CPAD:
+		if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos]))
+		    return -1;
+		break;
+	    }
+	}
+	fake_qual = spos;
+	cr->aend = MIN(apos, c->ref_end);
+	cram_stats_add(c->stats[DS_FN], cr->nfeature);
+    } else {
+	// Unmapped
+	cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES;
+	cr->cigar  = 0;
+	cr->ncigar = 0;
+	cr->nfeature = 0;
+	cr->aend = cr->apos;
+	for (i = 0; i < cr->len; i++)
+	    cram_stats_add(c->stats[DS_BA], seq[i]);
+    }
+
+    /*
+     * Append to the qual block now. We do this here as
+     * cram_add_substitution() can generate BA/QS events which need to 
+     * be in the qual block before we append the rest of the data.
+     */
+    if (cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES) {
+	/* Special case of seq "*" */
+	if (cr->len == 0) {
+	    cram_stats_add(c->stats[DS_RL], cr->len = fake_qual);
+	    BLOCK_GROW(s->qual_blk, cr->len);
+	    cp = (char *)BLOCK_END(s->qual_blk);
+	    memset(cp, 255, cr->len);
+	} else {
+	    BLOCK_GROW(s->qual_blk, cr->len);
+	    cp = (char *)BLOCK_END(s->qual_blk);
+	    char *from = (char *)&bam_qual(b)[0];
+	    char *to = &cp[0];
+	    memcpy(to, from, cr->len);
+	    //for (i = 0; i < cr->len; i++) cp[i] = from[i];
+	}
+	BLOCK_SIZE(s->qual_blk) += cr->len;
+    } else {
+	if (cr->len == 0) {
+	    cr->len = fake_qual >= 0 ? fake_qual : cr->aend - cr->apos + 1;
+	    cram_stats_add(c->stats[DS_RL], cr->len);
+	}
+    }
+
+    /* Now we know apos and aend both, update mate-pair information */
+    {
+	int new;
+	khint_t k;
+	int sec = (cr->flags & BAM_FSECONDARY) ? 1 : 0;
+
+	//fprintf(stderr, "Checking %"PRId64"/%.*s\t", rnum,
+	//	cr->name_len, DSTRING_STR(s->name_ds)+cr->name);
+	if (cr->flags & BAM_FPAIRED) {
+	    char *key = string_ndup(s->pair_keys,
+				    (char *)BLOCK_DATA(s->name_blk)+cr->name,
+				    cr->name_len);
+	    if (!key)
+		return -1;
+
+	    k = kh_put(m_s2i, s->pair[sec], key, &new);
+	    if (-1 == new)
+		return -1;
+	    else if (new > 0)
+		kh_val(s->pair[sec], k) = rnum;
+	} else {
+	    new = 1;
+	}
+
+	if (new == 0) {
+	    cram_record *p = &s->crecs[kh_val(s->pair[sec], k)];
+	    int aleft, aright, sign;
+
+	    aleft = MIN(cr->apos, p->apos);
+	    aright = MAX(cr->aend, p->aend);
+	    if (cr->apos < p->apos) {
+		sign = 1;
+	    } else if (cr->apos > p->apos) {
+		sign = -1;
+	    } else if (cr->flags & BAM_FREAD1) {
+		sign = 1;
+	    } else {
+		sign = -1;
+	    }
+	    
+	    //fprintf(stderr, "paired %"PRId64"\n", kh_val(s->pair[sec], k));
+
+	    // This vs p: tlen, matepos, flags
+	    if (bam_ins_size(b) != sign*(aright-aleft+1))
+		goto detached;
+
+	    if (MAX(bam_mate_pos(b)+1, 0) != p->apos)
+		goto detached;
+
+	    if (((bam_flag(b) & BAM_FMUNMAP) != 0) !=
+		((p->flags & BAM_FUNMAP) != 0))
+		goto detached;
+
+	    if (((bam_flag(b) & BAM_FMREVERSE) != 0) !=
+		((p->flags & BAM_FREVERSE) != 0))
+		goto detached;
+
+
+	    // p vs this: tlen, matepos, flags
+	    if (p->tlen != -sign*(aright-aleft+1))
+		goto detached;
+
+	    if (p->mate_pos != cr->apos)
+		goto detached;
+
+	    if (((p->flags & BAM_FMUNMAP) != 0) !=
+		((p->mate_flags & CRAM_M_UNMAP) != 0))
+		goto detached;
+
+	    if (((p->flags & BAM_FMREVERSE) != 0) !=
+		((p->mate_flags & CRAM_M_REVERSE) != 0))
+		goto detached;
+
+	    // Supplementary reads are just too ill defined
+	    if ((cr->flags & BAM_FSUPPLEMENTARY) ||
+		(p->flags & BAM_FSUPPLEMENTARY))
+		goto detached;
+
+	    /*
+	     * The fields below are unused when encoding this read as it is
+	     * no longer detached.  In theory they may get referred to when
+	     * processing a 3rd or 4th read in this template?, so we set them
+	     * here just to be sure.
+	     *
+	     * They do not need cram_stats_add() calls those as they are
+	     * not emitted.
+	     */
+	    cr->mate_pos = p->apos;
+	    cr->tlen = sign*(aright-aleft+1);
+	    cr->mate_flags =
+	    	((p->flags & BAM_FMUNMAP)   == BAM_FMUNMAP)   * CRAM_M_UNMAP +
+	    	((p->flags & BAM_FMREVERSE) == BAM_FMREVERSE) * CRAM_M_REVERSE;
+
+	    // Decrement statistics aggregated earlier
+	    cram_stats_del(c->stats[DS_NP], p->mate_pos);
+	    cram_stats_del(c->stats[DS_MF], p->mate_flags);
+	    cram_stats_del(c->stats[DS_TS], p->tlen);
+	    cram_stats_del(c->stats[DS_NS], p->mate_ref_id);
+
+	    /* Similarly we could correct the p-> values too, but these will no
+	     * longer have any code that refers back to them as the new 'p'
+	     * for this template is our current 'cr'.
+	     */
+	    //p->mate_pos = cr->apos;
+	    //p->mate_flags =
+	    //	((cr->flags & BAM_FMUNMAP)   == BAM_FMUNMAP)  * CRAM_M_UNMAP +
+	    //	((cr->flags & BAM_FMREVERSE) == BAM_FMREVERSE)* CRAM_M_REVERSE;
+	    //p->tlen = p->apos - cr->aend;
+
+	    // Clear detached from cr flags
+	    cr->cram_flags &= ~CRAM_FLAG_DETACHED;
+	    cram_stats_add(c->stats[DS_CF], cr->cram_flags);
+
+	    // Clear detached from p flags and set downstream
+	    cram_stats_del(c->stats[DS_CF], p->cram_flags);
+	    p->cram_flags  &= ~CRAM_FLAG_DETACHED;
+	    p->cram_flags  |=  CRAM_FLAG_MATE_DOWNSTREAM;
+	    cram_stats_add(c->stats[DS_CF], p->cram_flags);
+
+	    p->mate_line = rnum - (kh_val(s->pair[sec], k) + 1);
+	    cram_stats_add(c->stats[DS_NF], p->mate_line);
+
+	    kh_val(s->pair[sec], k) = rnum;
+	} else {
+	detached:
+	    //fprintf(stderr, "unpaired\n");
+
+	    /* Derive mate flags from this flag */
+	    cr->mate_flags = 0;
+	    if (bam_flag(b) & BAM_FMUNMAP)
+		cr->mate_flags |= CRAM_M_UNMAP;
+	    if (bam_flag(b) & BAM_FMREVERSE)
+		cr->mate_flags |= CRAM_M_REVERSE;
+
+	    cram_stats_add(c->stats[DS_MF], cr->mate_flags);
+
+	    cr->mate_pos    = MAX(bam_mate_pos(b)+1, 0);
+	    cram_stats_add(c->stats[DS_NP], cr->mate_pos);
+
+	    cr->tlen        = bam_ins_size(b);
+	    cram_stats_add(c->stats[DS_TS], cr->tlen);
+
+	    cr->cram_flags |= CRAM_FLAG_DETACHED;
+	    cram_stats_add(c->stats[DS_CF], cr->cram_flags);
+	    cram_stats_add(c->stats[DS_NS], bam_mate_ref(b));
+	}
+    }
+
+    cr->mqual       = bam_map_qual(b);
+    cram_stats_add(c->stats[DS_MQ], cr->mqual);
+
+    cr->mate_ref_id = bam_mate_ref(b);
+
+    if (!(bam_flag(b) & BAM_FUNMAP)) {
+	if (c->first_base > cr->apos)
+	    c->first_base = cr->apos;
+
+	if (c->last_base < cr->aend)
+	    c->last_base = cr->aend;
+    }
+
+    return 0;
+}
+
+/*
+ * Write iterator: put BAM format sequences into a CRAM file.
+ * We buffer up a containers worth of data at a time.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) {
+    cram_container *c;
+
+    if (!fd->ctr) {
+	fd->ctr = cram_new_container(fd->seqs_per_slice,
+				     fd->slices_per_container);
+	if (!fd->ctr)
+	    return -1;
+	fd->ctr->record_counter = fd->record_counter;
+    }
+    c = fd->ctr;
+
+    if (!c->slice || c->curr_rec == c->max_rec ||
+	(bam_ref(b) != c->curr_ref && c->curr_ref >= -1)) {
+	int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1;
+	int curr_ref = c->slice ? c->curr_ref : bam_ref(b);
+
+
+	/*
+	 * Start packing slices when we routinely have under 1/4tr full.
+	 *
+	 * This option isn't available if we choose to embed references
+	 * since we can only have one per slice.
+	 */
+	if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 &&
+	    fd->last_slice && fd->last_slice < c->max_rec/4+10 &&
+	    !fd->embed_ref) {
+	    if (fd->verbose && !c->multi_seq)
+		fprintf(stderr, "Multi-ref enabled for this container\n");
+	    multi_seq = 1;
+	}
+
+	slice_rec = c->slice_rec;
+	curr_rec  = c->curr_rec;
+
+	if (CRAM_MAJOR_VERS(fd->version) == 1 ||
+	    c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice) {
+	    if (NULL == (c = cram_next_container(fd, b))) {
+		if (fd->ctr) {
+		    // prevent cram_close attempting to flush
+		    cram_free_container(fd->ctr);
+		    fd->ctr = NULL;
+		}
+		return -1;
+	    }
+	}
+
+	/*
+	 * Due to our processing order, some things we've already done we
+	 * cannot easily undo. So when we first notice we should be packing
+	 * multiple sequences per container we emit the small partial
+	 * container as-is and then start a fresh one in a different mode.
+	 */
+	if (multi_seq) {
+	    fd->multi_seq = 1;
+	    c->multi_seq = 1;
+	    c->pos_sorted = 0; // required atm for multi_seq slices
+
+	    if (!c->refs_used) {
+		pthread_mutex_lock(&fd->ref_lock);
+		c->refs_used = calloc(fd->refs->nref, sizeof(int));
+		pthread_mutex_unlock(&fd->ref_lock);
+		if (!c->refs_used)
+		    return -1;
+	    }
+	}
+
+	fd->last_slice = curr_rec - slice_rec;
+	c->slice_rec = c->curr_rec;
+
+	// Have we seen this reference before?
+	if (bam_ref(b) >= 0 && bam_ref(b) != curr_ref && !fd->embed_ref &&
+	    !fd->unsorted && multi_seq) {
+	    
+	    if (!c->refs_used) {
+		pthread_mutex_lock(&fd->ref_lock);
+		c->refs_used = calloc(fd->refs->nref, sizeof(int));
+		pthread_mutex_unlock(&fd->ref_lock);
+		if (!c->refs_used)
+		    return -1;
+	    } else if (c->refs_used && c->refs_used[bam_ref(b)]) {
+		fprintf(stderr, "Unsorted mode enabled\n");
+		pthread_mutex_lock(&fd->ref_lock);
+		fd->unsorted = 1;
+		pthread_mutex_unlock(&fd->ref_lock);
+		fd->multi_seq = 1;
+	    }
+	}
+
+	c->curr_ref = bam_ref(b);
+	if (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++;
+    }
+
+    if (!c->bams) {
+	/* First time through, allocate a set of bam pointers */
+	pthread_mutex_lock(&fd->bam_list_lock);
+	if (fd->bl) {
+	    spare_bams *spare = fd->bl;
+	    c->bams = spare->bams;
+	    fd->bl = spare->next;
+	    free(spare);
+	} else {
+	    c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *));
+	    if (!c->bams)
+		return -1;
+	}
+	pthread_mutex_unlock(&fd->bam_list_lock);
+    }
+
+    /* Copy or alloc+copy the bam record, for later encoding */
+    if (c->bams[c->curr_c_rec])
+	bam_copy1(c->bams[c->curr_c_rec], b);
+    else
+	c->bams[c->curr_c_rec] = bam_dup(b);
+
+    c->curr_rec++;
+    c->curr_c_rec++;
+    fd->record_counter++;
+
+    return 0;
+}