diff ezBAMQC/src/htslib/cram/cram_decode.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ezBAMQC/src/htslib/cram/cram_decode.c	Thu Mar 24 17:12:52 2016 -0400
@@ -0,0 +1,2775 @@
+/*
+Copyright (c) 2012-2014 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+ * - In-memory decoding of CRAM data structures.
+ * - Iterator for reading CRAM record by record.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "io_lib_config.h"
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "cram/cram.h"
+#include "cram/os.h"
+#include "cram/md5.h"
+
+//Whether CIGAR has just M or uses = and X to indicate match and mismatch
+//#define USE_X
+
+/* ----------------------------------------------------------------------
+ * CRAM compression headers
+ */
+
+/*
+ * Decodes the Tag Dictionary record in the preservation map
+ * Updates the cram compression header.
+ * 
+ * Returns number of bytes decoded on success
+ *        -1 on failure
+ */
+int cram_decode_TD(char *cp, cram_block_compression_hdr *h) {
+    char *op = cp;
+    unsigned char *dat;
+    cram_block *b;
+    int32_t blk_size;
+    int nTL, i, sz;
+
+    if (!(b = cram_new_block(0, 0)))
+	return -1;
+    h->TD_blk = b;
+
+    /* Decode */
+    cp += itf8_get(cp, &blk_size);
+    if (!blk_size) {
+	h->nTL = 0;
+	h->TL = NULL;
+	cram_free_block(b);
+        return cp - op;
+    }
+
+    BLOCK_APPEND(b, cp, blk_size);
+    cp += blk_size;
+    sz = cp - op;
+
+    // Force nul termination if missing
+    if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1])
+	BLOCK_APPEND_CHAR(b, '\0');
+
+    /* Set up TL lookup table */
+    dat = BLOCK_DATA(b);
+
+    // Count
+    for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {
+	nTL++;
+	while (dat[i])
+	    i++;
+    }
+
+    // Copy
+    h->nTL = nTL;
+    if (!(h->TL = calloc(h->nTL, sizeof(unsigned char *))))
+	return -1;
+    for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {
+	h->TL[nTL++] = &dat[i];
+	while (dat[i])
+	    i++;
+    }
+    
+    return sz;
+}
+
+/*
+ * Decodes a CRAM block compression header.
+ * Returns header ptr on success
+ *         NULL on failure
+ */
+cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
+							   cram_block *b) {
+    char *cp, *cp_copy;
+    cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));
+    int i;
+    int32_t map_size, map_count;
+
+    if (!hdr)
+	return NULL;
+
+    if (b->method != RAW) {
+	if (cram_uncompress_block(b)) {
+	    free(hdr);
+	    return NULL;
+	}
+    }
+
+    cp = (char *)b->data;
+
+    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+	cp += itf8_get(cp, &hdr->ref_seq_id);
+	cp += itf8_get(cp, &hdr->ref_seq_start);
+	cp += itf8_get(cp, &hdr->ref_seq_span);
+	cp += itf8_get(cp, &hdr->num_records);
+	cp += itf8_get(cp, &hdr->num_landmarks);
+	if (!(hdr->landmark = malloc(hdr->num_landmarks * sizeof(int32_t)))) {
+	    free(hdr);
+	    return NULL;
+	}
+	for (i = 0; i < hdr->num_landmarks; i++) {
+	    cp += itf8_get(cp, &hdr->landmark[i]);
+	}
+    }
+
+    hdr->preservation_map = kh_init(map);
+
+    memset(hdr->rec_encoding_map, 0,
+	   CRAM_MAP_HASH * sizeof(hdr->rec_encoding_map[0]));
+    memset(hdr->tag_encoding_map, 0,
+	   CRAM_MAP_HASH * sizeof(hdr->tag_encoding_map[0]));
+
+    if (!hdr->preservation_map) {
+	cram_free_compression_header(hdr);
+	return NULL;
+    }
+
+    /* Initialise defaults for preservation map */
+    hdr->mapped_qs_included = 0;
+    hdr->unmapped_qs_included = 0;
+    hdr->unmapped_placed = 0;
+    hdr->qs_included = 0;
+    hdr->read_names_included = 0;
+    hdr->AP_delta = 1;
+    memcpy(hdr->substitution_matrix, "CGTNAGTNACTNACGNACGT", 20);
+
+    /* Preservation map */
+    cp += itf8_get(cp, &map_size); cp_copy = cp;
+    cp += itf8_get(cp, &map_count);
+    for (i = 0; i < map_count; i++) {
+	pmap_t hd;
+	khint_t k;
+	int r;
+
+	cp += 2;
+	switch(CRAM_KEY(cp[-2],cp[-1])) {
+	case CRAM_KEY('M','I'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "MI", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    hdr->mapped_qs_included = hd.i;
+	    break;
+
+	case CRAM_KEY('U','I'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "UI", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    hdr->unmapped_qs_included = hd.i;
+	    break;
+
+	case CRAM_KEY('P','I'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "PI", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    hdr->unmapped_placed = hd.i;
+	    break;
+
+	case CRAM_KEY('R','N'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "RN", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    hdr->read_names_included = hd.i;
+	    break;
+
+	case CRAM_KEY('A','P'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "AP", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    hdr->AP_delta = hd.i;
+	    break;
+
+	case CRAM_KEY('R','R'):
+	    hd.i = *cp++;
+	    k = kh_put(map, hdr->preservation_map, "RR", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+                return NULL;
+            }
+
+	    kh_val(hdr->preservation_map, k) = hd;
+	    fd->no_ref = !hd.i;
+	    break;
+
+	case CRAM_KEY('S','M'):
+	    hdr->substitution_matrix[0][(cp[0]>>6)&3] = 'C';
+	    hdr->substitution_matrix[0][(cp[0]>>4)&3] = 'G';
+	    hdr->substitution_matrix[0][(cp[0]>>2)&3] = 'T';
+	    hdr->substitution_matrix[0][(cp[0]>>0)&3] = 'N';
+
+	    hdr->substitution_matrix[1][(cp[1]>>6)&3] = 'A';
+	    hdr->substitution_matrix[1][(cp[1]>>4)&3] = 'G';
+	    hdr->substitution_matrix[1][(cp[1]>>2)&3] = 'T';
+	    hdr->substitution_matrix[1][(cp[1]>>0)&3] = 'N';
+
+	    hdr->substitution_matrix[2][(cp[2]>>6)&3] = 'A';
+	    hdr->substitution_matrix[2][(cp[2]>>4)&3] = 'C';
+	    hdr->substitution_matrix[2][(cp[2]>>2)&3] = 'T';
+	    hdr->substitution_matrix[2][(cp[2]>>0)&3] = 'N';
+
+	    hdr->substitution_matrix[3][(cp[3]>>6)&3] = 'A';
+	    hdr->substitution_matrix[3][(cp[3]>>4)&3] = 'C';
+	    hdr->substitution_matrix[3][(cp[3]>>2)&3] = 'G';
+	    hdr->substitution_matrix[3][(cp[3]>>0)&3] = 'N';
+
+	    hdr->substitution_matrix[4][(cp[4]>>6)&3] = 'A';
+	    hdr->substitution_matrix[4][(cp[4]>>4)&3] = 'C';
+	    hdr->substitution_matrix[4][(cp[4]>>2)&3] = 'G';
+	    hdr->substitution_matrix[4][(cp[4]>>0)&3] = 'T';
+
+	    hd.p = cp;
+	    cp += 5;
+
+	    k = kh_put(map, hdr->preservation_map, "SM", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	    kh_val(hdr->preservation_map, k) = hd;
+	    break;
+
+	case CRAM_KEY('T','D'): {
+	    int sz = cram_decode_TD(cp, hdr); // tag dictionary
+	    if (sz < 0) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+
+	    hd.p = cp;
+	    cp += sz;
+
+	    k = kh_put(map, hdr->preservation_map, "TD", &r);
+	    if (-1 == r) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	    kh_val(hdr->preservation_map, k) = hd;
+	    break;
+	}
+
+	default:
+	    fprintf(stderr, "Unrecognised preservation map key %c%c\n",
+		    cp[-2], cp[-1]);
+	    // guess byte;
+	    cp++;
+	    break;
+	}
+    }
+    if (cp - cp_copy != map_size) {
+	cram_free_compression_header(hdr);
+	return NULL;
+    }
+
+    /* Record encoding map */
+    cp += itf8_get(cp, &map_size); cp_copy = cp;
+    cp += itf8_get(cp, &map_count);
+    for (i = 0; i < map_count; i++) {
+	char *key = cp;
+	int32_t encoding;
+	int32_t size;
+	cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc
+
+	if (!m) {
+	    cram_free_compression_header(hdr);
+	    return NULL;
+	}
+
+	cp += 2;
+	cp += itf8_get(cp, &encoding);
+	cp += itf8_get(cp, &size);
+
+	// Fill out cram_map purely for cram_dump to dump out.
+	m->key = (key[0]<<8)|key[1];
+	m->encoding = encoding;
+	m->size     = size;
+	m->offset   = cp - (char *)b->data;
+	m->codec = NULL;
+
+	if (m->encoding == E_NULL)
+	    continue;
+
+	//printf("%s codes for %.2s\n", cram_encoding2str(encoding), key);
+
+	/*
+	 * For CRAM1.0 CF and BF are Byte and not Int.
+	 * Practically speaking it makes no difference unless we have a
+	 * 1.0 format file that stores these in EXTERNAL as only then
+	 * does Byte vs Int matter.
+	 *
+	 * Neither this C code nor Java reference implementations did this,
+	 * so we gloss over it and treat them as int.
+	 */
+
+	if (key[0] == 'B' && key[1] == 'F') {
+	    if (!(hdr->codecs[DS_BF] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'C' && key[1] == 'F') {
+	    if (!(hdr->codecs[DS_CF] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'R' && key[1] == 'I') {
+	    if (!(hdr->codecs[DS_RI] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'R' && key[1] == 'L') {
+	    if (!(hdr->codecs[DS_RL] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'A' && key[1] == 'P') {
+	    if (!(hdr->codecs[DS_AP] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'R' && key[1] == 'G') {
+	    if (!(hdr->codecs[DS_RG] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'M' && key[1] == 'F') {
+	    if (!(hdr->codecs[DS_MF] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'N' && key[1] == 'S') {
+	    if (!(hdr->codecs[DS_NS] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'N' && key[1] == 'P') {
+	    if (!(hdr->codecs[DS_NP] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'T' && key[1] == 'S') {
+	    if (!(hdr->codecs[DS_TS] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'N' && key[1] == 'F') {
+	    if (!(hdr->codecs[DS_NF] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'T' && key[1] == 'C') {
+	    if (!(hdr->codecs[DS_TC] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'T' && key[1] == 'N') {
+	    if (!(hdr->codecs[DS_TN] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'F' && key[1] == 'N') {
+	    if (!(hdr->codecs[DS_FN] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'F' && key[1] == 'C') {
+	    if (!(hdr->codecs[DS_FC] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'F' && key[1] == 'P') {
+	    if (!(hdr->codecs[DS_FP] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'B' && key[1] == 'S') {
+	    if (!(hdr->codecs[DS_BS] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'I' && key[1] == 'N') {
+	    if (!(hdr->codecs[DS_IN] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE_ARRAY,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'S' && key[1] == 'C') {
+	    if (!(hdr->codecs[DS_SC] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE_ARRAY,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'D' && key[1] == 'L') {
+	    if (!(hdr->codecs[DS_DL] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'B' && key[1] == 'A') {
+	    if (!(hdr->codecs[DS_BA] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'B' && key[1] == 'B') {
+	    if (!(hdr->codecs[DS_BB] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE_ARRAY,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'R' && key[1] == 'S') {
+	    if (!(hdr->codecs[DS_RS] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'P' && key[1] == 'D') {
+	    if (!(hdr->codecs[DS_PD] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'H' && key[1] == 'C') {
+	    if (!(hdr->codecs[DS_HC] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'M' && key[1] == 'Q') {
+	    if (!(hdr->codecs[DS_MQ] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'R' && key[1] == 'N') {
+	    if (!(hdr->codecs[DS_RN] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE_ARRAY_BLOCK,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'Q' && key[1] == 'S') {
+	    if (!(hdr->codecs[DS_QS] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'Q' && key[1] == 'Q') {
+	    if (!(hdr->codecs[DS_QQ] = cram_decoder_init(encoding, cp, size,
+							 E_BYTE_ARRAY,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'T' && key[1] == 'L') {
+	    if (!(hdr->codecs[DS_TL] = cram_decoder_init(encoding, cp, size,
+							 E_INT,
+							 fd->version))) {
+		cram_free_compression_header(hdr);
+		return NULL;
+	    }
+	} else if (key[0] == 'T' && key[1] == 'M') {
+	} else if (key[0] == 'T' && key[1] == 'V') {
+	} else
+	    fprintf(stderr, "Unrecognised key: %.2s\n", key);
+
+	cp += size;
+
+	m->next = hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])];
+	hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])] = m;
+    }
+    if (cp - cp_copy != map_size) {
+	cram_free_compression_header(hdr);
+	return NULL;
+    }
+
+    /* Tag encoding map */
+    cp += itf8_get(cp, &map_size); cp_copy = cp;
+    cp += itf8_get(cp, &map_count);
+    for (i = 0; i < map_count; i++) {
+	int32_t encoding;
+	int32_t size;
+	cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc
+	char *key = cp+1;
+
+	if (!m) {
+	    cram_free_compression_header(hdr);
+	    return NULL;
+	}
+
+	m->key = (key[0]<<16)|(key[1]<<8)|key[2];
+
+	cp += 4; // Strictly ITF8, but this suffices
+	cp += itf8_get(cp, &encoding);
+	cp += itf8_get(cp, &size);
+
+	m->encoding = encoding;
+	m->size     = size;
+	m->offset   = cp - (char *)b->data;
+	if (!(m->codec = cram_decoder_init(encoding, cp, size,
+					   E_BYTE_ARRAY_BLOCK, fd->version))) {
+	    cram_free_compression_header(hdr);
+	    free(m);
+	    return NULL;
+	}
+	
+	cp += size;
+
+	m->next = hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])];
+	hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])] = m;
+    }
+    if (cp - cp_copy != map_size) {
+	cram_free_compression_header(hdr);
+	return NULL;
+    }
+
+    return hdr;
+}
+
+/*
+ * Note we also need to scan through the record encoding map to
+ * see which data series share the same block, either external or
+ * CORE. For example if we need the BF data series but MQ and CF
+ * are also encoded in the same block then we need to add those in
+ * as a dependency in order to correctly decode BF.
+ *
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_dependent_data_series(cram_fd *fd,
+			       cram_block_compression_hdr *hdr,
+			       cram_slice *s) {
+    int *block_used;
+    int core_used = 0;
+    int i;
+    static int i_to_id[] = {
+	DS_BF, DS_AP, DS_FP, DS_RL, DS_DL, DS_NF, DS_BA, DS_QS,
+	DS_FC, DS_FN, DS_BS, DS_IN, DS_RG, DS_MQ, DS_TL, DS_RN,
+	DS_NS, DS_NP, DS_TS, DS_MF, DS_CF, DS_RI, DS_RS, DS_PD,
+	DS_HC, DS_SC, DS_BB, DS_QQ,
+    };
+    uint32_t orig_ds;
+
+    /*
+     * Set the data_series bit field based on fd->required_fields
+     * contents.
+     */
+    if (fd->required_fields && fd->required_fields != INT_MAX) {
+	hdr->data_series = 0;
+
+	if (fd->required_fields & SAM_QNAME)
+	    hdr->data_series |= CRAM_RN;
+
+	if (fd->required_fields & SAM_FLAG)
+	    hdr->data_series |= CRAM_BF;
+
+	if (fd->required_fields & SAM_RNAME)
+	    hdr->data_series |= CRAM_RI | CRAM_BF;
+
+	if (fd->required_fields & SAM_POS)
+	    hdr->data_series |= CRAM_AP | CRAM_BF;
+
+	if (fd->required_fields & SAM_MAPQ)
+	    hdr->data_series |= CRAM_MQ;
+
+	if (fd->required_fields & SAM_CIGAR)
+	    hdr->data_series |= CRAM_CIGAR;
+
+	if (fd->required_fields & SAM_RNEXT)
+	    hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_RI | CRAM_NS |CRAM_BF;
+
+	if (fd->required_fields & SAM_PNEXT)
+	    hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_NP | CRAM_BF;
+
+	if (fd->required_fields & SAM_TLEN)
+	    hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_TS |
+		CRAM_BF | CRAM_MF | CRAM_RI | CRAM_CIGAR;
+
+	if (fd->required_fields & SAM_SEQ)
+	    hdr->data_series |= CRAM_SEQ;
+
+	if (!(fd->required_fields & SAM_AUX))
+	    // No easy way to get MD/NM without other tags at present
+	    fd->decode_md = 0;
+
+	if (fd->required_fields & SAM_QUAL)
+	    hdr->data_series |= CRAM_SEQ;
+
+	if (fd->required_fields & SAM_AUX)
+	    hdr->data_series |= CRAM_RG | CRAM_TL | CRAM_aux;
+
+	if (fd->required_fields & SAM_RGAUX)
+	    hdr->data_series |= CRAM_RG | CRAM_BF;
+
+	// Always uncompress CORE block
+	if (cram_uncompress_block(s->block[0]))
+	    return -1;
+    } else {
+	hdr->data_series = CRAM_ALL;
+
+	for (i = 0; i < s->hdr->num_blocks; i++) {
+	    if (cram_uncompress_block(s->block[i]))
+		return -1;
+	}
+
+	return 0;
+    }
+
+    block_used = calloc(s->hdr->num_blocks+1, sizeof(int));
+    if (!block_used)
+	return -1;
+
+    do {
+	/*
+	 * Also set data_series based on code prerequisites. Eg if we need
+	 * CRAM_QS then we also need to know CRAM_RL so we know how long it
+	 * is, or if we need FC/FP then we also need FN (number of features).
+	 *
+	 * It's not reciprocal though. We may be needing to decode FN
+	 * but have no need to decode FC, FP and cigar ops.
+	 */
+	if (hdr->data_series & CRAM_RS)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_PD)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_HC)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_QS)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_IN)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_SC)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_BS)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_DL)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_BA)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_BB)    hdr->data_series |= CRAM_FC|CRAM_FP;
+	if (hdr->data_series & CRAM_QQ)    hdr->data_series |= CRAM_FC|CRAM_FP;
+
+	// cram_decode_seq() needs seq[] array
+	if (hdr->data_series & (CRAM_SEQ|CRAM_CIGAR)) hdr->data_series |= CRAM_RL;
+
+	if (hdr->data_series & CRAM_FP)    hdr->data_series |= CRAM_FC;
+	if (hdr->data_series & CRAM_FC)    hdr->data_series |= CRAM_FN;
+	if (hdr->data_series & CRAM_aux)   hdr->data_series |= CRAM_TL;
+	if (hdr->data_series & CRAM_MF)    hdr->data_series |= CRAM_CF;
+	if (hdr->data_series & CRAM_MQ)    hdr->data_series |= CRAM_BF;
+	if (hdr->data_series & CRAM_BS)    hdr->data_series |= CRAM_RI;
+	if (hdr->data_series & (CRAM_MF |CRAM_NS |CRAM_NP |CRAM_TS |CRAM_NF))
+	    hdr->data_series |= CRAM_CF;
+	if (!hdr->read_names_included && hdr->data_series & CRAM_RN)
+	    hdr->data_series |= CRAM_CF | CRAM_NF;
+	if (hdr->data_series & (CRAM_BA | CRAM_QS | CRAM_BB | CRAM_QQ))
+	    hdr->data_series |= CRAM_BF | CRAM_CF | CRAM_RL;
+
+	orig_ds = hdr->data_series;
+
+	// Find which blocks are in use.
+	for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) {
+	    int bnum1, bnum2, j;
+	    cram_codec *c = hdr->codecs[i_to_id[i]];
+
+	    if (!(hdr->data_series & (1<<i)))
+		continue;
+
+	    if (!c)
+		continue;
+
+	    bnum1 = cram_codec_to_id(c, &bnum2);
+
+	    for (;;) {
+		switch (bnum1) {
+		case -2:
+		    break;
+
+		case -1:
+		    core_used = 1;
+		    break;
+
+		default:
+		    for (j = 0; j < s->hdr->num_blocks; j++) {
+			if (s->block[j]->content_type == EXTERNAL &&
+			    s->block[j]->content_id == bnum1) {
+			    block_used[j] = 1;
+			    if (cram_uncompress_block(s->block[j])) {
+				free(block_used);
+				return -1;
+			    }
+			}
+		    }
+		    break;
+		}
+
+		if (bnum2 == -2 || bnum1 == bnum2)
+		    break;
+
+		bnum1 = bnum2; // 2nd pass
+	    }
+	}
+
+	// Tags too
+	if ((fd->required_fields & SAM_AUX) ||
+	    (hdr->data_series & CRAM_aux)) {
+	    for (i = 0; i < CRAM_MAP_HASH; i++) {
+		int bnum1, bnum2, j;
+		cram_map *m = hdr->tag_encoding_map[i];
+
+		while (m) {
+		    cram_codec *c = m->codec;
+		    if (!c)
+			continue;
+
+		    bnum1 = cram_codec_to_id(c, &bnum2);
+
+		    for (;;) {
+			switch (bnum1) {
+			case -2:
+			    break;
+
+			case -1:
+			    core_used = 1;
+			    break;
+
+			default:
+			    for (j = 0; j < s->hdr->num_blocks; j++) {
+				if (s->block[j]->content_type == EXTERNAL &&
+				    s->block[j]->content_id == bnum1) {
+				    block_used[j] = 1;
+				    if (cram_uncompress_block(s->block[j])) {
+					free(block_used);
+					return -1;
+				    }
+				}
+			    }
+			    break;
+			}
+
+			if (bnum2 == -2 || bnum1 == bnum2)
+			    break;
+
+			bnum1 = bnum2; // 2nd pass
+		    }
+
+		    m = m->next;
+		}
+	    }
+	}
+
+	// We now know which blocks are in used, so repeat and find
+	// which other data series need to be added.
+	for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) {
+	    int bnum1, bnum2, j;
+	    cram_codec *c = hdr->codecs[i_to_id[i]];
+
+	    if (!c)
+		continue;
+
+	    bnum1 = cram_codec_to_id(c, &bnum2);
+
+	    for (;;) {
+		switch (bnum1) {
+		case -2:
+		    break;
+
+		case -1:
+		    if (core_used) {
+			//printf(" + data series %08x:\n", 1<<i);
+			hdr->data_series |= 1<<i;
+		    }
+		    break;
+
+		default:
+		    for (j = 0; j < s->hdr->num_blocks; j++) {
+			if (s->block[j]->content_type == EXTERNAL &&
+			    s->block[j]->content_id == bnum1) {
+			    if (block_used[j]) {
+				//printf(" + data series %08x:\n", 1<<i);
+				hdr->data_series |= 1<<i;
+			    }
+			}
+		    }
+		    break;
+		}
+
+		if (bnum2 == -2 || bnum1 == bnum2)
+		    break;
+
+		bnum1 = bnum2; // 2nd pass
+	    }
+	}
+
+	// Tags too
+	for (i = 0; i < CRAM_MAP_HASH; i++) {
+	    int bnum1, bnum2, j;
+	    cram_map *m = hdr->tag_encoding_map[i];
+
+	    while (m) {
+		cram_codec *c = m->codec;
+		if (!c)
+		    continue;
+
+		bnum1 = cram_codec_to_id(c, &bnum2);
+		
+		for (;;) {
+		    switch (bnum1) {
+		    case -2:
+			break;
+
+		    case -1:
+			//printf(" + data series %08x:\n", CRAM_aux);
+			hdr->data_series |= CRAM_aux;
+			break;
+
+		    default:
+			for (j = 0; j < s->hdr->num_blocks; j++) {
+			    if (s->block[j]->content_type &&
+				s->block[j]->content_id == bnum1) {
+				if (block_used[j]) {
+				    //printf(" + data series %08x:\n",
+				    //       CRAM_aux);
+				    hdr->data_series |= CRAM_aux;
+				}
+			    }
+			}
+			break;
+		    }
+
+		    if (bnum2 == -2 || bnum1 == bnum2)
+			break;
+
+		    bnum1 = bnum2; // 2nd pass
+		}
+
+		m = m->next;
+	    }
+	}
+    } while (orig_ds != hdr->data_series);
+
+    free(block_used);
+    return 0;
+}
+
+/* ----------------------------------------------------------------------
+ * CRAM slices
+ */
+
+/*
+ * Decodes a CRAM (un)mapped slice header block.
+ * Returns slice header ptr on success
+ *         NULL on failure
+ */
+cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b) {
+    cram_block_slice_hdr *hdr;
+    char *cp = (char *)b->data;
+    int i;
+
+    if (b->content_type != MAPPED_SLICE &&
+	b->content_type != UNMAPPED_SLICE)
+	return NULL;
+
+    if (!(hdr  = calloc(1, sizeof(*hdr))))
+	return NULL;
+
+    hdr->content_type = b->content_type;
+
+    if (b->content_type == MAPPED_SLICE) {
+	cp += itf8_get(cp, &hdr->ref_seq_id);
+	cp += itf8_get(cp, &hdr->ref_seq_start);
+	cp += itf8_get(cp, &hdr->ref_seq_span);
+    }
+    cp += itf8_get(cp, &hdr->num_records);
+    hdr->record_counter = 0;
+    if (CRAM_MAJOR_VERS(fd->version) == 2) {
+	int32_t i32;
+	cp += itf8_get(cp, &i32);
+	hdr->record_counter = i32;
+    } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
+	cp += ltf8_get(cp, &hdr->record_counter);
+    }
+
+    cp += itf8_get(cp, &hdr->num_blocks);
+
+    cp += itf8_get(cp, &hdr->num_content_ids);
+    hdr->block_content_ids = malloc(hdr->num_content_ids * sizeof(int32_t));
+    if (!hdr->block_content_ids) {
+	free(hdr);
+	return NULL;
+    }
+
+    for (i = 0; i < hdr->num_content_ids; i++) {
+	cp += itf8_get(cp, &hdr->block_content_ids[i]);
+    }
+
+    if (b->content_type == MAPPED_SLICE) {
+	cp += itf8_get(cp, &hdr->ref_base_id);
+    }
+
+    if (CRAM_MAJOR_VERS(fd->version) != 1) {
+	memcpy(hdr->md5, cp, 16);
+    } else {
+	memset(hdr->md5, 0, 16);
+    }
+
+    return hdr;
+}
+
+
+#if 0
+/* Returns the number of bits set in val; it the highest bit used */
+static int nbits(int v) {
+    static const int MultiplyDeBruijnBitPosition[32] = {
+	1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,
+	9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32
+    };
+
+    v |= v >> 1; // first up to set all bits 1 after the first 1 */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+
+    // DeBruijn magic to find top bit
+    return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
+}
+#endif
+
+#if 0
+static int sort_freqs(const void *vp1, const void *vp2) {
+    const int i1 = *(const int *)vp1;
+    const int i2 = *(const int *)vp2;
+    return i1-i2;
+}
+#endif
+
+/* ----------------------------------------------------------------------
+ * Primary CRAM sequence decoder
+ */
+
+/*
+ * Internal part of cram_decode_slice().
+ * Generates the sequence, quality and cigar components.
+ */
+static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
+			   cram_block *blk, cram_record *cr, SAM_hdr *bfd,
+			   int cf, char *seq, char *qual) {
+    int prev_pos = 0, f, r = 0, out_sz = 1;
+    int seq_pos = 1;
+    int cig_len = 0, ref_pos = cr->apos;
+    int32_t fn, i32;
+    enum cigar_op cig_op = BAM_CMATCH;
+    uint32_t *cigar = s->cigar;
+    uint32_t ncigar = s->ncigar;
+    uint32_t cigar_alloc = s->cigar_alloc;
+    uint32_t nm = 0, md_dist = 0;
+    int orig_aux = 0;
+    int decode_md = fd->decode_md && s->ref;
+    uint32_t ds = c->comp_hdr->data_series;
+
+    if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
+	memset(qual, 30, cr->len);
+    }
+
+    if (decode_md) {
+	orig_aux = BLOCK_SIZE(s->aux_blk);
+	BLOCK_APPEND(s->aux_blk, "MDZ", 3);
+    }
+    
+    if (ds & CRAM_FN) {
+	if (!c->comp_hdr->codecs[DS_FN]) return -1;
+	r |= c->comp_hdr->codecs[DS_FN]->decode(s,c->comp_hdr->codecs[DS_FN],
+						blk, (char *)&fn, &out_sz);
+    } else {
+	fn = 0;
+    }
+
+    ref_pos--; // count from 0
+    cr->cigar = ncigar;
+
+    if (!(ds & (CRAM_FC | CRAM_FP)))
+	goto skip_cigar;
+
+    for (f = 0; f < fn; f++) {
+	int32_t pos = 0;
+	char op;
+
+	if (ncigar+2 >= cigar_alloc) {
+	    cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
+	    s->cigar = cigar;
+	    if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
+		return -1;
+	}
+
+	if (ds & CRAM_FC) {
+	    if (!c->comp_hdr->codecs[DS_FC]) return -1;
+	    r |= c->comp_hdr->codecs[DS_FC]->decode(s,
+						    c->comp_hdr->codecs[DS_FC],
+						    blk,
+						    &op,  &out_sz);
+	}
+
+	if (!(ds & CRAM_FP))
+	    continue;
+
+	if (!c->comp_hdr->codecs[DS_FP]) return -1;
+	r |= c->comp_hdr->codecs[DS_FP]->decode(s,
+						c->comp_hdr->codecs[DS_FP],
+						blk,
+						(char *)&pos, &out_sz);
+	pos += prev_pos;
+
+	if (pos > seq_pos) {
+	    if (pos > cr->len+1)
+		return -1;
+
+	    if (s->ref && cr->ref_id >= 0) {
+		if (ref_pos + pos - seq_pos > bfd->ref[cr->ref_id].len) {
+		    static int whinged = 0;
+		    if (!whinged)
+			fprintf(stderr, "Ref pos outside of ref "
+				"sequence boundary\n");
+		    whinged = 1;
+		} else {
+		    memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
+			   pos - seq_pos);
+		}
+	    }
+#ifdef USE_X
+	    if (cig_len && cig_op != BAM_CBASE_MATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    cig_op = BAM_CBASE_MATCH;
+#else
+	    if (cig_len && cig_op != BAM_CMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    cig_op = BAM_CMATCH;
+#endif
+	    cig_len += pos - seq_pos;
+	    ref_pos += pos - seq_pos;
+	    md_dist += pos - seq_pos;
+	    seq_pos = pos;
+	}
+
+	prev_pos = pos;
+
+	if (!(ds & CRAM_FC))
+	    goto skip_cigar;
+
+	if (!(ds & CRAM_FC))
+	    continue;
+
+	switch(op) {
+	case 'S': { // soft clip: IN
+	    int32_t out_sz2 = 1;
+
+	    if (cig_len) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_IN) {
+		switch (CRAM_MAJOR_VERS(fd->version)) {
+		case 1:
+		    r |= c->comp_hdr->codecs[DS_IN]
+			? c->comp_hdr->codecs[DS_IN]
+			             ->decode(s, c->comp_hdr->codecs[DS_IN],
+					      blk, &seq[pos-1], &out_sz2)
+			: (seq[pos-1] = 'N', out_sz2 = 1, 0);
+		    break;
+
+		case 2:
+		default:
+		    r |= c->comp_hdr->codecs[DS_SC]
+			? c->comp_hdr->codecs[DS_SC]
+			             ->decode(s, c->comp_hdr->codecs[DS_SC],
+					      blk, &seq[pos-1], &out_sz2)
+			: (seq[pos-1] = 'N', out_sz2 = 1, 0);
+		    break;
+
+//		default:
+//		    r |= c->comp_hdr->codecs[DS_BB]
+//			? c->comp_hdr->codecs[DS_BB]
+//			             ->decode(s, c->comp_hdr->codecs[DS_BB],
+//					      blk, &seq[pos-1], &out_sz2)
+//			: (seq[pos-1] = 'N', out_sz2 = 1, 0);
+		}
+		cigar[ncigar++] = (out_sz2<<4) + BAM_CSOFT_CLIP;
+		cig_op = BAM_CSOFT_CLIP;
+		seq_pos += out_sz2;
+	    }
+	    break;
+	}
+
+	case 'X': { // Substitution; BS
+	    unsigned char base;
+#ifdef USE_X
+	    if (cig_len && cig_op != BAM_CBASE_MISMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_BS) {
+		if (!c->comp_hdr->codecs[DS_BS]) return -1;
+		r |= c->comp_hdr->codecs[DS_BS]
+		                ->decode(s, c->comp_hdr->codecs[DS_BS], blk,
+					 (char *)&base, &out_sz);
+		seq[pos-1] = 'N'; // FIXME look up BS=base value
+	    }
+	    cig_op = BAM_CBASE_MISMATCH;
+#else
+	    int ref_base;
+	    if (cig_len && cig_op != BAM_CMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_BS) {
+		if (!c->comp_hdr->codecs[DS_BS]) return -1;
+		r |= c->comp_hdr->codecs[DS_BS]
+		                ->decode(s, c->comp_hdr->codecs[DS_BS], blk,
+					 (char *)&base, &out_sz);
+		if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) {
+		    seq[pos-1] = 'N';
+		} else {
+		    ref_base = fd->L1[(uc)s->ref[ref_pos - s->ref_start +1]];
+		    seq[pos-1] = c->comp_hdr->
+			substitution_matrix[ref_base][base];
+		    if (decode_md) {
+			BLOCK_APPEND_UINT(s->aux_blk, md_dist);
+			BLOCK_APPEND_CHAR(s->aux_blk,
+					  s->ref[ref_pos-s->ref_start +1]);
+			md_dist = 0;
+		    }
+		}
+	    }
+	    cig_op = BAM_CMATCH;
+#endif
+	    nm++;
+	    cig_len++;
+	    seq_pos++;
+	    ref_pos++;
+	    break;
+	}
+
+	case 'D': { // Deletion; DL
+	    if (cig_len && cig_op != BAM_CDEL) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_DL) {
+		if (!c->comp_hdr->codecs[DS_DL]) return -1;
+		r |= c->comp_hdr->codecs[DS_DL]
+		                ->decode(s, c->comp_hdr->codecs[DS_DL], blk,
+					 (char *)&i32, &out_sz);
+		if (decode_md) {
+		    BLOCK_APPEND_UINT(s->aux_blk, md_dist);
+		    BLOCK_APPEND_CHAR(s->aux_blk, '^');
+		    BLOCK_APPEND(s->aux_blk,
+				 &s->ref[ref_pos - s->ref_start +1],
+				 i32);
+		    md_dist = 0;
+		}
+		cig_op = BAM_CDEL;
+		cig_len += i32;
+		ref_pos += i32;
+		nm      += i32;
+		//printf("  %d: DL = %d (ret %d)\n", f, i32, r);
+	    }
+	    break;
+	}
+
+	case 'I': { // Insertion (several bases); IN
+	    int32_t out_sz2 = 1;
+
+	    if (cig_len && cig_op != BAM_CINS) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+
+	    if (ds & CRAM_IN) {
+		if (!c->comp_hdr->codecs[DS_IN]) return -1;
+		r |= c->comp_hdr->codecs[DS_IN]
+		                ->decode(s, c->comp_hdr->codecs[DS_IN], blk,
+					 &seq[pos-1], &out_sz2);
+		cig_op = BAM_CINS;
+		cig_len += out_sz2;
+		seq_pos += out_sz2;
+		nm      += out_sz2;
+		//printf("  %d: IN(I) = %.*s (ret %d, out_sz %d)\n", f, out_sz2, dat, r, out_sz2);
+	    }
+	    break;
+	}
+
+	case 'i': { // Insertion (single base); BA
+	    if (cig_len && cig_op != BAM_CINS) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_BA) {
+		if (!c->comp_hdr->codecs[DS_BA]) return -1;
+		r |= c->comp_hdr->codecs[DS_BA]
+		                ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
+					 (char *)&seq[pos-1], &out_sz);
+	    }
+	    cig_op = BAM_CINS;
+	    cig_len++;
+	    seq_pos++;
+	    nm++;
+	    break;
+	}
+
+	case 'b': { // Several bases
+	    int32_t len = 1;
+
+	    if (cig_len && cig_op != BAM_CMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+
+	    if (ds & CRAM_BB) {
+		if (!c->comp_hdr->codecs[DS_BB]) return -1;
+		r |= c->comp_hdr->codecs[DS_BB]
+		    ->decode(s, c->comp_hdr->codecs[DS_BB], blk,
+			     (char *)&seq[pos-1], &len);
+	    }
+
+	    cig_op = BAM_CMATCH;
+
+	    cig_len+=len;
+	    seq_pos+=len;
+	    ref_pos+=len;
+	    //prev_pos+=len;
+	    break;
+	}
+
+	case 'q': { // Several quality values
+	    int32_t len = 1;
+
+	    if (cig_len && cig_op != BAM_CMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+
+	    if (ds & CRAM_QQ) {
+		if (!c->comp_hdr->codecs[DS_QQ]) return -1;
+		r |= c->comp_hdr->codecs[DS_QQ]
+		    ->decode(s, c->comp_hdr->codecs[DS_QQ], blk,
+			     (char *)&qual[pos-1], &len);
+	    }
+
+	    cig_op = BAM_CMATCH;
+
+	    cig_len+=len;
+	    seq_pos+=len;
+	    ref_pos+=len;
+	    //prev_pos+=len;
+	    break;
+	}
+
+	case 'B': { // Read base; BA, QS
+#ifdef USE_X
+	    if (cig_len && cig_op != BAM_CBASE_MISMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+#else
+	    if (cig_len && cig_op != BAM_CMATCH) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+#endif
+	    if (ds & CRAM_BA) {
+		if (!c->comp_hdr->codecs[DS_BA]) return -1;
+		r |= c->comp_hdr->codecs[DS_BA]
+		                ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
+					 (char *)&seq[pos-1], &out_sz);
+	    }
+	    if (ds & CRAM_QS) {
+		if (!c->comp_hdr->codecs[DS_QS]) return -1;
+		r |= c->comp_hdr->codecs[DS_QS]
+		                ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
+					 (char *)&qual[pos-1], &out_sz);
+	    }
+#ifdef USE_X
+	    cig_op = BAM_CBASE_MISMATCH;
+#else
+	    cig_op = BAM_CMATCH;
+#endif
+	    cig_len++;
+	    seq_pos++;
+	    ref_pos++;
+	    //printf("  %d: BA/QS(B) = %c/%d (ret %d)\n", f, i32, qc, r);
+	    break;
+	}
+
+	case 'Q': { // Quality score; QS
+	    if (ds & CRAM_QS) {
+		if (!c->comp_hdr->codecs[DS_QS]) return -1;
+		r |= c->comp_hdr->codecs[DS_QS]
+		                ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
+					 (char *)&qual[pos-1], &out_sz);
+		//printf("  %d: QS = %d (ret %d)\n", f, qc, r);
+	    }
+	    break;
+	}
+
+	case 'H': { // hard clip; HC
+	    if (cig_len && cig_op != BAM_CHARD_CLIP) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_HC) {
+		if (!c->comp_hdr->codecs[DS_HC]) return -1;
+		r |= c->comp_hdr->codecs[DS_HC]
+		                ->decode(s, c->comp_hdr->codecs[DS_HC], blk,
+					 (char *)&i32, &out_sz);
+		cig_op = BAM_CHARD_CLIP;
+		cig_len += i32;
+		nm      += i32;
+	    }
+	    break;
+	}
+
+	case 'P': { // padding; PD
+	    if (cig_len && cig_op != BAM_CPAD) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_PD) {
+		if (!c->comp_hdr->codecs[DS_PD]) return -1;
+		r |= c->comp_hdr->codecs[DS_PD]
+		                ->decode(s, c->comp_hdr->codecs[DS_PD], blk,
+					 (char *)&i32, &out_sz);
+		cig_op = BAM_CPAD;
+		cig_len += i32;
+		nm      += i32;
+	    }
+	    break;
+	}
+
+	case 'N': { // Ref skip; RS
+	    if (cig_len && cig_op != BAM_CREF_SKIP) {
+		cigar[ncigar++] = (cig_len<<4) + cig_op;
+		cig_len = 0;
+	    }
+	    if (ds & CRAM_RS) {
+		if (!c->comp_hdr->codecs[DS_RS]) return -1;
+		r |= c->comp_hdr->codecs[DS_RS]
+		                ->decode(s, c->comp_hdr->codecs[DS_RS], blk,
+					 (char *)&i32, &out_sz);
+		cig_op = BAM_CREF_SKIP;
+		cig_len += i32;
+		ref_pos += i32;
+		nm      += i32;
+	    }
+	    break;
+	}
+
+	default:
+	    abort();
+	}
+    }
+
+    if (!(ds & CRAM_FC))
+	goto skip_cigar;
+
+    /* An implement match op for any unaccounted for bases */
+    if ((ds & CRAM_FN) && cr->len >= seq_pos) {
+	if (s->ref) {
+	    if (ref_pos + cr->len - seq_pos + 1 > bfd->ref[cr->ref_id].len) {
+		static int whinged = 0;
+		if (!whinged)
+		    fprintf(stderr, "Ref pos outside of ref sequence boundary\n");
+		whinged = 1;
+	    } else {
+		memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
+		       cr->len - seq_pos + 1);
+		ref_pos += cr->len - seq_pos + 1;
+		md_dist += cr->len - seq_pos + 1;
+	    }
+	}
+
+	if (ncigar+1 >= cigar_alloc) {
+	    cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
+	    s->cigar = cigar;
+	    if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
+		return -1;
+	}
+#ifdef USE_X
+	if (cig_len && cig_op != BAM_CBASE_MATCH) {
+	    cigar[ncigar++] = (cig_len<<4) + cig_op;
+	    cig_len = 0;
+	}
+	cig_op = BAM_CBASE_MATCH;
+#else
+	if (cig_len && cig_op != BAM_CMATCH) {
+	    cigar[ncigar++] = (cig_len<<4) + cig_op;
+	    cig_len = 0;
+	}
+	cig_op = BAM_CMATCH;
+#endif
+	cig_len += cr->len - seq_pos+1;
+    }
+
+ skip_cigar:
+
+    if ((ds & CRAM_FN) && decode_md) {
+	BLOCK_APPEND_UINT(s->aux_blk, md_dist);
+    }
+
+    if (cig_len) {
+	if (ncigar >= cigar_alloc) {
+	    cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
+	    s->cigar = cigar;
+	    if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
+		return -1;
+	}
+
+	cigar[ncigar++] = (cig_len<<4) + cig_op;
+    }
+
+    cr->ncigar = ncigar - cr->cigar;
+    cr->aend = ref_pos;
+
+    //printf("2: %.*s %d .. %d\n", cr->name_len, DSTRING_STR(name_ds) + cr->name, cr->apos, ref_pos);
+
+    if (ds & CRAM_MQ) {
+	if (!c->comp_hdr->codecs[DS_MQ]) return -1;
+	r |= c->comp_hdr->codecs[DS_MQ]
+	                ->decode(s, c->comp_hdr->codecs[DS_MQ], blk,
+				 (char *)&cr->mqual, &out_sz);
+    } else {
+	cr->mqual = 40;
+    }
+
+    if ((ds & CRAM_QS) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
+	int32_t out_sz2 = cr->len;
+
+	if (ds & CRAM_QS) {
+	    if (!c->comp_hdr->codecs[DS_QS]) return -1;
+	    r |= c->comp_hdr->codecs[DS_QS]
+		            ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
+				     qual, &out_sz2);
+	}
+    }
+
+    s->cigar = cigar;
+    s->cigar_alloc = cigar_alloc;
+    s->ncigar = ncigar;
+
+    if (decode_md) {
+	char buf[7];
+	BLOCK_APPEND_CHAR(s->aux_blk, '\0'); // null terminate MD:Z:
+	cr->aux_size += BLOCK_SIZE(s->aux_blk) - orig_aux;
+	buf[0] = 'N'; buf[1] = 'M'; buf[2] = 'I';
+	buf[3] = (nm>> 0) & 0xff;
+	buf[4] = (nm>> 8) & 0xff;
+	buf[5] = (nm>>16) & 0xff;
+	buf[6] = (nm>>24) & 0xff;
+	BLOCK_APPEND(s->aux_blk, buf, 7);
+	cr->aux_size += 7;
+    }
+
+    return r;
+}
+
+/*
+ * Quick and simple hash lookup for cram_map arrays
+ */
+static cram_map *map_find(cram_map **map, unsigned char *key, int id) {
+    cram_map *m;
+
+    m = map[CRAM_MAP(key[0],key[1])];
+    while (m && m->key != id)
+	m= m->next;
+
+    return m;
+}
+
+//#define map_find(M,K,I) M[CRAM_MAP(K[0],K[1])];while (m && m->key != I);m= m->next
+
+
+static int cram_decode_aux_1_0(cram_container *c, cram_slice *s,
+			       cram_block *blk, cram_record *cr) {
+    int i, r = 0, out_sz = 1;
+    unsigned char ntags;
+	    
+    if (!c->comp_hdr->codecs[DS_TC]) return -1;
+    r |= c->comp_hdr->codecs[DS_TC]->decode(s, c->comp_hdr->codecs[DS_TC], blk,
+					    (char *)&ntags, &out_sz);
+    cr->ntags = ntags;
+
+    //printf("TC=%d\n", cr->ntags);
+    cr->aux_size = 0;
+    cr->aux = BLOCK_SIZE(s->aux_blk);
+
+    for (i = 0; i < cr->ntags; i++) {
+	int32_t id, out_sz = 1;
+	unsigned char tag_data[3];
+	cram_map *m;
+
+	//printf("Tag %d/%d\n", i+1, cr->ntags);
+	if (!c->comp_hdr->codecs[DS_TN]) return -1;
+	r |= c->comp_hdr->codecs[DS_TN]->decode(s, c->comp_hdr->codecs[DS_TN],
+						blk, (char *)&id, &out_sz);
+	if (out_sz == 3) {
+	    tag_data[0] = ((char *)&id)[0];
+	    tag_data[1] = ((char *)&id)[1];
+	    tag_data[2] = ((char *)&id)[2];
+	} else {
+	    tag_data[0] = (id>>16) & 0xff;
+	    tag_data[1] = (id>>8)  & 0xff;
+	    tag_data[2] = id       & 0xff;
+	} 
+
+	m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id);
+	if (!m)
+	    return -1;
+	BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);
+
+	if (!m->codec) return -1;
+	r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz);
+
+	cr->aux_size += out_sz + 3;
+    }
+    
+    return r;
+}
+
+static int cram_decode_aux(cram_container *c, cram_slice *s,
+			   cram_block *blk, cram_record *cr) {
+    int i, r = 0, out_sz = 1;
+    int32_t TL = 0;
+    unsigned char *TN;
+    uint32_t ds = c->comp_hdr->data_series;
+	    
+    if (!(ds & (CRAM_TL|CRAM_aux))) {
+	cr->aux = 0;
+	cr->aux_size = 0;
+	return 0;
+    }
+
+    if (!c->comp_hdr->codecs[DS_TL]) return -1;
+    r |= c->comp_hdr->codecs[DS_TL]->decode(s, c->comp_hdr->codecs[DS_TL], blk,
+					    (char *)&TL, &out_sz);
+    if (r || TL < 0 || TL >= c->comp_hdr->nTL)
+	return -1;
+
+    TN = c->comp_hdr->TL[TL];
+    cr->ntags = strlen((char *)TN)/3; // optimise to remove strlen
+
+    //printf("TC=%d\n", cr->ntags);
+    cr->aux_size = 0;
+    cr->aux = BLOCK_SIZE(s->aux_blk);
+
+    if (!(ds & CRAM_aux))
+	return 0;
+
+    for (i = 0; i < cr->ntags; i++) {
+	int32_t id, out_sz = 1;
+	unsigned char tag_data[3];
+	cram_map *m;
+
+	//printf("Tag %d/%d\n", i+1, cr->ntags);
+	tag_data[0] = *TN++;
+	tag_data[1] = *TN++;
+	tag_data[2] = *TN++;
+	id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2];
+
+	m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id);
+	if (!m)
+	    return -1;
+	BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);
+
+	if (!m->codec) return -1;
+	r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz);
+	cr->aux_size += out_sz + 3;
+    }
+    
+    return r;
+}
+
+/* Resolve mate pair cross-references between recs within this slice */
+static void cram_decode_slice_xref(cram_slice *s, int required_fields) {
+    int rec;
+
+    if (!(required_fields & (SAM_RNEXT | SAM_PNEXT | SAM_TLEN))) {
+	for (rec = 0; rec < s->hdr->num_records; rec++) {
+	    cram_record *cr = &s->crecs[rec];
+
+	    cr->tlen = 0;
+	    cr->mate_pos = 0;
+	    cr->mate_ref_id = -1;
+	}
+
+	return;
+    }
+
+    for (rec = 0; rec < s->hdr->num_records; rec++) {
+	cram_record *cr = &s->crecs[rec];
+
+	if (cr->mate_line >= 0) {
+	    if (cr->mate_line < s->hdr->num_records) {
+		/*
+		 * On the first read, loop through computing lengths.
+		 * It's not perfect as we have one slice per reference so we
+		 * cannot detect when TLEN should be zero due to seqs that
+		 * map to multiple references.
+		 *
+		 * We also cannot set tlen correct when it spans a slice for
+		 * other reasons. This may make tlen too small. Should we
+		 * fix this by forcing TLEN to be stored verbatim in such cases?
+		 *
+		 * Or do we just admit defeat and output 0 for tlen? It's the
+		 * safe option...
+		 */
+		if (cr->tlen == INT_MIN) {
+		    int id1 = rec, id2 = rec;
+		    int aleft = cr->apos, aright = cr->aend;
+		    int tlen;
+		    int ref = cr->ref_id;
+
+		    // number of segments starting at the same point.
+		    int left_cnt = 0;
+
+		    do {
+			if (aleft > s->crecs[id2].apos)
+			    aleft = s->crecs[id2].apos, left_cnt = 1;
+			else if (aleft == s->crecs[id2].apos)
+			    left_cnt++;
+			if (aright < s->crecs[id2].aend)
+			    aright = s->crecs[id2].aend;
+			if (s->crecs[id2].mate_line == -1) {
+			    s->crecs[id2].mate_line = rec;
+			    break;
+			}
+			assert(s->crecs[id2].mate_line > id2);
+			id2 = s->crecs[id2].mate_line;
+
+			if (s->crecs[id2].ref_id != ref)
+			    ref = -1;
+		    } while (id2 != id1);
+
+		    if (ref != -1) {
+			tlen = aright - aleft + 1;
+			id1 = id2 = rec;
+
+			/*
+			 * When we have two seqs with identical start and
+			 * end coordinates, set +/- tlen based on 1st/last
+			 * bit flags instead, as a tie breaker.
+			 */
+			if (s->crecs[id2].apos == aleft) {
+			    if (left_cnt == 1 || 
+				(s->crecs[id2].flags & BAM_FREAD1))
+				s->crecs[id2].tlen = tlen;
+			    else
+				s->crecs[id2].tlen = -tlen;
+			} else {
+			    s->crecs[id2].tlen = -tlen;
+			}
+
+			id2 = s->crecs[id2].mate_line;
+			while (id2 != id1) {
+			    if (s->crecs[id2].apos == aleft) {
+				if (left_cnt == 1 || 
+				    (s->crecs[id2].flags & BAM_FREAD1))
+				    s->crecs[id2].tlen = tlen;
+				else
+				    s->crecs[id2].tlen = -tlen;
+			    } else {
+				s->crecs[id2].tlen = -tlen;
+			    }
+			    id2 = s->crecs[id2].mate_line;
+			}
+		    } else {
+			id1 = id2 = rec;
+
+			s->crecs[id2].tlen = 0;
+			id2 = s->crecs[id2].mate_line;
+			while (id2 != id1) {
+			    s->crecs[id2].tlen = 0;
+			    id2 = s->crecs[id2].mate_line;
+			}
+		    }
+		}
+
+		cr->mate_pos = s->crecs[cr->mate_line].apos;
+		cr->mate_ref_id = s->crecs[cr->mate_line].ref_id;
+
+		// paired
+		cr->flags |= BAM_FPAIRED;
+
+		// set mate unmapped if needed
+		if (s->crecs[cr->mate_line].flags & BAM_FUNMAP) {
+		    cr->flags |= BAM_FMUNMAP;
+		    cr->tlen = 0;
+		}
+		if (cr->flags & BAM_FUNMAP) {
+		    cr->tlen = 0;
+		}
+
+		// set mate reversed if needed
+		if (s->crecs[cr->mate_line].flags & BAM_FREVERSE)
+		    cr->flags |= BAM_FMREVERSE;
+	    } else {
+		fprintf(stderr, "Mate line out of bounds: %d vs [0, %d]\n",
+			cr->mate_line, s->hdr->num_records-1);
+	    }
+
+	    /* FIXME: construct read names here too if needed */
+	} else {
+	    if (cr->mate_flags & CRAM_M_REVERSE) {
+		cr->flags |= BAM_FPAIRED | BAM_FMREVERSE;
+	    }
+	    if (cr->mate_flags & CRAM_M_UNMAP) {
+		cr->flags |= BAM_FMUNMAP;
+		//cr->mate_ref_id = -1;
+	    }
+	    if (!(cr->flags & BAM_FPAIRED))
+		cr->mate_ref_id = -1;
+	}
+
+	if (cr->tlen == INT_MIN)
+	    cr->tlen = 0; // Just incase
+    }    
+}
+
+static char *md5_print(unsigned char *md5, char *out) {
+    int i;
+    for (i = 0; i < 16; i++) {
+	out[i*2+0] = "0123456789abcdef"[md5[i]>>4];
+	out[i*2+1] = "0123456789abcdef"[md5[i]&15];
+    }
+    out[32] = 0;
+
+    return out;
+}
+
+/*
+ * Decode an entire slice from container blocks. Fills out s->crecs[] array.
+ * Returns 0 on success
+ *        -1 on failure
+ */
+int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
+		      SAM_hdr *bfd) {
+    cram_block *blk = s->block[0];
+    int32_t bf, ref_id;
+    unsigned char cf;
+    int out_sz, r = 0;
+    int rec;
+    char *seq = NULL, *qual = NULL;
+    int unknown_rg = -1;
+    int embed_ref;
+    char **refs = NULL;
+    uint32_t ds;
+
+    if (cram_dependent_data_series(fd, c->comp_hdr, s) != 0)
+	return -1;
+
+    ds = c->comp_hdr->data_series;
+
+    blk->bit = 7; // MSB first
+
+    /* Look for unknown RG, added as last by Java CRAM? */
+    if (bfd->nrg > 0 &&
+	!strcmp(bfd->rg[bfd->nrg-1].name, "UNKNOWN"))
+	unknown_rg = bfd->nrg-1;
+
+    if (blk->content_type != CORE)
+	return -1;
+
+    if (s->crecs)
+	free(s->crecs);
+    if (!(s->crecs = malloc(s->hdr->num_records * sizeof(*s->crecs))))
+	return -1;
+
+    ref_id = s->hdr->ref_seq_id;
+    embed_ref = s->hdr->ref_base_id >= 0 ? 1 : 0;
+
+    if (ref_id >= 0) {
+	if (embed_ref) {
+	    cram_block *b;
+	    if (s->hdr->ref_base_id < 0) {
+		fprintf(stderr, "No reference specified and "
+			"no embedded reference is available.\n");
+		return -1;
+	    }
+	    if (!s->block_by_id ||
+		!(b = s->block_by_id[s->hdr->ref_base_id]))
+		return -1;
+	    cram_uncompress_block(b);
+	    s->ref = (char *)BLOCK_DATA(b);
+	    s->ref_start = s->hdr->ref_seq_start;
+	    s->ref_end   = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1;
+	} else if (!fd->no_ref) {
+	    //// Avoid Java cramtools bug by loading entire reference seq 
+	    //s->ref = cram_get_ref(fd, s->hdr->ref_seq_id, 1, 0);
+	    //s->ref_start = 1;
+
+	    if (fd->required_fields & SAM_SEQ)
+		s->ref =
+		cram_get_ref(fd, s->hdr->ref_seq_id,
+			     s->hdr->ref_seq_start,
+			     s->hdr->ref_seq_start + s->hdr->ref_seq_span -1);
+	    s->ref_start = s->hdr->ref_seq_start;
+	    s->ref_end   = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1;
+
+	    /* Sanity check */
+	    if (s->ref_start < 0) {
+		fprintf(stderr, "Slice starts before base 1.\n");
+		s->ref_start = 0;
+	    }
+	    pthread_mutex_lock(&fd->ref_lock);
+	    pthread_mutex_lock(&fd->refs->lock);
+	    if ((fd->required_fields & SAM_SEQ) &&
+		s->ref_end > fd->refs->ref_id[ref_id]->length) {
+		fprintf(stderr, "Slice ends beyond reference end.\n");
+		s->ref_end = fd->refs->ref_id[ref_id]->length;
+	    }
+	    pthread_mutex_unlock(&fd->refs->lock);
+	    pthread_mutex_unlock(&fd->ref_lock);
+	}
+    }
+
+    if ((fd->required_fields & SAM_SEQ) &&
+	s->ref == NULL && s->hdr->ref_seq_id >= 0 && !fd->no_ref) {
+	fprintf(stderr, "Unable to fetch reference #%d %d..%d\n",
+		s->hdr->ref_seq_id, s->hdr->ref_seq_start,
+		s->hdr->ref_seq_start + s->hdr->ref_seq_span-1);
+	return -1;
+    }
+
+    if (CRAM_MAJOR_VERS(fd->version) != 1
+	&& (fd->required_fields & SAM_SEQ)
+	&& s->hdr->ref_seq_id >= 0
+	&& !fd->ignore_md5
+	&& memcmp(s->hdr->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
+	MD5_CTX md5;
+	unsigned char digest[16];
+
+	if (s->ref && s->hdr->ref_seq_id >= 0) {
+	    int start, len;
+
+	    if (s->hdr->ref_seq_start >= s->ref_start) {
+		start = s->hdr->ref_seq_start - s->ref_start;
+	    } else {
+		fprintf(stderr, "Slice starts before base 1.\n");
+		start = 0;
+	    }
+
+	    if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) {
+		len = s->hdr->ref_seq_span;
+	    } else {
+		fprintf(stderr, "Slice ends beyond reference end.\n");
+		len = s->ref_end - s->ref_start + 1;
+	    }
+
+	    MD5_Init(&md5);
+	    if (start + len > s->ref_end - s->ref_start + 1)
+		len = s->ref_end - s->ref_start + 1 - start;
+	    if (len >= 0)
+		MD5_Update(&md5, s->ref + start, len);
+	    MD5_Final(digest, &md5);
+	} else if (!s->ref && s->hdr->ref_base_id >= 0) {
+	    cram_block *b;
+	    if (s->block_by_id && (b = s->block_by_id[s->hdr->ref_base_id])) {
+		MD5_Init(&md5);
+		MD5_Update(&md5, b->data, b->uncomp_size);
+		MD5_Final(digest, &md5);
+	    }
+	}
+
+	if ((!s->ref && s->hdr->ref_base_id < 0)
+	    || memcmp(digest, s->hdr->md5, 16) != 0) {
+	    char M[33];
+	    fprintf(stderr, "ERROR: md5sum reference mismatch for ref "
+		    "%d pos %d..%d\n", ref_id, s->ref_start, s->ref_end);
+	    fprintf(stderr, "CRAM: %s\n", md5_print(s->hdr->md5, M));
+	    fprintf(stderr, "Ref : %s\n", md5_print(digest, M));
+	    return -1;
+	}
+    }
+
+    if (ref_id == -2) {
+	pthread_mutex_lock(&fd->ref_lock);
+	pthread_mutex_lock(&fd->refs->lock);
+	refs = calloc(fd->refs->nref, sizeof(char *));
+	pthread_mutex_unlock(&fd->refs->lock);
+	pthread_mutex_unlock(&fd->ref_lock);
+	if (!refs)
+	    return -1;
+    }
+
+    for (rec = 0; rec < s->hdr->num_records; rec++) {
+	cram_record *cr = &s->crecs[rec];
+
+	//fprintf(stderr, "Decode seq %d, %d/%d\n", rec, blk->byte, blk->bit);
+
+	cr->s = s;
+
+	out_sz = 1; /* decode 1 item */
+	if (ds & CRAM_BF) {
+	    if (!c->comp_hdr->codecs[DS_BF]) return -1;
+	    r |= c->comp_hdr->codecs[DS_BF]
+		            ->decode(s, c->comp_hdr->codecs[DS_BF], blk,
+				     (char *)&bf, &out_sz);
+	    if (bf < 0 ||
+		bf >= sizeof(fd->bam_flag_swap)/sizeof(*fd->bam_flag_swap))
+		return -1;
+	    bf = fd->bam_flag_swap[bf];
+	    cr->flags = bf;
+	} else {
+	    cr->flags = bf = 0x4; // unmapped
+	}
+
+	if (ds & CRAM_CF) {
+	    if (CRAM_MAJOR_VERS(fd->version) == 1) {
+		/* CF is byte in 1.0, int32 in 2.0 */
+		if (!c->comp_hdr->codecs[DS_CF]) return -1;
+		r |= c->comp_hdr->codecs[DS_CF]
+		                ->decode(s, c->comp_hdr->codecs[DS_CF], blk,
+					 (char *)&cf, &out_sz);
+		cr->cram_flags = cf;
+	    } else {
+		if (!c->comp_hdr->codecs[DS_CF]) return -1;
+		r |= c->comp_hdr->codecs[DS_CF]
+		                ->decode(s, c->comp_hdr->codecs[DS_CF], blk,
+					 (char *)&cr->cram_flags,
+					 &out_sz);
+		cf = cr->cram_flags;
+	    }
+	}
+
+	if (CRAM_MAJOR_VERS(fd->version) != 1 && ref_id == -2) {
+	    if (ds & CRAM_RI) {
+		if (!c->comp_hdr->codecs[DS_RI]) return -1;
+		r |= c->comp_hdr->codecs[DS_RI]
+		                ->decode(s, c->comp_hdr->codecs[DS_RI], blk,
+					 (char *)&cr->ref_id, &out_sz);
+		if ((fd->required_fields & (SAM_SEQ|SAM_TLEN))
+		    && cr->ref_id >= 0) {
+		    if (!fd->no_ref) {
+			if (!refs[cr->ref_id])
+			    refs[cr->ref_id] = cram_get_ref(fd, cr->ref_id,
+							    1, 0);
+			s->ref = refs[cr->ref_id];
+		    }
+		    s->ref_start = 1;
+		    pthread_mutex_lock(&fd->ref_lock);
+		    pthread_mutex_lock(&fd->refs->lock);
+		    s->ref_end = fd->refs->ref_id[cr->ref_id]->length;
+		    pthread_mutex_unlock(&fd->refs->lock);
+		    pthread_mutex_unlock(&fd->ref_lock);
+		}
+	    } else {
+		cr->ref_id = 0;
+	    }
+	} else {
+	    cr->ref_id = ref_id; // Forced constant in CRAM 1.0
+	}
+
+
+	if (ds & CRAM_RL) {
+	    if (!c->comp_hdr->codecs[DS_RL]) return -1;
+	    r |= c->comp_hdr->codecs[DS_RL]
+		            ->decode(s, c->comp_hdr->codecs[DS_RL], blk,
+				     (char *)&cr->len, &out_sz);
+	}
+
+	if (ds & CRAM_AP) {
+	    if (!c->comp_hdr->codecs[DS_AP]) return -1;
+	    r |= c->comp_hdr->codecs[DS_AP]
+		            ->decode(s, c->comp_hdr->codecs[DS_AP], blk,
+				     (char *)&cr->apos, &out_sz);
+	    if (c->comp_hdr->AP_delta)
+		cr->apos += s->last_apos;
+	    s->last_apos=  cr->apos;
+	} else {
+	    cr->apos = c->ref_seq_start;
+	}
+		    
+	if (ds & CRAM_RG) {
+	    if (!c->comp_hdr->codecs[DS_RG]) return -1;
+	    r |= c->comp_hdr->codecs[DS_RG]
+		           ->decode(s, c->comp_hdr->codecs[DS_RG], blk,
+				    (char *)&cr->rg, &out_sz);
+	    if (cr->rg == unknown_rg)
+		cr->rg = -1;
+	} else {
+	    cr->rg = -1;
+	}
+
+	cr->name_len = 0;
+
+	if (c->comp_hdr->read_names_included) {
+	    int32_t out_sz2 = 1;
+
+	    // Read directly into name cram_block
+	    cr->name = BLOCK_SIZE(s->name_blk);
+	    if (ds & CRAM_RN) {
+		if (!c->comp_hdr->codecs[DS_RN]) return -1;
+		r |= c->comp_hdr->codecs[DS_RN]
+		                ->decode(s, c->comp_hdr->codecs[DS_RN], blk,
+					 (char *)s->name_blk, &out_sz2);
+		cr->name_len = out_sz2;
+	    }
+	}
+
+	cr->mate_pos = 0;
+	cr->mate_line = -1;
+	cr->mate_ref_id = -1;
+	if ((ds & CRAM_CF) && (cf & CRAM_FLAG_DETACHED)) {
+	    if (ds & CRAM_MF) {
+		if (CRAM_MAJOR_VERS(fd->version) == 1) {
+		    /* MF is byte in 1.0, int32 in 2.0 */
+		    unsigned char mf;
+		    if (!c->comp_hdr->codecs[DS_MF]) return -1;
+		    r |= c->comp_hdr->codecs[DS_MF]
+			            ->decode(s, c->comp_hdr->codecs[DS_MF],
+					     blk, (char *)&mf, &out_sz);
+		    cr->mate_flags = mf;
+		} else {
+		    if (!c->comp_hdr->codecs[DS_MF]) return -1;
+		    r |= c->comp_hdr->codecs[DS_MF]
+			            ->decode(s, c->comp_hdr->codecs[DS_MF],
+					     blk,
+					     (char *)&cr->mate_flags,
+					     &out_sz);
+		}
+	    } else {
+		cr->mate_flags = 0;
+	    }
+
+	    if (!c->comp_hdr->read_names_included) {
+		int32_t out_sz2 = 1;
+	    
+		// Read directly into name cram_block
+		cr->name = BLOCK_SIZE(s->name_blk);
+		if (ds & CRAM_RN) {
+		    if (!c->comp_hdr->codecs[DS_RN]) return -1;
+		    r |= c->comp_hdr->codecs[DS_RN]
+			            ->decode(s, c->comp_hdr->codecs[DS_RN],
+					     blk, (char *)s->name_blk,
+					     &out_sz2);
+		    cr->name_len = out_sz2;
+		}
+	    }
+		    
+	    if (ds & CRAM_NS) {
+		if (!c->comp_hdr->codecs[DS_NS]) return -1;
+		r |= c->comp_hdr->codecs[DS_NS]
+		                ->decode(s, c->comp_hdr->codecs[DS_NS], blk,
+					 (char *)&cr->mate_ref_id, &out_sz);
+	    }
+
+// Skip as mate_ref of "*" is legit. It doesn't mean unmapped, just unknown.
+//	    if (cr->mate_ref_id == -1 && cr->flags & 0x01) {
+//		/* Paired, but unmapped */
+//		cr->flags |= BAM_FMUNMAP;
+//	    }
+
+	    if (ds & CRAM_NP) {
+		if (!c->comp_hdr->codecs[DS_NP]) return -1;
+		r |= c->comp_hdr->codecs[DS_NP]
+		                ->decode(s, c->comp_hdr->codecs[DS_NP], blk,
+					 (char *)&cr->mate_pos, &out_sz);
+	    }
+
+	    if (ds & CRAM_TS) {
+		if (!c->comp_hdr->codecs[DS_TS]) return -1;
+		r |= c->comp_hdr->codecs[DS_TS]
+		                ->decode(s, c->comp_hdr->codecs[DS_TS], blk,
+					 (char *)&cr->tlen, &out_sz);
+	    } else {
+		cr->tlen = INT_MIN;
+	    }
+	} else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_MATE_DOWNSTREAM)) {
+	    if (ds & CRAM_NF) {
+		if (!c->comp_hdr->codecs[DS_NF]) return -1;
+		r |= c->comp_hdr->codecs[DS_NF]
+		                ->decode(s, c->comp_hdr->codecs[DS_NF], blk,
+					 (char *)&cr->mate_line, &out_sz);
+		cr->mate_line += rec + 1;
+
+		//cr->name_len = sprintf(name, "%d", name_id++);
+		//cr->name = DSTRING_LEN(name_ds);
+		//dstring_nappend(name_ds, name, cr->name_len);
+
+		cr->mate_ref_id = -1;
+		cr->tlen = INT_MIN;
+		cr->mate_pos = 0;
+	    } else  {
+		cr->mate_flags = 0;
+		cr->tlen = INT_MIN;
+	    }
+	} else {
+	    cr->mate_flags = 0;
+	    cr->tlen = INT_MIN;
+	}
+	/*
+	else if (!name[0]) {
+	    //name[0] = '?'; name[1] = 0;
+	    //cr->name_len = 1;
+	    //cr->name=  DSTRING_LEN(s->name_ds);
+	    //dstring_nappend(s->name_ds, "?", 1);
+
+	    cr->mate_ref_id = -1;
+	    cr->tlen = 0;
+	    cr->mate_pos = 0;
+	}
+	*/
+
+	/* Auxiliary tags */
+	if (CRAM_MAJOR_VERS(fd->version) == 1)
+	    r |= cram_decode_aux_1_0(c, s, blk, cr);
+	else
+	    r |= cram_decode_aux(c, s, blk, cr);
+
+	/* Fake up dynamic string growth and appending */
+	if (ds & CRAM_RL) {
+	    cr->seq = BLOCK_SIZE(s->seqs_blk);
+	    BLOCK_GROW(s->seqs_blk, cr->len);
+	    seq = (char *)BLOCK_END(s->seqs_blk);
+	    BLOCK_SIZE(s->seqs_blk) += cr->len;
+
+	    if (!seq)
+		return -1;
+	
+	    cr->qual = BLOCK_SIZE(s->qual_blk);
+	    BLOCK_GROW(s->qual_blk, cr->len);
+	    qual = (char *)BLOCK_END(s->qual_blk);
+	    BLOCK_SIZE(s->qual_blk) += cr->len;
+
+	    if (!s->ref)
+		memset(seq, '=', cr->len);
+	}
+
+	if (!(bf & BAM_FUNMAP)) {
+	    /* Decode sequence and generate CIGAR */
+	    if (ds & (CRAM_SEQ | CRAM_MQ)) {
+		r |= cram_decode_seq(fd, c, s, blk, cr, bfd, cf, seq, qual);
+	    } else {
+		cr->cigar = 0;
+		cr->ncigar = 0;
+		cr->aend = cr->apos;
+		cr->mqual = 0;
+	    }
+	} else {
+	    int out_sz2 = cr->len;
+
+	    //puts("Unmapped");
+	    cr->cigar = 0;
+	    cr->ncigar = 0;
+	    cr->aend = cr->apos;
+	    cr->mqual = 0;
+
+	    if (ds & CRAM_BA) {
+		if (!c->comp_hdr->codecs[DS_BA]) return -1;
+		r |= c->comp_hdr->codecs[DS_BA]
+		                ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
+					 (char *)seq, &out_sz2);
+	    }
+
+	    if ((ds & CRAM_CF) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
+		out_sz2 = cr->len;
+		if (ds & CRAM_QS) {
+		    if (!c->comp_hdr->codecs[DS_QS]) return -1;
+		    r |= c->comp_hdr->codecs[DS_QS]
+			            ->decode(s, c->comp_hdr->codecs[DS_QS],
+					     blk, qual, &out_sz2);
+		}
+	    } else {
+		if (ds & CRAM_RL)
+		    memset(qual, 30, cr->len);
+	    }
+	}
+    }
+
+    pthread_mutex_lock(&fd->ref_lock);
+    if (refs) {
+	int i;
+	for (i = 0; i < fd->refs->nref; i++) {
+	    if (refs[i])
+		cram_ref_decr(fd->refs, i);
+	}
+	free(refs);
+    } else if (ref_id >= 0 && s->ref != fd->ref_free) {
+	cram_ref_decr(fd->refs, ref_id);
+    }
+    pthread_mutex_unlock(&fd->ref_lock);
+
+    /* Resolve mate pair cross-references between recs within this slice */
+    cram_decode_slice_xref(s, fd->required_fields);
+
+    return r;
+}
+
+typedef struct {
+    cram_fd *fd;
+    cram_container *c;
+    cram_slice *s;
+    SAM_hdr *h;
+    int exit_code;
+} cram_decode_job;
+
+void *cram_decode_slice_thread(void *arg) {
+    cram_decode_job *j = (cram_decode_job *)arg;
+
+    j->exit_code = cram_decode_slice(j->fd, j->c, j->s, j->h);
+
+    return j;
+}
+
+/*
+ * Spawn a multi-threaded version of cram_decode_slice().
+ */
+int cram_decode_slice_mt(cram_fd *fd, cram_container *c, cram_slice *s,
+			 SAM_hdr *bfd) {
+    cram_decode_job *j;
+    int nonblock;
+
+    if (!fd->pool)
+	return cram_decode_slice(fd, c, s, bfd);
+
+    if (!(j = malloc(sizeof(*j))))
+	return -1;
+
+    j->fd = fd;
+    j->c  = c;
+    j->s  = s;
+    j->h  = bfd;
+    
+    nonblock = t_pool_results_queue_sz(fd->rqueue) ? 1 : 0;
+
+    if (-1 == t_pool_dispatch2(fd->pool, fd->rqueue, cram_decode_slice_thread,
+			       j, nonblock)) {
+	/* Would block */
+	fd->job_pending = j;
+    } else {
+	fd->job_pending = NULL;
+    }
+
+    // flush too
+    return 0;
+}
+
+
+/* ----------------------------------------------------------------------
+ * CRAM sequence iterators.
+ */
+
+/*
+ * Converts a cram in-memory record into a bam in-memory record. We
+ * pass a pointer to a bam_seq_t pointer along with the a pointer to
+ * the allocated size. These can initially be pointers to NULL and zero.
+ *
+ * This function will reallocate the bam buffer as required and update
+ * (*bam)->alloc accordingly, allowing it to be used within a loop
+ * efficiently without needing to allocate new bam objects over and
+ * over again.
+ *
+ * Returns the used size of the bam record on success
+ *         -1 on failure.
+ */
+static int cram_to_bam(SAM_hdr *bfd, cram_fd *fd, cram_slice *s,
+		       cram_record *cr, int rec, bam_seq_t **bam) {
+    int bam_idx, rg_len;
+    char name_a[1024], *name;
+    int name_len;
+    char *aux, *aux_orig;
+    char *seq, *qual;
+
+    /* Assign names if not explicitly set */
+    if (fd->required_fields & SAM_QNAME) {
+	if (cr->name_len) {
+	    name = (char *)BLOCK_DATA(s->name_blk) + cr->name;
+	    name_len = cr->name_len;
+	} else {
+	    name = name_a;
+	    name_len = strlen(fd->prefix);
+	    memcpy(name, fd->prefix, name_len);
+	    name += name_len;
+	    *name++ = ':';
+	    if (cr->mate_line >= 0 && cr->mate_line < rec)
+		name = (char *)append_uint64((unsigned char *)name,
+					     s->hdr->record_counter +
+					     cr->mate_line + 1);
+	    else
+		name = (char *)append_uint64((unsigned char *)name,
+					     s->hdr->record_counter +
+					     rec + 1);
+	    name_len = name - name_a;
+	    name = name_a;
+	}
+    } else {
+	name = "?";
+	name_len = 1;
+    }
+
+    /* Generate BAM record */
+    if (cr->rg < -1 || cr->rg >= bfd->nrg)
+	return -1;
+    rg_len = (cr->rg != -1) ? bfd->rg[cr->rg].name_len + 4 : 0;
+
+    if (fd->required_fields & (SAM_SEQ | SAM_QUAL)) {
+	if (!BLOCK_DATA(s->seqs_blk))
+	    return -1;
+	seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq;
+    } else {
+	seq = "*";
+	cr->len = 1;
+    }
+
+
+    if (fd->required_fields & SAM_QUAL) {
+	if (!BLOCK_DATA(s->qual_blk))
+	    return -1;
+	qual = (char *)BLOCK_DATA(s->qual_blk) + cr->qual;
+    } else {
+	qual = NULL;
+    }
+
+    bam_idx = bam_construct_seq(bam, cr->aux_size + rg_len,
+				name, name_len,
+				cr->flags,
+				cr->ref_id,
+				cr->apos,
+				cr->aend,
+				cr->mqual,
+				cr->ncigar, &s->cigar[cr->cigar],
+				cr->mate_ref_id,
+				cr->mate_pos,
+				cr->tlen,
+				cr->len,
+				seq,
+				qual);
+    if (bam_idx == -1)
+	return -1;
+
+    aux = aux_orig = (char *)bam_aux(*bam);
+
+    /* Auxiliary strings */
+    if (cr->aux_size != 0) {
+	memcpy(aux, BLOCK_DATA(s->aux_blk) + cr->aux, cr->aux_size);
+	aux += cr->aux_size;
+    }
+
+    /* RG:Z: */
+    if (cr->rg != -1) {
+	int len = bfd->rg[cr->rg].name_len;
+	*aux++ = 'R'; *aux++ = 'G'; *aux++ = 'Z';
+	memcpy(aux, bfd->rg[cr->rg].name, len);
+	aux += len;
+	*aux++ = 0;
+    }
+    
+    return bam_idx + (aux - aux_orig);
+}
+
+/*
+ * Here be dragons! The multi-threading code in this is crufty beyond belief.
+ */
+static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
+    cram_container *c;
+    cram_slice *s = NULL;
+
+    if (!(c = fd->ctr)) {
+	// Load first container.
+	do {
+	    if (!(c = fd->ctr = cram_read_container(fd)))
+		return NULL;
+	} while (c->length == 0);
+
+	/*
+	 * The first container may be a result of a sub-range query.
+	 * In which case it may still not be the optimal starting point
+	 * due to skipped containers/slices in the index. 
+	 */
+	if (fd->range.refid != -2) {
+	    while (c->ref_seq_id != -2 &&
+		   (c->ref_seq_id < fd->range.refid ||
+		    c->ref_seq_start + c->ref_seq_span-1 < fd->range.start)) {
+		if (0 != cram_seek(fd, c->length, SEEK_CUR))
+		    return NULL;
+		cram_free_container(fd->ctr);
+		do {
+		    if (!(c = fd->ctr = cram_read_container(fd)))
+			return NULL;
+		} while (c->length == 0);
+	    }
+
+	    if (c->ref_seq_id != -2 && c->ref_seq_id != fd->range.refid)
+		return NULL;
+	}
+
+	if (!(c->comp_hdr_block = cram_read_block(fd)))
+	    return NULL;
+	if (c->comp_hdr_block->content_type != COMPRESSION_HEADER)
+	    return NULL;
+
+	c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block);
+	if (!c->comp_hdr)
+	    return NULL;
+	if (!c->comp_hdr->AP_delta) {
+	    pthread_mutex_lock(&fd->ref_lock);
+	    fd->unsorted = 1;
+	    pthread_mutex_unlock(&fd->ref_lock);
+	}
+    }
+
+    if ((s = c->slice)) {
+	c->slice = NULL;
+	cram_free_slice(s);
+	s = NULL;
+    }
+
+    if (c->curr_slice == c->max_slice) {
+	cram_free_container(c);
+	c = NULL;
+    }
+
+    /* Sorry this is so contorted! */
+    for (;;) {
+	if (fd->job_pending) {
+	    cram_decode_job *j = (cram_decode_job *)fd->job_pending;
+	    c = j->c;
+	    s = j->s;
+	    free(fd->job_pending);
+	    fd->job_pending = NULL;
+	} else if (!fd->ooc) {
+	empty_container:
+	    if (!c || c->curr_slice == c->max_slice) {
+		// new container
+		do {
+		    if (!(c = fd->ctr = cram_read_container(fd))) {
+			if (fd->pool) {
+			    fd->ooc = 1;
+			    break;
+			}
+
+			return NULL;
+		    }
+		} while (c->length == 0);
+		if (fd->ooc)
+		    break;
+
+		/* Skip containers not yet spanning our range */
+		if (fd->range.refid != -2 && c->ref_seq_id != -2) {
+		    fd->required_fields |= SAM_POS;
+
+		    if (c->ref_seq_id != fd->range.refid) {
+			cram_free_container(c);
+			fd->ctr = NULL;
+			fd->ooc = 1;
+			fd->eof = 1;
+			break;
+		    }
+
+		    if (c->ref_seq_start > fd->range.end) {
+			cram_free_container(c);
+			fd->ctr = NULL;
+			fd->ooc = 1;
+			fd->eof = 1;
+			break;
+		    }
+
+		    if (c->ref_seq_start + c->ref_seq_span-1 <
+			fd->range.start) {
+			c->curr_rec = c->max_rec;
+			c->curr_slice = c->max_slice;
+			cram_seek(fd, c->length, SEEK_CUR);
+			cram_free_container(c);
+			c = NULL;
+			continue;
+		    }
+		}
+
+		if (!(c->comp_hdr_block = cram_read_block(fd)))
+		    return NULL;
+		if (c->comp_hdr_block->content_type != COMPRESSION_HEADER)
+		    return NULL;
+
+		c->comp_hdr =
+		    cram_decode_compression_header(fd, c->comp_hdr_block);
+		if (!c->comp_hdr)
+		    return NULL;
+
+		if (!c->comp_hdr->AP_delta) {
+		    pthread_mutex_lock(&fd->ref_lock);
+		    fd->unsorted = 1;
+		    pthread_mutex_unlock(&fd->ref_lock);
+		}
+	    }
+
+	    if (c->num_records == 0) {
+		cram_free_container(c); c = NULL;
+		goto empty_container;
+	    }
+
+
+	    if (!(s = c->slice = cram_read_slice(fd)))
+		return NULL;
+	    c->curr_slice++;
+	    c->curr_rec = 0;
+	    c->max_rec = s->hdr->num_records;
+
+	    s->last_apos = s->hdr->ref_seq_start;
+	    
+	    /* Skip slices not yet spanning our range */
+	    if (fd->range.refid != -2 && s->hdr->ref_seq_id != -2) {
+		if (s->hdr->ref_seq_id != fd->range.refid) {
+		    fd->eof = 1;
+		    cram_free_slice(s);
+		    c->slice = NULL;
+		    return NULL;
+		}
+
+		if (s->hdr->ref_seq_start > fd->range.end) {
+		    fd->eof = 1;
+		    cram_free_slice(s);
+		    c->slice = NULL;
+		    return NULL;
+		}
+
+		if (s->hdr->ref_seq_start + s->hdr->ref_seq_span-1 <
+		    fd->range.start) {
+		    cram_free_slice(s);
+		    c->slice = NULL;
+		    cram_free_container(c);
+		    c = NULL;
+		    continue;
+		}
+	    }
+	}
+
+	/* Test decoding of 1st seq */
+	if (!c || !s)
+	    break;
+
+	if (cram_decode_slice_mt(fd, c, s, fd->header) != 0) {
+	    //	if (cram_decode_slice(fd, c, s, fd->header) != 0) {
+	    fprintf(stderr, "Failure to decode slice\n");
+	    cram_free_slice(s);
+	    c->slice = NULL;
+	    return NULL;
+	}
+
+	if (!fd->pool || fd->job_pending)
+	    break;
+
+	// Push it a bit far, to qsize in queue rather than pending arrival,
+	// as cram tends to be a bit bursty in decode timings.
+	if (t_pool_results_queue_len(fd->rqueue) > fd->pool->qsize)
+	    break;
+    }
+
+    if (fd->pool) {
+	t_pool_result *res;
+	cram_decode_job *j;
+	
+//	fprintf(stderr, "Thread pool len = %d, %d\n",
+//		t_pool_results_queue_len(fd->rqueue),
+//		t_pool_results_queue_sz(fd->rqueue));
+
+	if (fd->ooc && t_pool_results_queue_empty(fd->rqueue))
+	    return NULL;
+
+	res = t_pool_next_result_wait(fd->rqueue);
+
+	if (!res || !res->data) {
+	    fprintf(stderr, "t_pool_next_result failure\n");
+	    return NULL;
+	}
+
+	j = (cram_decode_job *)res->data;
+	c = j->c;
+	s = j->s;
+
+	fd->ctr = c;
+
+	t_pool_delete_result(res, 1);
+    }
+
+    *cp = c;
+    return s;
+}
+
+/*
+ * Read the next cram record and return it.
+ * Note that to decode cram_record the caller will need to look up some data
+ * in the current slice, pointed to by fd->ctr->slice. This is valid until
+ * the next call to cram_get_seq (which may invalidate it).
+ *
+ * Returns record pointer on success (do not free)
+ *        NULL on failure
+ */
+cram_record *cram_get_seq(cram_fd *fd) {
+    cram_container *c;
+    cram_slice *s;
+
+    for (;;) {
+	c = fd->ctr;
+	if (c && c->slice && c->curr_rec < c->max_rec) {
+	    s = c->slice;
+	} else {
+	    if (!(s = cram_next_slice(fd, &c)))
+		return NULL;
+	}
+
+	if (fd->range.refid != -2) {
+	    if (s->crecs[c->curr_rec].ref_id < fd->range.refid) {
+		c->curr_rec++;
+		continue;
+	    }
+
+	    if (s->crecs[c->curr_rec].ref_id != fd->range.refid) {
+		fd->eof = 1;
+		cram_free_slice(s);
+		c->slice = NULL;
+		return NULL;
+	    }
+
+	    if (s->crecs[c->curr_rec].apos > fd->range.end) {
+		fd->eof = 1;
+		cram_free_slice(s);
+		c->slice = NULL;
+		return NULL;
+	    }
+
+	    if (s->crecs[c->curr_rec].aend < fd->range.start) {
+		c->curr_rec++;
+		continue;
+	    }
+	}
+
+	break;
+    }
+
+    fd->ctr = c;
+    c->slice = s;
+    return &s->crecs[c->curr_rec++];
+}
+
+/*
+ * Read the next cram record and convert it to a bam_seq_t struct.
+ *
+ * Returns 0 on success
+ *        -1 on EOF or failure (check fd->err)
+ */
+int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) {
+    cram_record *cr;
+    cram_container *c;
+    cram_slice *s;
+
+    if (!(cr = cram_get_seq(fd)))
+	return -1;
+
+    c = fd->ctr;
+    s = c->slice;
+
+    return cram_to_bam(fd->header, fd, s, cr, c->curr_rec-1, bam);
+}