annotate ezBAMQC/src/htslib/cram/cram_encode.c @ 14:744987262771

Uploaded
author cshl-bsr
date Wed, 30 Mar 2016 12:15:03 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2012-2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #ifdef HAVE_CONFIG_H
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include "io_lib_config.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include <errno.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 #include <zlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 #include <sys/types.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 #include <sys/stat.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 #include <math.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 #include <ctype.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 #include "cram/cram.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 #include "cram/os.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 #include "cram/md5.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 #define Z_CRAM_STRAT Z_FILTERED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 //#define Z_CRAM_STRAT Z_RLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 //#define Z_CRAM_STRAT Z_HUFFMAN_ONLY
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 //#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 static int process_one_read(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 cram_slice *s, cram_record *cr,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 bam_seq_t *b, int rnum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Returns the zero-based index of the first occurrence of val in the
 * NUL-terminated string key.
 *
 * If val does not occur in key (or val is '\0'), the length of key is
 * returned.  Unlike strchr() there is no NULL "not found" result, so a
 * caller that needs to detect a miss must compare the result against
 * strlen(key).
 *
 * key is only read, never modified, so string literals may be passed.
 */
static int sub_idx(const char *key, char val) {
    int i = 0;

    /* Stop at the first match or at the terminating NUL, whichever is first */
    while (key[i] != '\0' && key[i] != val)
        i++;

    return i;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 * Encodes a compression header block into a generic cram_block structure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 * Returns cram_block ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 cram_block_compression_hdr *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 cram_block *cb = cram_new_block(COMPRESSION_HEADER, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 cram_block *map = cram_new_block(COMPRESSION_HEADER, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 int i, mc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 if (!cb || !map)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 * This is a concatenation of several blocks of data:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 * header + landmarks, preservation map, read encoding map, and the tag
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 * encoding map.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 * All 4 are variable sized and we need to know how large these are
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 * before creating the compression header itself as this starts with
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 * the total size (stored as a variable length string).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 // Duplicated from container itself, and removed in 1.1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 itf8_put_blk(cb, h->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 itf8_put_blk(cb, h->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 itf8_put_blk(cb, h->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 itf8_put_blk(cb, h->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 itf8_put_blk(cb, h->num_landmarks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 for (i = 0; i < h->num_landmarks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 itf8_put_blk(cb, h->landmark[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 /* Create in-memory preservation map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 /* FIXME: should create this when we create the container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 if (!(h->preservation_map = kh_init(map)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 k = kh_put(map, h->preservation_map, "RN", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 kh_val(h->preservation_map, k).i = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 k = kh_put(map, h->preservation_map, "PI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 kh_val(h->preservation_map, k).i = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 k = kh_put(map, h->preservation_map, "UI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 kh_val(h->preservation_map, k).i = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 k = kh_put(map, h->preservation_map, "MI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 kh_val(h->preservation_map, k).i = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 // Technically SM was in 1.0, but wasn't in Java impl.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 k = kh_put(map, h->preservation_map, "SM", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 kh_val(h->preservation_map, k).i = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 k = kh_put(map, h->preservation_map, "TD", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 kh_val(h->preservation_map, k).i = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 k = kh_put(map, h->preservation_map, "AP", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 kh_val(h->preservation_map, k).i = c->pos_sorted;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 if (fd->no_ref || fd->embed_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 // Reference Required == No
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 k = kh_put(map, h->preservation_map, "RR", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 if (-1 == r) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 kh_val(h->preservation_map, k).i = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 /* Encode preservation map; could collapse this and above into one */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 mc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 BLOCK_SIZE(map) = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 if (h->preservation_map) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 for (k = kh_begin(h->preservation_map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 k != kh_end(h->preservation_map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 k++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 const char *key;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 khash_t(map) *pmap = h->preservation_map;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 if (!kh_exist(pmap, k))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 key = kh_key(pmap, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 BLOCK_APPEND(map, key, 2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174 switch(CRAM_KEY(key[0], key[1])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 case CRAM_KEY('M','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 case CRAM_KEY('U','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 case CRAM_KEY('P','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187 case CRAM_KEY('A','P'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 case CRAM_KEY('R','N'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 case CRAM_KEY('R','R'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 BLOCK_APPEND_CHAR(map, kh_val(pmap, k).i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 case CRAM_KEY('S','M'): {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 char smat[5], *mp = smat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 *mp++ =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 (sub_idx("CGTN", h->substitution_matrix[0][0]) << 6) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 (sub_idx("CGTN", h->substitution_matrix[0][1]) << 4) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 (sub_idx("CGTN", h->substitution_matrix[0][2]) << 2) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 (sub_idx("CGTN", h->substitution_matrix[0][3]) << 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 *mp++ =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 (sub_idx("AGTN", h->substitution_matrix[1][0]) << 6) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208 (sub_idx("AGTN", h->substitution_matrix[1][1]) << 4) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209 (sub_idx("AGTN", h->substitution_matrix[1][2]) << 2) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 (sub_idx("AGTN", h->substitution_matrix[1][3]) << 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 *mp++ =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212 (sub_idx("ACTN", h->substitution_matrix[2][0]) << 6) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213 (sub_idx("ACTN", h->substitution_matrix[2][1]) << 4) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 (sub_idx("ACTN", h->substitution_matrix[2][2]) << 2) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 (sub_idx("ACTN", h->substitution_matrix[2][3]) << 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 *mp++ =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 (sub_idx("ACGN", h->substitution_matrix[3][0]) << 6) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 (sub_idx("ACGN", h->substitution_matrix[3][1]) << 4) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 (sub_idx("ACGN", h->substitution_matrix[3][2]) << 2) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 (sub_idx("ACGN", h->substitution_matrix[3][3]) << 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 *mp++ =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222 (sub_idx("ACGT", h->substitution_matrix[4][0]) << 6) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 (sub_idx("ACGT", h->substitution_matrix[4][1]) << 4) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 (sub_idx("ACGT", h->substitution_matrix[4][2]) << 2) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 (sub_idx("ACGT", h->substitution_matrix[4][3]) << 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 BLOCK_APPEND(map, smat, 5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 case CRAM_KEY('T','D'): {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 itf8_put_blk(map, BLOCK_SIZE(h->TD_blk));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233 BLOCK_DATA(h->TD_blk),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 BLOCK_SIZE(h->TD_blk));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 fprintf(stderr, "Unknown preservation key '%.2s'\n", key);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 itf8_put_blk(cb, mc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 /* rec encoding map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 mc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 BLOCK_SIZE(map) = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253 if (h->codecs[DS_BF]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 if (-1 == h->codecs[DS_BF]->store(h->codecs[DS_BF], map, "BF",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 if (h->codecs[DS_CF]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260 if (-1 == h->codecs[DS_CF]->store(h->codecs[DS_CF], map, "CF",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 if (h->codecs[DS_RL]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 if (-1 == h->codecs[DS_RL]->store(h->codecs[DS_RL], map, "RL",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 if (h->codecs[DS_AP]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 if (-1 == h->codecs[DS_AP]->store(h->codecs[DS_AP], map, "AP",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 if (h->codecs[DS_RG]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 if (-1 == h->codecs[DS_RG]->store(h->codecs[DS_RG], map, "RG",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283 if (h->codecs[DS_MF]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 if (-1 == h->codecs[DS_MF]->store(h->codecs[DS_MF], map, "MF",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 if (h->codecs[DS_NS]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290 if (-1 == h->codecs[DS_NS]->store(h->codecs[DS_NS], map, "NS",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 if (h->codecs[DS_NP]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 if (-1 == h->codecs[DS_NP]->store(h->codecs[DS_NP], map, "NP",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 if (h->codecs[DS_TS]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 if (-1 == h->codecs[DS_TS]->store(h->codecs[DS_TS], map, "TS",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 if (h->codecs[DS_NF]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308 if (-1 == h->codecs[DS_NF]->store(h->codecs[DS_NF], map, "NF",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 if (h->codecs[DS_TC]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 if (-1 == h->codecs[DS_TC]->store(h->codecs[DS_TC], map, "TC",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 if (h->codecs[DS_TN]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 if (-1 == h->codecs[DS_TN]->store(h->codecs[DS_TN], map, "TN",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 if (h->codecs[DS_TL]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326 if (-1 == h->codecs[DS_TL]->store(h->codecs[DS_TL], map, "TL",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 if (h->codecs[DS_FN]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 if (-1 == h->codecs[DS_FN]->store(h->codecs[DS_FN], map, "FN",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 if (h->codecs[DS_FC]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 if (-1 == h->codecs[DS_FC]->store(h->codecs[DS_FC], map, "FC",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 if (h->codecs[DS_FP]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344 if (-1 == h->codecs[DS_FP]->store(h->codecs[DS_FP], map, "FP",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
345 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
346 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
347 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
348 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
349 if (h->codecs[DS_BS]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
350 if (-1 == h->codecs[DS_BS]->store(h->codecs[DS_BS], map, "BS",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
351 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
352 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
353 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355 if (h->codecs[DS_IN]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 if (-1 == h->codecs[DS_IN]->store(h->codecs[DS_IN], map, "IN",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 if (h->codecs[DS_DL]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 if (-1 == h->codecs[DS_DL]->store(h->codecs[DS_DL], map, "DL",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 if (h->codecs[DS_BA]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368 if (-1 == h->codecs[DS_BA]->store(h->codecs[DS_BA], map, "BA",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 if (h->codecs[DS_BB]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 if (-1 == h->codecs[DS_BB]->store(h->codecs[DS_BB], map, "BB",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379 if (h->codecs[DS_MQ]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 if (-1 == h->codecs[DS_MQ]->store(h->codecs[DS_MQ], map, "MQ",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385 if (h->codecs[DS_RN]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 if (-1 == h->codecs[DS_RN]->store(h->codecs[DS_RN], map, "RN",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 if (h->codecs[DS_QS]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 if (-1 == h->codecs[DS_QS]->store(h->codecs[DS_QS], map, "QS",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 if (h->codecs[DS_QQ]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 if (-1 == h->codecs[DS_QQ]->store(h->codecs[DS_QQ], map, "QQ",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403 if (h->codecs[DS_RI]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 if (-1 == h->codecs[DS_RI]->store(h->codecs[DS_RI], map, "RI",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 if (CRAM_MAJOR_VERS(fd->version) != 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 if (h->codecs[DS_SC]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 if (-1 == h->codecs[DS_SC]->store(h->codecs[DS_SC], map, "SC",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 if (h->codecs[DS_RS]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 if (-1 == h->codecs[DS_RS]->store(h->codecs[DS_RS], map, "RS",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 if (h->codecs[DS_PD]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 if (-1 == h->codecs[DS_PD]->store(h->codecs[DS_PD], map, "PD",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 if (h->codecs[DS_HC]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 if (-1 == h->codecs[DS_HC]->store(h->codecs[DS_HC], map, "HC",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 if (h->codecs[DS_TM]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 if (-1 == h->codecs[DS_TM]->store(h->codecs[DS_TM], map, "TM",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 if (h->codecs[DS_TV]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 if (-1 == h->codecs[DS_TV]->store(h->codecs[DS_TV], map, "TV",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448 itf8_put_blk(cb, mc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451 /* tag encoding map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 #if 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 mp = map; mc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 if (h->tag_encoding_map) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 HashItem *hi;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 HashIter *iter = HashTableIterCreate();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 if (!iter)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 while ((hi = HashTableIterNext(h->tag_encoding_map, iter))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 cram_map *m = hi->data.p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 int sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464 mp += itf8_put(mp, (hi->key[0]<<16)|(hi->key[1]<<8)|hi->key[2]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 if (-1 == (sz = m->codec->store(m->codec, mp, NULL, fd->version)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 mp += sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 HashTableIterDestroy(iter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 mc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 BLOCK_SIZE(map) = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 if (c->tags_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 #define TAG_ID(a) ((#a[0]<<8)+#a[1])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 for (k = kh_begin(c->tags_used); k != kh_end(c->tags_used); k++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482 int key;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 if (!kh_exist(c->tags_used, k))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 mc++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 itf8_put_blk(map, kh_key(c->tags_used, k));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 // use block content id 4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 switch((key = kh_key(c->tags_used, k)) & 0xff) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 case 'Z': case 'H':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 // string as byte_array_stop
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 "\005" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 DS_aux_S "\000\000\000",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 7);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501 if (key>>8 == TAG_ID(OQ))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 DS_aux_OQ_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 else if (key>>8 == TAG_ID(BQ))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 DS_aux_BQ_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 else if (key>>8 == TAG_ID(BD))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520 DS_aux_BD_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522 else if (key>>8 == TAG_ID(BI))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527 DS_aux_BI_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 else if ((key>>8 == TAG_ID(Q2)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 (key>>8 == TAG_ID(U2)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 (key>>8 == TAG_ID(QT)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 (key>>8 == TAG_ID(CQ)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 DS_aux_oq_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 else if ((key>>8 == TAG_ID(R2)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540 (key>>8 == TAG_ID(E2)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 (key>>8 == TAG_ID(CS)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 (key>>8 == TAG_ID(BC)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 (key>>8 == TAG_ID(RT)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 DS_aux_os_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 "\005" // BYTE_ARRAY_STOP
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 "\002" // len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554 "\t" // stop-byte is also SAM separator
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 DS_aux_oz_S,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 case 'A': case 'c': case 'C':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 // byte array len, 1 byte
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 "\004" // BYTE_ARRAY_LEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 "\011" // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 "\003" // HUFFMAN (len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566 "\004" // huffman-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567 "\001" // 1 symbol
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 "\001" // symbol=1 byte value
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 "\001" // 1 length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 "\000" // length=0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571 "\001" // EXTERNAL (val)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 DS_aux_S,// content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 11);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 case 's': case 'S':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 // byte array len, 2 byte
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580 "\004" // BYTE_ARRAY_LEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 "\011" // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582 "\003" // HUFFMAN (len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
583 "\004" // huffman-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
584 "\001" // 1 symbol
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
585 "\002" // symbol=2 byte value
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
586 "\001" // 1 length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
587 "\000" // length=0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588 "\001" // EXTERNAL (val)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 DS_aux_S,// content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 11);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 case 'i': case 'I': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595 // byte array len, 4 byte
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597 "\004" // BYTE_ARRAY_LEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 "\011" // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599 "\003" // HUFFMAN (len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600 "\004" // huffman-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 "\001" // 1 symbol
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 "\004" // symbol=4 byte value
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 "\001" // 1 length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604 "\000" // length=0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 "\001" // EXTERNAL (val)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 DS_aux_S,// content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 11);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611 case 'B':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612 // Byte array of variable size, but we generate our tag
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 // byte stream at the wrong stage (during reading and not
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 // after slice header construction). So we use
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615 // BYTE_ARRAY_LEN with the length codec being external
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 // too.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617 if ((key>>8 == TAG_ID(FZ)) || (key>>8 == TAG_ID(ZM)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 "\004" // BYTE_ARRAY_LEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620 "\006" // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 "\001" // EXTERNAL (len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 DS_aux_FZ_S // content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 "\001" // EXTERNAL (val)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 DS_aux_FZ_S,// content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627 8);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 BLOCK_APPEND(map,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 "\004" // BYTE_ARRAY_LEN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 "\006" // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 "\001" // EXTERNAL (len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 DS_aux_S // content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635 "\001" // EXTERNAL (val)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 "\001" // external-len
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637 DS_aux_S,// content-id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638 8);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 fprintf(stderr, "Unsupported SAM aux type '%c'\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 kh_key(c->tags_used, k) & 0xff);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 //mp += m->codec->store(m->codec, mp, NULL, fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 itf8_put_blk(cb, BLOCK_SIZE(map) + itf8_size(mc));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 itf8_put_blk(cb, mc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 BLOCK_APPEND(cb, BLOCK_DATA(map), BLOCK_SIZE(map));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 fprintf(stderr, "Wrote compression block header in %d bytes\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 (int)BLOCK_SIZE(cb));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657 BLOCK_UPLEN(cb);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659 cram_free_block(map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 return cb;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 * Encodes a slice compression header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 * Returns cram_block on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672 char *buf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673 char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674 cram_block *b = cram_new_block(MAPPED_SLICE, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
676
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
677 if (!b)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
678 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
679
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
680 if (NULL == (cp = buf = malloc(16+5*(8+s->hdr->num_blocks)))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
681 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
682 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
683 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
684
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
685 cp += itf8_put(cp, s->hdr->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
686 cp += itf8_put(cp, s->hdr->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
687 cp += itf8_put(cp, s->hdr->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
688 cp += itf8_put(cp, s->hdr->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
689 if (CRAM_MAJOR_VERS(fd->version) == 2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
690 cp += itf8_put(cp, s->hdr->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
691 else if (CRAM_MAJOR_VERS(fd->version) >= 3)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
692 cp += ltf8_put(cp, s->hdr->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
693 cp += itf8_put(cp, s->hdr->num_blocks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
694 cp += itf8_put(cp, s->hdr->num_content_ids);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
695 for (j = 0; j < s->hdr->num_content_ids; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
696 cp += itf8_put(cp, s->hdr->block_content_ids[j]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
697 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
698 if (s->hdr->content_type == MAPPED_SLICE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
699 cp += itf8_put(cp, s->hdr->ref_base_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
700
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
701 if (CRAM_MAJOR_VERS(fd->version) != 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
702 memcpy(cp, s->hdr->md5, 16); cp += 16;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
703 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
704
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
705 assert(cp-buf <= 16+5*(8+s->hdr->num_blocks));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
706
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
707 b->data = (unsigned char *)buf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
708 b->comp_size = b->uncomp_size = cp-buf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
709
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
710 return b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
711 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
712
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
713
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
714 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
715 * Encodes a single read.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
716 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
717 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
718 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
719 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
720 static int cram_encode_slice_read(cram_fd *fd,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
721 cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
722 cram_block_compression_hdr *h,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
723 cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
724 cram_record *cr,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
725 int *last_pos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
726 int r = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
727 int32_t i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
728 unsigned char uc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
729
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
730 //fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
731
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
732 //printf("BF=0x%x\n", cr->flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
733 // bf = cram_flag_swap[cr->flags];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
734 i32 = fd->cram_flag_swap[cr->flags & 0xfff];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
735 r |= h->codecs[DS_BF]->encode(s, h->codecs[DS_BF], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
736
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
737 i32 = cr->cram_flags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
738 r |= h->codecs[DS_CF]->encode(s, h->codecs[DS_CF], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
739
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
740 if (CRAM_MAJOR_VERS(fd->version) != 1 && s->hdr->ref_seq_id == -2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
741 r |= h->codecs[DS_RI]->encode(s, h->codecs[DS_RI], (char *)&cr->ref_id, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
742
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
743 r |= h->codecs[DS_RL]->encode(s, h->codecs[DS_RL], (char *)&cr->len, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
744
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
745 if (c->pos_sorted) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
746 i32 = cr->apos - *last_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
747 r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
748 *last_pos = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
749 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
750 i32 = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
751 r |= h->codecs[DS_AP]->encode(s, h->codecs[DS_AP], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
752 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
753
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
754 r |= h->codecs[DS_RG]->encode(s, h->codecs[DS_RG], (char *)&cr->rg, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
755
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
756 if (c->comp_hdr->read_names_included) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
757 // RN codec: Already stored in block[3].
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
758 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
759
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
760 if (cr->cram_flags & CRAM_FLAG_DETACHED) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
761 i32 = cr->mate_flags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
762 r |= h->codecs[DS_MF]->encode(s, h->codecs[DS_MF], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
763
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
764 if (!c->comp_hdr->read_names_included) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
765 // RN codec: Already stored in block[3].
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
766 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
767
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
768 r |= h->codecs[DS_NS]->encode(s, h->codecs[DS_NS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
769 (char *)&cr->mate_ref_id, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
770
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
771 r |= h->codecs[DS_NP]->encode(s, h->codecs[DS_NP],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
772 (char *)&cr->mate_pos, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
773
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
774 r |= h->codecs[DS_TS]->encode(s, h->codecs[DS_TS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
775 (char *)&cr->tlen, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
776 } else if (cr->cram_flags & CRAM_FLAG_MATE_DOWNSTREAM) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
777 r |= h->codecs[DS_NF]->encode(s, h->codecs[DS_NF],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
778 (char *)&cr->mate_line, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
779 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
780
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
781 /* Aux tags */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
782 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
783 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
784 uc = cr->ntags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
785 r |= h->codecs[DS_TC]->encode(s, h->codecs[DS_TC], (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
786
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
787 for (j = 0; j < cr->ntags; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
788 uint32_t i32 = s->TN[cr->TN_idx + j]; // id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
789 r |= h->codecs[DS_TN]->encode(s, h->codecs[DS_TN], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
790 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
791 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
792 r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
793 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
794
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
795 // qual
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
796 // QS codec : Already stored in block[2].
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
797
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
798 // features (diffs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
799 if (!(cr->flags & BAM_FUNMAP)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
800 int prev_pos = 0, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
801
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
802 r |= h->codecs[DS_FN]->encode(s, h->codecs[DS_FN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
803 (char *)&cr->nfeature, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
804 for (j = 0; j < cr->nfeature; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
805 cram_feature *f = &s->features[cr->feature + j];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
806
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
807 uc = f->X.code;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
808 r |= h->codecs[DS_FC]->encode(s, h->codecs[DS_FC], (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
809 i32 = f->X.pos - prev_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
810 r |= h->codecs[DS_FP]->encode(s, h->codecs[DS_FP], (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
811 prev_pos = f->X.pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
812
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
813 switch(f->X.code) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
814 //char *seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
815
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
816 case 'X':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
817 //fprintf(stderr, " FC=%c FP=%d base=%d\n", f->X.code, i32, f->X.base);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
818
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
819 uc = f->X.base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
820 r |= h->codecs[DS_BS]->encode(s, h->codecs[DS_BS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
821 (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
822 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
823 case 'S':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
824 // Already done
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
825 // r |= h->codecs[DS_SC]->encode(s, h->codecs[DS_SC],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
826 // BLOCK_DATA(s->soft_blk) + f->S.seq_idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
827 // f->S.len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
828
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
829 // if (IS_CRAM_3_VERS(fd)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
830 // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
831 // BLOCK_DATA(s->seqs_blk) + f->S.seq_idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
832 // f->S.len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
833 // }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
834 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
835 case 'I':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
836 //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
837 //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
838 // seq, f->S.len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
839 // if (IS_CRAM_3_VERS(fd)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
840 // r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
841 // BLOCK_DATA(s->seqs_blk) + f->I.seq_idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
842 // f->I.len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
843 // }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
844 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
845 case 'i':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
846 uc = f->i.base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
847 r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
848 (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
849 //seq = DSTRING_STR(s->seqs_ds) + f->S.seq_idx;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
850 //r |= h->codecs[DS_IN]->encode(s, h->codecs[DS_IN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
851 // seq, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
852 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
853 case 'D':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
854 i32 = f->D.len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
855 r |= h->codecs[DS_DL]->encode(s, h->codecs[DS_DL],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
856 (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
857 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
858
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
859 case 'B':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
860 // // Used when we try to store a non ACGTN base or an N
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
861 // // that aligns against a non ACGTN reference
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
862
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
863 uc = f->B.base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
864 r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
865 (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
866
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
867 // Already added
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
868 // uc = f->B.qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
869 // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
870 // (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
871 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
872
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
873 case 'b':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
874 // string of bases
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
875 r |= h->codecs[DS_BB]->encode(s, h->codecs[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
876 (char *)BLOCK_DATA(s->seqs_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
877 + f->b.seq_idx,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
878 f->b.len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
879 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
880
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
881 case 'Q':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
882 // Already added
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
883 // uc = f->B.qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
884 // r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
885 // (char *)&uc, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
886 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
887
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
888 case 'N':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
889 i32 = f->N.len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
890 r |= h->codecs[DS_RS]->encode(s, h->codecs[DS_RS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
891 (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
892 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
893
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
894 case 'P':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
895 i32 = f->P.len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
896 r |= h->codecs[DS_PD]->encode(s, h->codecs[DS_PD],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
897 (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
898 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
899
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
900 case 'H':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
901 i32 = f->H.len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
902 r |= h->codecs[DS_HC]->encode(s, h->codecs[DS_HC],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
903 (char *)&i32, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
904 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
905
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
906
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
907 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
908 fprintf(stderr, "unhandled feature code %c\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
909 f->X.code);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
910 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
911 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
912 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
913
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
914 r |= h->codecs[DS_MQ]->encode(s, h->codecs[DS_MQ],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
915 (char *)&cr->mqual, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
916 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
917 char *seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
918 r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
919 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
920
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
921 return r ? -1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
922 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
923
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
924
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
925 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
926 * Applies various compression methods to specific blocks, depending on
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
927 * known observations of how data series compress.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
928 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
929 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
930 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
931 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
932 static int cram_compress_slice(cram_fd *fd, cram_slice *s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
933 int level = fd->level, i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
934 int method = 1<<GZIP | 1<<GZIP_RLE, methodF = method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
935
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
936 /* Compress the CORE Block too, with minimal zlib level */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
937 if (level > 5 && s->block[0]->uncomp_size > 500)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
938 cram_compress_block(fd, s->block[0], NULL, GZIP, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
939
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
940 if (fd->use_bz2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
941 method |= 1<<BZIP2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
942
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
943 if (fd->use_rans)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
944 method |= (1<<RANS0) | (1<<RANS1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
945
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
946 if (fd->use_lzma)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
947 method |= (1<<LZMA);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
948
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
949 /* Faster method for data series we only need entropy encoding on */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
950 methodF = method & ~(1<<GZIP | 1<<BZIP2 | 1<<LZMA);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
951 if (level >= 6)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
952 methodF = method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
953
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
954
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
955 /* Specific compression methods for certain block types */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
956 if (cram_compress_block(fd, s->block[DS_IN], fd->m[DS_IN], //IN (seq)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
957 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
958 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
959
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
960 if (fd->level == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
961 /* Do nothing */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
962 } else if (fd->level == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
963 if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
964 methodF, 1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
965 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
966 for (i = DS_aux; i <= DS_aux_oz; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
967 if (s->block[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
968 if (cram_compress_block(fd, s->block[i], fd->m[i],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
969 method, 1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
970 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
971 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
972 } else if (fd->level < 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
973 if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
974 method, 1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
975 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
976 if (cram_compress_block(fd, s->block[DS_BA], fd->m[DS_BA],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
977 method, 1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
978 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
979 if (s->block[DS_BB])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
980 if (cram_compress_block(fd, s->block[DS_BB], fd->m[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
981 method, 1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
982 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
983 for (i = DS_aux; i <= DS_aux_oz; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
984 if (s->block[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
985 if (cram_compress_block(fd, s->block[i], fd->m[i],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
986 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
987 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
988 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
989 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
990 if (cram_compress_block(fd, s->block[DS_QS], fd->m[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
991 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
992 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
993 if (cram_compress_block(fd, s->block[DS_BA], fd->m[DS_BA],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
994 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
995 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
996 if (s->block[DS_BB])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
997 if (cram_compress_block(fd, s->block[DS_BB], fd->m[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
998 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
999 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1000 for (i = DS_aux; i <= DS_aux_oz; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1001 if (s->block[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1002 if (cram_compress_block(fd, s->block[i], fd->m[i],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1003 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1004 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1005 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1006 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1007
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1008 // NAME: best is generally xz, bzip2, zlib then rans1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1009 // It benefits well from a little bit extra compression level.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1010 if (cram_compress_block(fd, s->block[DS_RN], fd->m[DS_RN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1011 method & ~(1<<RANS0 | 1<<GZIP_RLE),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1012 MIN(9,level)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1013 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1014
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1015 // NS shows strong local correlation as rearrangements are localised
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1016 if (s->block[DS_NS] != s->block[0])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1017 if (cram_compress_block(fd, s->block[DS_NS], fd->m[DS_NS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1018 method, level))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1019 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1020
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1021
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1022 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1023 * Minimal compression of any block still uncompressed, bar CORE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1024 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1025 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1026 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1027 for (i = 1; i < DS_END; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1028 if (!s->block[i] || s->block[i] == s->block[0])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1029 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1030
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1031 // fast methods only
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1032 if (s->block[i]->method == RAW) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1033 cram_compress_block(fd, s->block[i], fd->m[i],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1034 methodF, level);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1035 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1036 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1037 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1038
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1039 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1040 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1041
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1042 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1043 * Encodes a single slice from a container
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1044 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1045 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1046 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1047 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1048 static int cram_encode_slice(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1049 cram_block_compression_hdr *h, cram_slice *s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1050 int rec, r = 0, last_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1051 int embed_ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1052 enum cram_DS_ID id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1053
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1054 embed_ref = fd->embed_ref && s->hdr->ref_seq_id != -1 ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1055
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1056 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1057 * Slice external blocks:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1058 * ID 0 => base calls (insertions, soft-clip)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1059 * ID 1 => qualities
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1060 * ID 2 => names
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1061 * ID 3 => TS (insert size), NP (next frag)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1062 * ID 4 => tag values
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1063 * ID 6 => tag IDs (TN), if CRAM_V1.0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1064 * ID 7 => TD tag dictionary, if !CRAM_V1.0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1065 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1066
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1067 /* Create cram slice header */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1068 s->hdr->ref_base_id = embed_ref ? DS_ref : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1069 s->hdr->record_counter = c->num_records + c->record_counter;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1070 c->num_records += s->hdr->num_records;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1071
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1072 s->block = calloc(DS_END, sizeof(s->block[0]));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1073 s->hdr->block_content_ids = malloc(DS_END * sizeof(int32_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1074 if (!s->block || !s->hdr->block_content_ids)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1075 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1076
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1077 // Create first fixed blocks, always external.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1078 // CORE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1079 if (!(s->block[0] = cram_new_block(CORE, 0)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1080 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1081
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1082 // TN block for CRAM v1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1083 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1084 if (h->codecs[DS_TN]->codec == E_EXTERNAL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1085 if (!(s->block[DS_TN] = cram_new_block(EXTERNAL,DS_TN))) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1086 h->codecs[DS_TN]->external.content_id = DS_TN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1087 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1088 s->block[DS_TN] = s->block[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1089 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1090 s->block[DS_TN] = s->block[DS_TN];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1091 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1092
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1093 // Embedded reference
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1094 if (embed_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1095 if (!(s->block[DS_ref] = cram_new_block(EXTERNAL, DS_ref)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1096 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1097 s->ref_id = DS_ref; // needed?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1098 BLOCK_APPEND(s->block[DS_ref],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1099 c->ref + c->first_base - c->ref_start,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1100 c->last_base - c->first_base + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1101 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1102
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1103 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1104 * All the data-series blocks if appropriate.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1105 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1106 for (id = DS_BF; id < DS_TN; id++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1107 if (h->codecs[id] && (h->codecs[id]->codec == E_EXTERNAL ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1108 h->codecs[id]->codec == E_BYTE_ARRAY_STOP ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1109 h->codecs[id]->codec == E_BYTE_ARRAY_LEN)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1110 switch (h->codecs[id]->codec) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1111 case E_EXTERNAL:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1112 if (!(s->block[id] = cram_new_block(EXTERNAL, id)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1113 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1114 h->codecs[id]->external.content_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1115 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1117 case E_BYTE_ARRAY_STOP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1118 if (!(s->block[id] = cram_new_block(EXTERNAL, id)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1119 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1120 h->codecs[id]->byte_array_stop.content_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1121 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1122
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1123 case E_BYTE_ARRAY_LEN: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1124 cram_codec *cc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1125
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1126 cc = h->codecs[id]->e_byte_array_len.len_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1127 if (cc->codec == E_EXTERNAL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1128 int eid = cc->external.content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1129 if (!(s->block[eid] = cram_new_block(EXTERNAL, eid)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1130 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1131 cc->external.content_id = eid;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1132 cc->out = s->block[eid];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1133 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1134
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1135 cc = h->codecs[id]->e_byte_array_len.val_codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1136 if (cc->codec == E_EXTERNAL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1137 int eid = cc->external.content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1138 if (!s->block[eid])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1139 if (!(s->block[eid] = cram_new_block(EXTERNAL, eid)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1140 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1141 cc->external.content_id = eid;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1142 cc->out = s->block[eid];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1143 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1144 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1145 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1146 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1147 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1148 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1149 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1150 if (!(id == DS_BB && !h->codecs[DS_BB]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1151 s->block[id] = s->block[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1152 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1153 if (h->codecs[id])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1154 h->codecs[id]->out = s->block[id];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1155 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1156
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1157 /* Encode reads */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1158 last_pos = s->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1159 for (rec = 0; rec < s->hdr->num_records; rec++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1160 cram_record *cr = &s->crecs[rec];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1161 if (cram_encode_slice_read(fd, c, h, s, cr, &last_pos) == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1162 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1163 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1164
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1165 s->block[0]->uncomp_size = s->block[0]->byte + (s->block[0]->bit < 7);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1166 s->block[0]->comp_size = s->block[0]->uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1167
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1168 // Make sure the fixed blocks point to the correct sources
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1169 s->block[DS_IN] = s->base_blk; s->base_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1170 s->block[DS_QS] = s->qual_blk; s->qual_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1171 s->block[DS_RN] = s->name_blk; s->name_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1172 s->block[DS_SC] = s->soft_blk; s->soft_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1173 s->block[DS_aux]= s->aux_blk; s->aux_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1174 s->block[DS_aux_OQ]= s->aux_OQ_blk; s->aux_OQ_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1175 s->block[DS_aux_BQ]= s->aux_BQ_blk; s->aux_BQ_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1176 s->block[DS_aux_BD]= s->aux_BD_blk; s->aux_BD_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1177 s->block[DS_aux_BI]= s->aux_BI_blk; s->aux_BI_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1178 s->block[DS_aux_FZ]= s->aux_FZ_blk; s->aux_FZ_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1179 s->block[DS_aux_oq]= s->aux_oq_blk; s->aux_oq_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1180 s->block[DS_aux_os]= s->aux_os_blk; s->aux_os_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1181 s->block[DS_aux_oz]= s->aux_oz_blk; s->aux_oz_blk = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1182
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1183 // Ensure block sizes are up to date.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1184 for (id = 1; id < DS_END; id++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1185 if (!s->block[id] || s->block[id] == s->block[0])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1186 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1187
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1188 if (s->block[id]->uncomp_size == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1189 BLOCK_UPLEN(s->block[id]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1190 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1191
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1192 // Compress it all
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1193 if (cram_compress_slice(fd, s) == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1194 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1195
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1196 // Collapse empty blocks and create hdr_block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1197 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1198 int i, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1199 for (i = j = 1; i < DS_END; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1200 if (!s->block[i] || s->block[i] == s->block[0])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1201 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1202 if (s->block[i]->uncomp_size == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1203 cram_free_block(s->block[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1204 s->block[i] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1205 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1206 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1207 s->block[j] = s->block[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1208 s->hdr->block_content_ids[j-1] = s->block[i]->content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1209 j++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1210 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1211 s->hdr->num_content_ids = j-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1212 s->hdr->num_blocks = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1213
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1214 if (!(s->hdr_block = cram_encode_slice_header(fd, s)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1215 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1216 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1217
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1218 return r ? -1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1219 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1220
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1221 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1222 * Encodes all slices in a container into blocks.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1223 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1224 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1225 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1226 int cram_encode_container(cram_fd *fd, cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1227 int i, j, slice_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1228 cram_block_compression_hdr *h = c->comp_hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1229 cram_block *c_hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1230 int multi_ref = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1231 int r1, r2, sn, nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1232 spare_bams *spares;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1233
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1234 /* Cache references up-front if we have unsorted access patterns */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1235 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1236 nref = fd->refs->nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1237 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1238
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1239 if (!fd->no_ref && c->refs_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1240 for (i = 0; i < nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1241 if (c->refs_used[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1242 cram_get_ref(fd, i, 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1243 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1244 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1245
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1246 /* To create M5 strings */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1247 /* Fetch reference sequence */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1248 if (!fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1249 bam_seq_t *b = c->bams[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1250 char *ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1251
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1252 ref = cram_get_ref(fd, bam_ref(b), 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1253 if (!ref && bam_ref(b) >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1254 fprintf(stderr, "Failed to load reference #%d\n", bam_ref(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1255 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1256 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1257 if ((c->ref_id = bam_ref(b)) >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1258 c->ref_seq_id = c->ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1259 c->ref = fd->refs->ref_id[c->ref_seq_id]->seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1260 c->ref_start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1261 c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1262 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1263 c->ref_seq_id = c->ref_id; // FIXME remove one var!
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1264 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1265 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1266 c->ref_id = bam_ref(c->bams[0]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1267 cram_ref_incr(fd->refs, c->ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1268 c->ref_seq_id = c->ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1269 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1270
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1271 /* Turn bams into cram_records and gather basic stats */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1272 for (r1 = sn = 0; r1 < c->curr_c_rec; sn++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1273 cram_slice *s = c->slices[sn];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1274 int first_base = INT_MAX, last_base = INT_MIN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1275
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1276 assert(sn < c->curr_slice);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1277
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1278 /* FIXME: we could create our slice objects here too instead of
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1279 * in cram_put_bam_seq. It's more natural here and also this
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1280 * bit is threaded so it's less work in the main thread.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1281 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1282
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1283 for (r2 = 0; r1 < c->curr_c_rec && r2 < c->max_rec; r1++, r2++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1284 cram_record *cr = &s->crecs[r2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1285 bam_seq_t *b = c->bams[r1];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1286
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1287 /* If multi-ref we need to cope with changing reference per seq */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1288 if (c->multi_seq && !fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1289 if (bam_ref(b) != c->ref_seq_id && bam_ref(b) >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1290 if (c->ref_seq_id >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1291 cram_ref_decr(fd->refs, c->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1292
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1293 if (!cram_get_ref(fd, bam_ref(b), 1, 0)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1294 fprintf(stderr, "Failed to load reference #%d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1295 bam_ref(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1296 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1297 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1298
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1299 c->ref_seq_id = bam_ref(b); // overwritten later by -2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1300 assert(fd->refs->ref_id[c->ref_seq_id]->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1301 c->ref = fd->refs->ref_id[c->ref_seq_id]->seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1302 c->ref_start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1303 c->ref_end = fd->refs->ref_id[c->ref_seq_id]->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1304 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1305 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1306
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1307 process_one_read(fd, c, s, cr, b, r2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1308
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1309 if (first_base > cr->apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1310 first_base = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1311
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1312 if (last_base < cr->aend)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1313 last_base = cr->aend;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1314 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1315
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1316 if (c->multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1317 s->hdr->ref_seq_id = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1318 s->hdr->ref_seq_start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1319 s->hdr->ref_seq_span = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1320 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1321 s->hdr->ref_seq_id = c->ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1322 s->hdr->ref_seq_start = first_base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1323 s->hdr->ref_seq_span = last_base - first_base + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1324 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1325 s->hdr->num_records = r2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1326 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1327
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1328 if (c->multi_seq && !fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1329 if (c->ref_seq_id >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1330 cram_ref_decr(fd->refs, c->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1331 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1332
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1333 /* Link our bams[] array onto the spare bam list for reuse */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1334 spares = malloc(sizeof(*spares));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1335 pthread_mutex_lock(&fd->bam_list_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1336 spares->bams = c->bams;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1337 spares->next = fd->bl;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1338 fd->bl = spares;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1339 pthread_mutex_unlock(&fd->bam_list_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1340 c->bams = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1341
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1342 /* Detect if a multi-seq container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1343 cram_stats_encoding(fd, c->stats[DS_RI]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1344 multi_ref = c->stats[DS_RI]->nvals > 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1345
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1346 if (multi_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1347 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1348 fprintf(stderr, "Multi-ref container\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1349 c->ref_seq_id = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1350 c->ref_seq_start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1351 c->ref_seq_span = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1352 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1353
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1354
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1355 /* Compute MD5s */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1356 for (i = 0; i < c->curr_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1357 cram_slice *s = c->slices[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1358
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1359 if (CRAM_MAJOR_VERS(fd->version) != 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1360 if (s->hdr->ref_seq_id >= 0 && c->multi_seq == 0 && !fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1361 MD5_CTX md5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1362 MD5_Init(&md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1363 MD5_Update(&md5,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1364 c->ref + s->hdr->ref_seq_start - c->ref_start,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1365 s->hdr->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1366 MD5_Final(s->hdr->md5, &md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1367 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1368 memset(s->hdr->md5, 0, 16);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1369 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1370 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1371 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1372
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1373 c->num_records = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1374 c->num_blocks = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1375 c->length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1376
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1377 //fprintf(stderr, "=== BF ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1378 h->codecs[DS_BF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BF]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1379 c->stats[DS_BF], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1380 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1381
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1382 //fprintf(stderr, "=== CF ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1383 h->codecs[DS_CF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_CF]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1384 c->stats[DS_CF], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1385 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1386 // fprintf(stderr, "=== RN ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1387 // h->codecs[DS_RN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RN]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1388 // c->stats[DS_RN], E_BYTE_ARRAY, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1389 // fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1390
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1391 //fprintf(stderr, "=== AP ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1392 if (c->pos_sorted) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1393 h->codecs[DS_AP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_AP]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1394 c->stats[DS_AP], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1395 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1396 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1397 int p[2] = {0, c->max_apos};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1398 h->codecs[DS_AP] = cram_encoder_init(E_BETA, NULL, E_INT, p,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1399 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1400 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1401
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1402 //fprintf(stderr, "=== RG ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1403 h->codecs[DS_RG] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RG]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1404 c->stats[DS_RG], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1405 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1406
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1407 //fprintf(stderr, "=== MQ ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1408 h->codecs[DS_MQ] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MQ]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1409 c->stats[DS_MQ], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1410 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1411
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1412 //fprintf(stderr, "=== NS ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1413 h->codecs[DS_NS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NS]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1414 c->stats[DS_NS], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1415 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1416
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1417 //fprintf(stderr, "=== MF ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1418 h->codecs[DS_MF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_MF]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1419 c->stats[DS_MF], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1420 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1421
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1422 //fprintf(stderr, "=== TS ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1423 h->codecs[DS_TS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TS]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1424 c->stats[DS_TS], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1425 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1426 //fprintf(stderr, "=== NP ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1427 h->codecs[DS_NP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NP]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1428 c->stats[DS_NP], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1429 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1430 //fprintf(stderr, "=== NF ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1431 h->codecs[DS_NF] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_NF]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1432 c->stats[DS_NF], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1433 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1434
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1435 //fprintf(stderr, "=== RL ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1436 h->codecs[DS_RL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RL]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1437 c->stats[DS_RL], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1438 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1439
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1440 //fprintf(stderr, "=== FN ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1441 h->codecs[DS_FN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FN]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1442 c->stats[DS_FN], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1443 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1444
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1445 //fprintf(stderr, "=== FC ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1446 h->codecs[DS_FC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FC]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1447 c->stats[DS_FC], E_BYTE, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1448 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1449
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1450 //fprintf(stderr, "=== FP ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1451 h->codecs[DS_FP] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_FP]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1452 c->stats[DS_FP], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1453 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1454
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1455 //fprintf(stderr, "=== DL ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1456 h->codecs[DS_DL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_DL]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1457 c->stats[DS_DL], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1458 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1459
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1460 //fprintf(stderr, "=== BA ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1461 h->codecs[DS_BA] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BA]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1462 c->stats[DS_BA], E_BYTE, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1463 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1464
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1465 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1466 cram_byte_array_len_encoder e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1467
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1468 e.len_encoding = E_EXTERNAL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1469 e.len_dat = (void *)DS_BB_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1470 //e.len_dat = (void *)DS_BB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1471
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1472 e.val_encoding = E_EXTERNAL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1473 e.val_dat = (void *)DS_BB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1474
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1475 h->codecs[DS_BB] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1476 E_BYTE_ARRAY, (void *)&e,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1477 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1478 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1479 h->codecs[DS_BB] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1480 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1481
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1482 //fprintf(stderr, "=== BS ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1483 h->codecs[DS_BS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_BS]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1484 c->stats[DS_BS], E_BYTE, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1485 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1486
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1487 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1488 h->codecs[DS_TL] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1489 h->codecs[DS_RI] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1490 h->codecs[DS_RS] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1491 h->codecs[DS_PD] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1492 h->codecs[DS_HC] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1493 h->codecs[DS_SC] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1494
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1495 //fprintf(stderr, "=== TC ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1496 h->codecs[DS_TC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TC]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1497 c->stats[DS_TC], E_BYTE, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1498 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1499
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1500 //fprintf(stderr, "=== TN ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1501 h->codecs[DS_TN] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TN]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1502 c->stats[DS_TN], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1503 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1504 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1505 h->codecs[DS_TC] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1506 h->codecs[DS_TN] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1507
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1508 //fprintf(stderr, "=== TL ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1509 h->codecs[DS_TL] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_TL]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1510 c->stats[DS_TL], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1511 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1512
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1513
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1514 //fprintf(stderr, "=== RI ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1515 h->codecs[DS_RI] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RI]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1516 c->stats[DS_RI], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1517 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1518
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1519 //fprintf(stderr, "=== RS ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1520 h->codecs[DS_RS] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_RS]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1521 c->stats[DS_RS], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1522 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1523
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1524 //fprintf(stderr, "=== PD ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1525 h->codecs[DS_PD] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_PD]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1526 c->stats[DS_PD], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1527 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1528
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1529 //fprintf(stderr, "=== HC ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1530 h->codecs[DS_HC] = cram_encoder_init(cram_stats_encoding(fd, c->stats[DS_HC]),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1531 c->stats[DS_HC], E_INT, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1532 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1533
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1534 //fprintf(stderr, "=== SC ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1535 if (1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1536 int i2[2] = {0, DS_SC};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1537
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1538 h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1539 E_BYTE_ARRAY, (void *)i2,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1540 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1541 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1542 // Appears to be no practical benefit to using this method,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1543 // but it may work better if we start mixing SC, IN and BB
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1544 // elements into the same external block.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1545 cram_byte_array_len_encoder e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1546
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1547 e.len_encoding = E_EXTERNAL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1548 e.len_dat = (void *)DS_SC_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1549
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1550 e.val_encoding = E_EXTERNAL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1551 e.val_dat = (void *)DS_SC;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1552
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1553 h->codecs[DS_SC] = cram_encoder_init(E_BYTE_ARRAY_LEN, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1554 E_BYTE_ARRAY, (void *)&e,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1555 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1556 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1557 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1558
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1559 //fprintf(stderr, "=== IN ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1560 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1561 int i2[2] = {0, DS_IN};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1562 h->codecs[DS_IN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1563 E_BYTE_ARRAY, (void *)i2,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1564 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1565 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1566
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1567 h->codecs[DS_QS] = cram_encoder_init(E_EXTERNAL, NULL, E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1568 (void *)DS_QS,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1569 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1570 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1571 int i2[2] = {0, DS_RN};
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1572 h->codecs[DS_RN] = cram_encoder_init(E_BYTE_ARRAY_STOP, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1573 E_BYTE_ARRAY, (void *)i2,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1574 fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1575 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1576
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1577
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1578 /* Encode slices */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1579 for (i = 0; i < c->curr_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1580 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1581 fprintf(stderr, "Encode slice %d\n", i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1582 if (cram_encode_slice(fd, c, h, c->slices[i]) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1583 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1584 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1585
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1586 /* Create compression header */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1587 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1588 h->ref_seq_id = c->ref_seq_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1589 h->ref_seq_start = c->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1590 h->ref_seq_span = c->ref_seq_span;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1591 h->num_records = c->num_records;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1592
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1593 h->mapped_qs_included = 0; // fixme
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1594 h->unmapped_qs_included = 0; // fixme
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1595 // h->... fixme
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1596 memcpy(h->substitution_matrix, CRAM_SUBST_MATRIX, 20);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1597
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1598 if (!(c_hdr = cram_encode_compression_header(fd, c, h)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1599 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1600 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1601
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1602 /* Compute landmarks */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1603 /* Fill out slice landmarks */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1604 c->num_landmarks = c->curr_slice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1605 c->landmark = malloc(c->num_landmarks * sizeof(*c->landmark));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1606 if (!c->landmark)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1607 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1608
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1609 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1610 * Slice offset starts after the first block, so we need to simulate
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1611 * writing it to work out the correct offset
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1612 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1613 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1614 slice_offset = c_hdr->method == RAW
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1615 ? c_hdr->uncomp_size
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1616 : c_hdr->comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1617 slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1618 itf8_size(c_hdr->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1619 itf8_size(c_hdr->comp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1620 itf8_size(c_hdr->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1621 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1622
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1623 c->ref_seq_id = c->slices[0]->hdr->ref_seq_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1624 c->ref_seq_start = c->slices[0]->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1625 c->ref_seq_span = c->slices[0]->hdr->ref_seq_span;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1626 for (i = 0; i < c->curr_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1627 cram_slice *s = c->slices[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1628
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1629 c->num_blocks += s->hdr->num_blocks + 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1630 c->landmark[i] = slice_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1631
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1632 if (s->hdr->ref_seq_start + s->hdr->ref_seq_span >
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1633 c->ref_seq_start + c->ref_seq_span) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1634 c->ref_seq_span = s->hdr->ref_seq_start + s->hdr->ref_seq_span
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1635 - c->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1636 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1637
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1638 slice_offset += s->hdr_block->method == RAW
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1639 ? s->hdr_block->uncomp_size
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1640 : s->hdr_block->comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1641
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1642 slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1643 itf8_size(s->hdr_block->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1644 itf8_size(s->hdr_block->comp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1645 itf8_size(s->hdr_block->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1646
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1647 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1648 slice_offset += 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1649 itf8_size(s->block[j]->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1650 itf8_size(s->block[j]->comp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1651 itf8_size(s->block[j]->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1652
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1653 slice_offset += s->block[j]->method == RAW
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1654 ? s->block[j]->uncomp_size
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1655 : s->block[j]->comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1656 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1657 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1658 c->length += slice_offset; // just past the final slice
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1659
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1660 c->comp_hdr_block = c_hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1661
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1662 if (c->ref_seq_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1663 cram_ref_decr(fd->refs, c->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1664 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1665
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1666 /* Cache references up-front if we have unsorted access patterns */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1667 if (!fd->no_ref && c->refs_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1668 for (i = 0; i < fd->refs->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1669 if (c->refs_used[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1670 cram_ref_decr(fd->refs, i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1671 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1672 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1673
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1674 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1675 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1676
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1677
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1678 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1679 * Adds a feature code to a read within a slice. For purposes of minimising
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1680 * memory allocations and fragmentation we have one array of features for all
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1681 * reads within the slice. We return the index into this array for this new
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1682 * feature.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1683 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1684 * Returns feature index on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1685 * -1 on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1686 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1687 static int cram_add_feature(cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1688 cram_record *r, cram_feature *f) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1689 if (s->nfeatures >= s->afeatures) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1690 s->afeatures = s->afeatures ? s->afeatures*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1691 s->features = realloc(s->features, s->afeatures*sizeof(*s->features));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1692 if (!s->features)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1693 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1694 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1695
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1696 if (!r->nfeature++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1697 r->feature = s->nfeatures;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1698 cram_stats_add(c->stats[DS_FP], f->X.pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1699 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1700 cram_stats_add(c->stats[DS_FP],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1701 f->X.pos - s->features[r->feature + r->nfeature-2].X.pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1702 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1703 cram_stats_add(c->stats[DS_FC], f->X.code);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1704
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1705 s->features[s->nfeatures++] = *f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1706
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1707 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1708 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1709
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1710 static int cram_add_substitution(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1711 cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1712 int pos, char base, char qual, char ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1713 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1714
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1715 // seq=ACGTN vs ref=ACGT or seq=ACGT vs ref=ACGTN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1716 if (fd->L2[(uc)base]<4 || (fd->L2[(uc)base]<5 && fd->L2[(uc)ref]<4)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1717 f.X.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1718 f.X.code = 'X';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1719 f.X.base = fd->cram_sub_matrix[ref&0x1f][base&0x1f];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1720 cram_stats_add(c->stats[DS_BS], f.X.base);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1721 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1722 f.B.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1723 f.B.code = 'B';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1724 f.B.base = base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1725 f.B.qual = qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1726 cram_stats_add(c->stats[DS_BA], f.B.base);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1727 cram_stats_add(c->stats[DS_QS], f.B.qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1728 BLOCK_APPEND_CHAR(s->qual_blk, qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1729 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1730 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1731 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1732
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1733 static int cram_add_bases(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1734 cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1735 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1736 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1737
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1738 f.b.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1739 f.b.code = 'b';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1740 f.b.seq_idx = base - (char *)BLOCK_DATA(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1741 f.b.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1742
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1743 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1744 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1745
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1746 static int cram_add_base(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1747 cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1748 int pos, char base, char qual) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1749 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1750 f.B.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1751 f.B.code = 'B';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1752 f.B.base = base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1753 f.B.qual = qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1754 cram_stats_add(c->stats[DS_BA], base);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1755 cram_stats_add(c->stats[DS_QS], qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1756 BLOCK_APPEND_CHAR(s->qual_blk, qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1757 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1758 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1759
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1760 static int cram_add_quality(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1761 cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1762 int pos, char qual) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1763 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1764 f.Q.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1765 f.Q.code = 'Q';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1766 f.Q.qual = qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1767 cram_stats_add(c->stats[DS_QS], qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1768 BLOCK_APPEND_CHAR(s->qual_blk, qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1769 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1770 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1771
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1772 static int cram_add_deletion(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1773 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1774 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1775 f.D.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1776 f.D.code = 'D';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1777 f.D.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1778 cram_stats_add(c->stats[DS_DL], len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1779 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1780 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1781
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1782 static int cram_add_softclip(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1783 int pos, int len, char *base, int version) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1784 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1785 f.S.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1786 f.S.code = 'S';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1787 f.S.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1788 switch (CRAM_MAJOR_VERS(version)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1789 case 1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1790 f.S.seq_idx = BLOCK_SIZE(s->base_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1791 BLOCK_APPEND(s->base_blk, base, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1792 BLOCK_APPEND_CHAR(s->base_blk, '\0');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1793 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1794
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1795 case 2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1796 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1797 f.S.seq_idx = BLOCK_SIZE(s->soft_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1798 if (base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1799 BLOCK_APPEND(s->soft_blk, base, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1800 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1801 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1802 for (i = 0; i < len; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1803 BLOCK_APPEND_CHAR(s->soft_blk, 'N');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1804 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1805 BLOCK_APPEND_CHAR(s->soft_blk, '\0');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1806 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1807
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1808 // default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1809 // // v3.0 onwards uses BB data-series
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1810 // f.S.seq_idx = BLOCK_SIZE(s->soft_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1811 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1812 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1813 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1814
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1815 static int cram_add_hardclip(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1816 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1817 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1818 f.S.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1819 f.S.code = 'H';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1820 f.S.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1821 cram_stats_add(c->stats[DS_HC], len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1822 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1823 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1824
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1825 static int cram_add_skip(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1826 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1827 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1828 f.S.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1829 f.S.code = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1830 f.S.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1831 cram_stats_add(c->stats[DS_RS], len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1832 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1833 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1834
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1835 static int cram_add_pad(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1836 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1837 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1838 f.S.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1839 f.S.code = 'P';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1840 f.S.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1841 cram_stats_add(c->stats[DS_PD], len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1842 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1843 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1844
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1845 static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1846 int pos, int len, char *base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1847 cram_feature f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1848 f.I.pos = pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1849 if (len == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1850 char b = base ? *base : 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1851 f.i.code = 'i';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1852 f.i.base = b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1853 cram_stats_add(c->stats[DS_BA], b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1854 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1855 f.I.code = 'I';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1856 f.I.len = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1857 f.S.seq_idx = BLOCK_SIZE(s->base_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1858 if (base) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1859 BLOCK_APPEND(s->base_blk, base, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1860 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1861 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1862 for (i = 0; i < len; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1863 BLOCK_APPEND_CHAR(s->base_blk, 'N');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1864 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1865 BLOCK_APPEND_CHAR(s->base_blk, '\0');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1866 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1867 return cram_add_feature(c, s, r, &f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1868 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1869
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1870 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1871 * Encodes auxiliary data.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1872 * Returns the read-group parsed out of the BAM aux fields on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1873 * NULL on failure or no rg present (FIXME)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1874 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1875 static char *cram_encode_aux_1_0(cram_fd *fd, bam_seq_t *b, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1876 cram_slice *s, cram_record *cr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1877 char *aux, *tmp, *rg = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1878 int aux_size = bam_blk_size(b) -
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1879 ((char *)bam_aux(b) - (char *)&bam_ref(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1880
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1881 /* Worst case is 1 nul char on every ??:Z: string, so +33% */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1882 BLOCK_GROW(s->aux_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1883 tmp = (char *)BLOCK_END(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1884
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1885 aux = (char *)bam_aux(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1886 cr->TN_idx = s->nTN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1887
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1888 while (aux[0] != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1889 int32_t i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1890 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1891
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1892 if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1893 rg = &aux[3];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1894 while (*aux++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1895 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1896 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1897 if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1898 while (*aux++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1899 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1900 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1901 if (aux[0] == 'N' && aux[1] == 'M') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1902 switch(aux[2]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1903 case 'A': case 'C': case 'c': aux+=4; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1904 case 'I': case 'i': case 'f': aux+=7; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1905 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1906 fprintf(stderr, "Unhandled type code for NM tag\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1907 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1908 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1909 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1910 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1911
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1912 cr->ntags++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1913
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1914 i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1915 kh_put(s_i2i, c->tags_used, i32, &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1916 if (-1 == r)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1917 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1918
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1919 if (s->nTN >= s->aTN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1920 s->aTN = s->aTN ? s->aTN*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1921 if (!(s->TN = realloc(s->TN, s->aTN * sizeof(*s->TN))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1922 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1923 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1924 s->TN[s->nTN++] = i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1925 cram_stats_add(c->stats[DS_TN], i32);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1926
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1927 switch(aux[2]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1928 case 'A': case 'C': case 'c':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1929 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1930 *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1931 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1932
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1933 case 'S': case 's':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1934 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1935 *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1936 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1937
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1938 case 'I': case 'i': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1939 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1940 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1941 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1942
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1943 case 'd':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1944 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1945 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1946 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1947 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1948
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1949 case 'Z': case 'H':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1950 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1951 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1952 *tmp++ = '\t'; // stop byte
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1953 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1954
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1955 case 'B': {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1956 int type = aux[3], blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1957 uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1958 (((unsigned char *)aux)[5]<< 8) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1959 (((unsigned char *)aux)[6]<<16) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1960 (((unsigned char *)aux)[7]<<24));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1961 // skip TN field
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1962 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1963
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1964 // We use BYTE_ARRAY_LEN with external length, so store that first
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1965 switch (type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1966 case 'c': case 'C':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1967 blen = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1968 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1969 case 's': case 'S':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1970 blen = 2*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1971 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1972 case 'i': case 'I': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1973 blen = 4*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1974 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1975 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1976 fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1977 type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1978 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1979
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1980 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1981
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1982 tmp += itf8_put(tmp, blen+5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1983
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1984 *tmp++=*aux++; // sub-type & length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1985 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1986
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1987 // The tag data itself
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1988 memcpy(tmp, aux, blen); tmp += blen; aux += blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1989
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1990 //cram_stats_add(c->aux_B_stats, blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1991 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1992 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1993 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1994 fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1995 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1996 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1997 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1998 cram_stats_add(c->stats[DS_TC], cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1999
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2000 cr->aux = BLOCK_SIZE(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2001 cr->aux_size = (uc *)tmp - (BLOCK_DATA(s->aux_blk) + cr->aux);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2002 BLOCK_SIZE(s->aux_blk) = (uc *)tmp - BLOCK_DATA(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2003 assert(s->aux_blk->byte <= s->aux_blk->alloc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2004
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2005 return rg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2006 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2007
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2008 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2009 * Encodes auxiliary data. Largely duplicated from above, but done so to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2010 * keep it simple and avoid a myriad of version ifs.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2011 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2012 * Returns the read-group parsed out of the BAM aux fields on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2013 * NULL on failure or no rg present (FIXME)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2014 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2015 static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2016 cram_slice *s, cram_record *cr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2017 char *aux, *orig, *tmp, *rg = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2018 int aux_size = bam_get_l_aux(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2019 cram_block *td_b = c->comp_hdr->TD_blk;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2020 int TD_blk_size = BLOCK_SIZE(td_b), new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2021 char *key;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2022 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2023
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2024
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2025 /* Worst case is 1 nul char on every ??:Z: string, so +33% */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2026 BLOCK_GROW(s->aux_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2027 tmp = (char *)BLOCK_END(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2028
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2029
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2030 orig = aux = (char *)bam_aux(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2031
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2032 // Copy aux keys to td_b and aux values to s->aux_blk
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2033 while (aux - orig < aux_size && aux[0] != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2034 uint32_t i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2035 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2036
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2037 if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2038 rg = &aux[3];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2039 while (*aux++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2040 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2041 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2042 if (aux[0] == 'M' && aux[1] == 'D' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2043 while (*aux++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2044 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2045 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2046 if (aux[0] == 'N' && aux[1] == 'M') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2047 switch(aux[2]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2048 case 'A': case 'C': case 'c': aux+=4; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2049 case 'S': case 's': aux+=5; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2050 case 'I': case 'i': case 'f': aux+=7; break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2051 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2052 fprintf(stderr, "Unhandled type code for NM tag\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2053 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2054 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2055 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2056 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2057
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2058 BLOCK_APPEND(td_b, aux, 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2059
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2060 i32 = (aux[0]<<16) | (aux[1]<<8) | aux[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2061 kh_put(s_i2i, c->tags_used, i32, &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2062 if (-1 == r)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2063 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2064
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2065 // BQ:Z
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2066 if (aux[0] == 'B' && aux[1] == 'Q' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2067 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2068 if (!s->aux_BQ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2069 if (!(s->aux_BQ_blk = cram_new_block(EXTERNAL, DS_aux_BQ)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2070 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2071 BLOCK_GROW(s->aux_BQ_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2072 tmp = (char *)BLOCK_END(s->aux_BQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2073 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2074 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2075 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2076 BLOCK_SIZE(s->aux_BQ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2077 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2078 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2079
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2080 // BD:Z
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2081 if (aux[0] == 'B' && aux[1]=='D' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2082 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2083 if (!s->aux_BD_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2084 if (!(s->aux_BD_blk = cram_new_block(EXTERNAL, DS_aux_BD)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2085 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2086 BLOCK_GROW(s->aux_BD_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2087 tmp = (char *)BLOCK_END(s->aux_BD_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2088 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2089 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2090 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2091 BLOCK_SIZE(s->aux_BD_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BD_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2092 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2093 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2094
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2095 // BI:Z
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2096 if (aux[0] == 'B' && aux[1]=='I' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2097 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2098 if (!s->aux_BI_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2099 if (!(s->aux_BI_blk = cram_new_block(EXTERNAL, DS_aux_BI)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2100 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2101 BLOCK_GROW(s->aux_BI_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2102 tmp = (char *)BLOCK_END(s->aux_BI_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2103 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2104 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2105 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2106 BLOCK_SIZE(s->aux_BI_blk) = (uc *)tmp - BLOCK_DATA(s->aux_BI_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2107 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2108 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2109
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2110 // OQ:Z:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2111 if (aux[0] == 'O' && aux[1] == 'Q' && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2112 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2113 if (!s->aux_OQ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2114 if (!(s->aux_OQ_blk = cram_new_block(EXTERNAL, DS_aux_OQ)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2115 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2116 BLOCK_GROW(s->aux_OQ_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2117 tmp = (char *)BLOCK_END(s->aux_OQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2118 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2119 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2120 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2121 BLOCK_SIZE(s->aux_OQ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_OQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2122 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2123 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2124
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2125 // FZ:B or ZM:B
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2126 if ((aux[0] == 'F' && aux[1] == 'Z' && aux[2] == 'B') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2127 (aux[0] == 'Z' && aux[1] == 'M' && aux[2] == 'B')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2128 int type = aux[3], blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2129 uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2130 (((unsigned char *)aux)[5]<< 8) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2131 (((unsigned char *)aux)[6]<<16) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2132 (((unsigned char *)aux)[7]<<24));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2133 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2134 if (!s->aux_FZ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2135 if (!(s->aux_FZ_blk = cram_new_block(EXTERNAL, DS_aux_FZ)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2136 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2137 BLOCK_GROW(s->aux_FZ_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2138 tmp = (char *)BLOCK_END(s->aux_FZ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2139
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2140 // skip TN field
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2141 aux+=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2142
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2143 // We use BYTE_ARRAY_LEN with external length, so store that first
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2144 switch (type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2145 case 'c': case 'C':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2146 blen = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2147 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2148 case 's': case 'S':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2149 blen = 2*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2150 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2151 case 'i': case 'I': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2152 blen = 4*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2153 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2154 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2155 fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2156 type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2157 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2158
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2159 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2161 blen += 5; // sub-type & length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2162 tmp += itf8_put(tmp, blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2163
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2164 // The tag data itself
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2165 memcpy(tmp, aux, blen); tmp += blen; aux += blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2166
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2167 BLOCK_SIZE(s->aux_FZ_blk) = (uc *)tmp - BLOCK_DATA(s->aux_FZ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2168 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2169 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2170
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2171 // Other quality data - {Q2,U2,QT,CQ}:Z and similar
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2172 if (((aux[0] == 'Q' && aux[1] == '2') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2173 (aux[0] == 'U' && aux[1] == '2') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2174 (aux[0] == 'Q' && aux[1] == 'T') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2175 (aux[0] == 'C' && aux[1] == 'Q')) && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2176 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2177 if (!s->aux_oq_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2178 if (!(s->aux_oq_blk = cram_new_block(EXTERNAL, DS_aux_oq)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2179 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2180 BLOCK_GROW(s->aux_oq_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2181 tmp = (char *)BLOCK_END(s->aux_oq_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2182 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2183 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2184 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2185 BLOCK_SIZE(s->aux_oq_blk) = (uc *)tmp - BLOCK_DATA(s->aux_oq_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2186 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2187 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2188
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2189 // Other sequence data - {R2,E2,CS,BC,RT}:Z and similar
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2190 if (((aux[0] == 'R' && aux[1] == '2') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2191 (aux[0] == 'E' && aux[1] == '2') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2192 (aux[0] == 'C' && aux[1] == 'S') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2193 (aux[0] == 'B' && aux[1] == 'C') ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2194 (aux[0] == 'R' && aux[1] == 'T')) && aux[2] == 'Z') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2195 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2196 if (!s->aux_os_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2197 if (!(s->aux_os_blk = cram_new_block(EXTERNAL, DS_aux_os)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2198 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2199 BLOCK_GROW(s->aux_os_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2200 tmp = (char *)BLOCK_END(s->aux_os_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2201 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2202 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2203 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2204 BLOCK_SIZE(s->aux_os_blk) = (uc *)tmp - BLOCK_DATA(s->aux_os_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2205 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2206 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2207
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2208
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2209 switch(aux[2]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2210 case 'A': case 'C': case 'c':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2211 aux+=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2212 *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2213 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2214
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2215 case 'S': case 's':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2216 aux+=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2217 *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2218 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2219
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2220 case 'I': case 'i': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2221 aux+=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2222 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2223 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2224
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2225 case 'd':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2226 aux+=3; //*tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2227 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2228 *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++; *tmp++=*aux++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2229 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2230
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2231 case 'Z': case 'H':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2232 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2233 char *tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2234 if (!s->aux_oz_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2235 if (!(s->aux_oz_blk = cram_new_block(EXTERNAL, DS_aux_oz)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2236 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2237 BLOCK_GROW(s->aux_oz_blk, aux_size*1.34+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2238 tmp = (char *)BLOCK_END(s->aux_oz_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2239 aux += 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2240 while ((*tmp++=*aux++));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2241 *tmp++ = '\t';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2242 BLOCK_SIZE(s->aux_oz_blk) = (uc *)tmp -
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2243 BLOCK_DATA(s->aux_oz_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2244 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2245 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2246
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2247 case 'B': {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2248 int type = aux[3], blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2249 uint32_t count = (uint32_t)((((unsigned char *)aux)[4]<< 0) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2250 (((unsigned char *)aux)[5]<< 8) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2251 (((unsigned char *)aux)[6]<<16) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2252 (((unsigned char *)aux)[7]<<24));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2253 // skip TN field
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2254 aux+=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2255
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2256 // We use BYTE_ARRAY_LEN with external length, so store that first
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2257 switch (type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2258 case 'c': case 'C':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2259 blen = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2260 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2261 case 's': case 'S':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2262 blen = 2*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2263 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2264 case 'i': case 'I': case 'f':
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2265 blen = 4*count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2266 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2267 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2268 fprintf(stderr, "Unknown sub-type '%c' for aux type 'B'\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2269 type);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2270 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2271
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2272 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2273
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2274 blen += 5; // sub-type & length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2275 tmp += itf8_put(tmp, blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2276
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2277 // The tag data itself
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2278 memcpy(tmp, aux, blen); tmp += blen; aux += blen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2279
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2280 //cram_stats_add(c->aux_B_stats, blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2281 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2282 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2283 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2284 fprintf(stderr, "Unknown aux type '%c'\n", aux[2]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2285 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2286 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2287 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2288
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2289 // FIXME: sort BLOCK_DATA(td_b) by char[3] triples
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2290
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2291 // Add and increment TD hash entry
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2292 BLOCK_APPEND_CHAR(td_b, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2293
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2294 // Duplicate key as BLOCK_DATA() can be realloced to a new pointer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2295 key = string_ndup(c->comp_hdr->TD_keys,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2296 (char *)BLOCK_DATA(td_b) + TD_blk_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2297 BLOCK_SIZE(td_b) - TD_blk_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2298 k = kh_put(m_s2i, c->comp_hdr->TD_hash, key, &new);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2299 if (new < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2300 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2301 } else if (new == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2302 BLOCK_SIZE(td_b) = TD_blk_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2303 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2304 kh_val(c->comp_hdr->TD_hash, k) = c->comp_hdr->nTL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2305 c->comp_hdr->nTL++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2306 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2307
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2308 cr->TL = kh_val(c->comp_hdr->TD_hash, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2309 cram_stats_add(c->stats[DS_TL], cr->TL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2310
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2311 cr->aux = BLOCK_SIZE(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2312 cr->aux_size = (uc *)tmp - (BLOCK_DATA(s->aux_blk) + cr->aux);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2313 BLOCK_SIZE(s->aux_blk) = (uc *)tmp - BLOCK_DATA(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2314 assert(s->aux_blk->byte <= s->aux_blk->alloc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2315
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2316 return rg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2317 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2318
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2319
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2320 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2321 * Handles creation of a new container or new slice, flushing any
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2322 * existing containers when appropriate.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2323 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2324 * Really this is next slice, which may or may not lead to a new container.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2325 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2326 * Returns cram_container pointer on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2327 * NULL on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2328 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2329 static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2330 cram_container *c = fd->ctr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2331 cram_slice *s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2332 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2333
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2334 /* First occurence */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2335 if (c->curr_ref == -2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2336 c->curr_ref = bam_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2337
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2338 if (c->slice) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2339 s = c->slice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2340 if (c->multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2341 s->hdr->ref_seq_id = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2342 s->hdr->ref_seq_start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2343 s->hdr->ref_seq_span = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2344 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2345 s->hdr->ref_seq_id = c->curr_ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2346 s->hdr->ref_seq_start = c->first_base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2347 s->hdr->ref_seq_span = c->last_base - c->first_base + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2348 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2349 s->hdr->num_records = c->curr_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2350
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2351 if (c->curr_slice == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2352 if (c->ref_seq_id != s->hdr->ref_seq_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2353 c->ref_seq_id = s->hdr->ref_seq_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2354 c->ref_seq_start = c->first_base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2355 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2356
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2357 c->curr_slice++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2358 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2359
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2360 /* Flush container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2361 if (c->curr_slice == c->max_slice ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2362 (bam_ref(b) != c->curr_ref && !c->multi_seq)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2363 c->ref_seq_span = fd->last_base - c->ref_seq_start + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2364 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2365 fprintf(stderr, "Flush container %d/%d..%d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2366 c->ref_seq_id, c->ref_seq_start,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2367 c->ref_seq_start + c->ref_seq_span -1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2368
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2369 /* Encode slices */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2370 if (fd->pool) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2371 if (-1 == cram_flush_container_mt(fd, c))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2372 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2373 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2374 if (-1 == cram_flush_container(fd, c))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2375 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2376
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2377 // Move to sep func, as we need cram_flush_container for
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2378 // the closing phase to flush the partial container.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2379 for (i = 0; i < c->max_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2380 cram_free_slice(c->slices[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2381 c->slices[i] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2382 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2383
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2384 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2385 c->curr_slice = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2386
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2387 /* Easy approach for purposes of freeing stats */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2388 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2389 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2390
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2391 c = fd->ctr = cram_new_container(fd->seqs_per_slice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2392 fd->slices_per_container);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2393 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2394 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2395 c->record_counter = fd->record_counter;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2396 c->curr_ref = bam_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2397 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2398
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2399 c->last_pos = c->first_base = c->last_base = bam_pos(b)+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2400
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2401 /* New slice */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2402 c->slice = c->slices[c->curr_slice] =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2403 cram_new_slice(MAPPED_SLICE, c->max_rec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2404 if (!c->slice)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2405 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2406
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2407 if (c->multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2408 c->slice->hdr->ref_seq_id = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2409 c->slice->hdr->ref_seq_start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2410 c->slice->last_apos = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2411 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2412 c->slice->hdr->ref_seq_id = bam_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2413 // wrong for unsorted data, will fix during encoding.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2414 c->slice->hdr->ref_seq_start = bam_pos(b)+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2415 c->slice->last_apos = bam_pos(b)+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2416 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2417
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2418 c->curr_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2419
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2420 return c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2421 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2422
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2423 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2424 * Converts a single bam record into a cram record.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2425 * Possibly used within a thread.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2426 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2427 * Returns 0 on success;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2428 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2429 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2430 static int process_one_read(cram_fd *fd, cram_container *c,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2431 cram_slice *s, cram_record *cr,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2432 bam_seq_t *b, int rnum) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2433 int i, fake_qual = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2434 char *cp, *rg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2435 char *ref, *seq, *qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2436
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2437 // FIXME: multi-ref containers
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2438
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2439 ref = c->ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2440 cr->len = bam_seq_len(b); cram_stats_add(c->stats[DS_RL], cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2441
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2442 //fprintf(stderr, "%s => %d\n", rg ? rg : "\"\"", cr->rg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2443
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2444 // Fields to resolve later
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2445 //cr->mate_line; // index to another cram_record
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2446 //cr->mate_flags; // MF
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2447 //cr->ntags; // TC
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2448 cr->ntags = 0; //cram_stats_add(c->stats[DS_TC], cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2449 if (CRAM_MAJOR_VERS(fd->version) == 1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2450 rg = cram_encode_aux_1_0(fd, b, c, s, cr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2451 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2452 rg = cram_encode_aux(fd, b, c, s, cr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2453
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2454 //cr->aux_size = b->blk_size - ((char *)bam_aux(b) - (char *)&bam_ref(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2455 //cr->aux = DSTRING_LEN(s->aux_ds);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2456 //dstring_nappend(s->aux_ds, bam_aux(b), cr->aux_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2457
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2458 /* Read group, identified earlier */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2459 if (rg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2460 SAM_RG *brg = sam_hdr_find_rg(fd->header, rg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2461 cr->rg = brg ? brg->id : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2462 } else if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2463 SAM_RG *brg = sam_hdr_find_rg(fd->header, "UNKNOWN");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2464 assert(brg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2465 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2466 cr->rg = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2467 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2468 cram_stats_add(c->stats[DS_RG], cr->rg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2469
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2470
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2471 cr->ref_id = bam_ref(b); cram_stats_add(c->stats[DS_RI], cr->ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2472 cr->flags = bam_flag(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2473 if (bam_cigar_len(b) == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2474 cr->flags |= BAM_FUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2475 cram_stats_add(c->stats[DS_BF], fd->cram_flag_swap[cr->flags & 0xfff]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2476
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2477 // Non reference based encoding means storing the bases verbatim as features, which in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2478 // turn means every base also has a quality already stored.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2479 if (!fd->no_ref || CRAM_MAJOR_VERS(fd->version) >= 3)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2480 cr->cram_flags = CRAM_FLAG_PRESERVE_QUAL_SCORES;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2481 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2482 cr->cram_flags = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2483 //cram_stats_add(c->stats[DS_CF], cr->cram_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2484
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2485 c->num_bases += cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2486 cr->apos = bam_pos(b)+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2487 if (c->pos_sorted) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2488 if (cr->apos < s->last_apos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2489 c->pos_sorted = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2490 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2491 cram_stats_add(c->stats[DS_AP], cr->apos - s->last_apos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2492 s->last_apos = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2493 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2494 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2495 //cram_stats_add(c->stats[DS_AP], cr->apos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2496 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2497 c->max_apos += (cr->apos > c->max_apos) * (cr->apos - c->max_apos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2498
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2499 cr->name = BLOCK_SIZE(s->name_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2500 cr->name_len = bam_name_len(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2501 cram_stats_add(c->stats[DS_RN], cr->name_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2502
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2503 BLOCK_APPEND(s->name_blk, bam_name(b), bam_name_len(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2504
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2505
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2506 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2507 * This seqs_ds is largely pointless and it could reuse the same memory
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2508 * over and over.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2509 * s->base_blk is what we need for encoding.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2510 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2511 cr->seq = BLOCK_SIZE(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2512 cr->qual = BLOCK_SIZE(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2513 BLOCK_GROW(s->seqs_blk, cr->len+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2514 BLOCK_GROW(s->qual_blk, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2515 seq = cp = (char *)BLOCK_END(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2516
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2517 *seq = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2518 #ifdef ALLOW_UAC
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2519 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2520 // Convert seq 2 bases at a time for speed.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2521 static const uint16_t code2base[256] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2522 15677, 16701, 17213, 19773, 18237, 21053, 21309, 22077,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2523 21565, 22333, 22845, 18493, 19261, 17469, 16957, 20029,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2524 15681, 16705, 17217, 19777, 18241, 21057, 21313, 22081,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2525 21569, 22337, 22849, 18497, 19265, 17473, 16961, 20033,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2526 15683, 16707, 17219, 19779, 18243, 21059, 21315, 22083,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2527 21571, 22339, 22851, 18499, 19267, 17475, 16963, 20035,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2528 15693, 16717, 17229, 19789, 18253, 21069, 21325, 22093,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2529 21581, 22349, 22861, 18509, 19277, 17485, 16973, 20045,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2530 15687, 16711, 17223, 19783, 18247, 21063, 21319, 22087,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2531 21575, 22343, 22855, 18503, 19271, 17479, 16967, 20039,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2532 15698, 16722, 17234, 19794, 18258, 21074, 21330, 22098,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2533 21586, 22354, 22866, 18514, 19282, 17490, 16978, 20050,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2534 15699, 16723, 17235, 19795, 18259, 21075, 21331, 22099,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2535 21587, 22355, 22867, 18515, 19283, 17491, 16979, 20051,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2536 15702, 16726, 17238, 19798, 18262, 21078, 21334, 22102,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2537 21590, 22358, 22870, 18518, 19286, 17494, 16982, 20054,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2538 15700, 16724, 17236, 19796, 18260, 21076, 21332, 22100,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2539 21588, 22356, 22868, 18516, 19284, 17492, 16980, 20052,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2540 15703, 16727, 17239, 19799, 18263, 21079, 21335, 22103,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2541 21591, 22359, 22871, 18519, 19287, 17495, 16983, 20055,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2542 15705, 16729, 17241, 19801, 18265, 21081, 21337, 22105,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2543 21593, 22361, 22873, 18521, 19289, 17497, 16985, 20057,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2544 15688, 16712, 17224, 19784, 18248, 21064, 21320, 22088,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2545 21576, 22344, 22856, 18504, 19272, 17480, 16968, 20040,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2546 15691, 16715, 17227, 19787, 18251, 21067, 21323, 22091,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2547 21579, 22347, 22859, 18507, 19275, 17483, 16971, 20043,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2548 15684, 16708, 17220, 19780, 18244, 21060, 21316, 22084,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2549 21572, 22340, 22852, 18500, 19268, 17476, 16964, 20036,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2550 15682, 16706, 17218, 19778, 18242, 21058, 21314, 22082,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2551 21570, 22338, 22850, 18498, 19266, 17474, 16962, 20034,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2552 15694, 16718, 17230, 19790, 18254, 21070, 21326, 22094,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2553 21582, 22350, 22862, 18510, 19278, 17486, 16974, 20046
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2554 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2555
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2556 int l2 = cr->len / 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2557 unsigned char *from = (unsigned char *)bam_seq(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2558 uint16_t *cpi = (uint16_t *)cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2559 cp[0] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2560 for (i = 0; i < l2; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2561 cpi[i] = le_int2(code2base[from[i]]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2562 if ((i *= 2) < cr->len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2563 cp[i] = seq_nt16_str[bam_seqi(bam_seq(b), i)];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2564 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2565 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2566 for (i = 0; i < cr->len; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2567 cp[i] = seq_nt16_str[bam_seqi(bam_seq(b), i)];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2568 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2569 BLOCK_SIZE(s->seqs_blk) += cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2570
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2571 qual = cp = (char *)bam_qual(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2572
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2573 /* Copy and parse */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2574 if (!(cr->flags & BAM_FUNMAP)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2575 int32_t *cig_to, *cig_from;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2576 int apos = cr->apos-1, spos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2577
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2578 cr->cigar = s->ncigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2579 cr->ncigar = bam_cigar_len(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2580 while (cr->cigar + cr->ncigar >= s->cigar_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2581 s->cigar_alloc = s->cigar_alloc ? s->cigar_alloc*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2582 s->cigar = realloc(s->cigar, s->cigar_alloc * sizeof(*s->cigar));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2583 if (!s->cigar)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2584 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2585 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2586
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2587 cig_to = (int32_t *)s->cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2588 cig_from = (int32_t *)bam_cigar(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2589
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2590 cr->feature = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2591 cr->nfeature = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2592 for (i = 0; i < cr->ncigar; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2593 enum cigar_op cig_op = cig_from[i] & BAM_CIGAR_MASK;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2594 int cig_len = cig_from[i] >> BAM_CIGAR_SHIFT;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2595 cig_to[i] = cig_from[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2596
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2597 /* Can also generate events from here for CRAM diffs */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2598
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2599 switch (cig_op) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2600 int l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2601
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2602 // Don't trust = and X ops to be correct.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2603 case BAM_CMATCH:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2604 case BAM_CBASE_MATCH:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2605 case BAM_CBASE_MISMATCH:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2606 //fprintf(stderr, "\nBAM_CMATCH\nR: %.*s\nS: %.*s\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2607 // cig_len, &ref[apos], cig_len, &seq[spos]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2608 l = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2609 if (!fd->no_ref && cr->len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2610 int end = cig_len+apos < c->ref_end
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2611 ? cig_len : c->ref_end - apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2612 char *sp = &seq[spos];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2613 char *rp = &ref[apos];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2614 char *qp = &qual[spos];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2615 for (l = 0; l < end; l++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2616 if (rp[l] != sp[l]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2617 if (!sp[l])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2618 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2619 if (0 && CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2620 // Disabled for the time being as it doesn't
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2621 // seem to gain us much.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2622 int ol=l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2623 while (l<end && rp[l] != sp[l])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2624 l++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2625 if (l-ol > 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2626 if (cram_add_bases(fd, c, s, cr, spos+ol,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2627 l-ol, &seq[spos+ol]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2628 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2629 l--;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2630 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2631 l = ol;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2632 if (cram_add_substitution(fd, c, s, cr,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2633 spos+l, sp[l],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2634 qp[l], rp[l]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2635 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2636 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2637 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2638 if (cram_add_substitution(fd, c, s, cr, spos+l,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2639 sp[l], qp[l], rp[l]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2640 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2641 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2642 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2643 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2644 spos += l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2645 apos += l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2646 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2647
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2648 if (l < cig_len && cr->len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2649 if (fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2650 if (CRAM_MAJOR_VERS(fd->version) == 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2651 if (cram_add_bases(fd, c, s, cr, spos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2652 cig_len-l, &seq[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2653 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2654 spos += cig_len-l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2655 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2656 for (; l < cig_len && seq[spos]; l++, spos++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2657 if (cram_add_base(fd, c, s, cr, spos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2658 seq[spos], qual[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2659 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2660 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2661 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2662 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2663 /* off end of sequence or non-ref based output */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2664 for (; l < cig_len && seq[spos]; l++, spos++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2665 if (cram_add_base(fd, c, s, cr, spos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2666 seq[spos], qual[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2667 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2668 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2669 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2670 apos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2671 } else if (!cr->len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2672 /* Seq "*" */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2673 apos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2674 spos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2675 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2676 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2677
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2678 case BAM_CDEL:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2679 if (cram_add_deletion(c, s, cr, spos, cig_len, &seq[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2680 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2681 apos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2682 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2683
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2684 case BAM_CREF_SKIP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2685 if (cram_add_skip(c, s, cr, spos, cig_len, &seq[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2686 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2687 apos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2688 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2689
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2690 case BAM_CINS:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2691 if (cram_add_insertion(c, s, cr, spos, cig_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2692 cr->len ? &seq[spos] : NULL))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2693 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2694 if (fd->no_ref && cr->len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2695 for (l = 0; l < cig_len; l++, spos++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2696 cram_add_quality(fd, c, s, cr, spos, qual[spos]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2697 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2698 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2699 spos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2700 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2701 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2702
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2703 case BAM_CSOFT_CLIP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2704 if (cram_add_softclip(c, s, cr, spos, cig_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2705 cr->len ? &seq[spos] : NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2706 fd->version))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2707 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2708 if (fd->no_ref &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2709 !(cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2710 if (cr->len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2711 for (l = 0; l < cig_len; l++, spos++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2712 cram_add_quality(fd, c, s, cr, spos, qual[spos]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2713 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2714 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2715 for (l = 0; l < cig_len; l++, spos++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2716 cram_add_quality(fd, c, s, cr, spos, -1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2717 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2718 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2719 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2720 spos += cig_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2721 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2722 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2723
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2724 case BAM_CHARD_CLIP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2725 if (cram_add_hardclip(c, s, cr, spos, cig_len, &seq[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2726 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2727 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2728
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2729 case BAM_CPAD:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2730 if (cram_add_pad(c, s, cr, spos, cig_len, &seq[spos]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2731 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2732 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2733 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2734 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2735 fake_qual = spos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2736 cr->aend = MIN(apos, c->ref_end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2737 cram_stats_add(c->stats[DS_FN], cr->nfeature);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2738 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2739 // Unmapped
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2740 cr->cram_flags |= CRAM_FLAG_PRESERVE_QUAL_SCORES;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2741 cr->cigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2742 cr->ncigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2743 cr->nfeature = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2744 cr->aend = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2745 for (i = 0; i < cr->len; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2746 cram_stats_add(c->stats[DS_BA], seq[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2747 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2748
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2749 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2750 * Append to the qual block now. We do this here as
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2751 * cram_add_substitution() can generate BA/QS events which need to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2752 * be in the qual block before we append the rest of the data.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2753 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2754 if (cr->cram_flags & CRAM_FLAG_PRESERVE_QUAL_SCORES) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2755 /* Special case of seq "*" */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2756 if (cr->len == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2757 cram_stats_add(c->stats[DS_RL], cr->len = fake_qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2758 BLOCK_GROW(s->qual_blk, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2759 cp = (char *)BLOCK_END(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2760 memset(cp, 255, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2761 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2762 BLOCK_GROW(s->qual_blk, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2763 cp = (char *)BLOCK_END(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2764 char *from = (char *)&bam_qual(b)[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2765 char *to = &cp[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2766 memcpy(to, from, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2767 //for (i = 0; i < cr->len; i++) cp[i] = from[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2768 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2769 BLOCK_SIZE(s->qual_blk) += cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2770 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2771 if (cr->len == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2772 cr->len = fake_qual >= 0 ? fake_qual : cr->aend - cr->apos + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2773 cram_stats_add(c->stats[DS_RL], cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2774 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2775 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2776
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2777 /* Now we know apos and aend both, update mate-pair information */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2778 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2779 int new;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2780 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2781 int sec = (cr->flags & BAM_FSECONDARY) ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2782
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2783 //fprintf(stderr, "Checking %"PRId64"/%.*s\t", rnum,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2784 // cr->name_len, DSTRING_STR(s->name_ds)+cr->name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2785 if (cr->flags & BAM_FPAIRED) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2786 char *key = string_ndup(s->pair_keys,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2787 (char *)BLOCK_DATA(s->name_blk)+cr->name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2788 cr->name_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2789 if (!key)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2790 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2791
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2792 k = kh_put(m_s2i, s->pair[sec], key, &new);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2793 if (-1 == new)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2794 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2795 else if (new > 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2796 kh_val(s->pair[sec], k) = rnum;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2797 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2798 new = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2799 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2800
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2801 if (new == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2802 cram_record *p = &s->crecs[kh_val(s->pair[sec], k)];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2803 int aleft, aright, sign;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2804
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2805 aleft = MIN(cr->apos, p->apos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2806 aright = MAX(cr->aend, p->aend);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2807 if (cr->apos < p->apos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2808 sign = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2809 } else if (cr->apos > p->apos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2810 sign = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2811 } else if (cr->flags & BAM_FREAD1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2812 sign = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2813 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2814 sign = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2815 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2816
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2817 //fprintf(stderr, "paired %"PRId64"\n", kh_val(s->pair[sec], k));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2818
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2819 // This vs p: tlen, matepos, flags
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2820 if (bam_ins_size(b) != sign*(aright-aleft+1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2821 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2822
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2823 if (MAX(bam_mate_pos(b)+1, 0) != p->apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2824 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2825
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2826 if (((bam_flag(b) & BAM_FMUNMAP) != 0) !=
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2827 ((p->flags & BAM_FUNMAP) != 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2828 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2829
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2830 if (((bam_flag(b) & BAM_FMREVERSE) != 0) !=
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2831 ((p->flags & BAM_FREVERSE) != 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2832 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2833
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2834
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2835 // p vs this: tlen, matepos, flags
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2836 if (p->tlen != -sign*(aright-aleft+1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2837 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2838
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2839 if (p->mate_pos != cr->apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2840 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2841
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2842 if (((p->flags & BAM_FMUNMAP) != 0) !=
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2843 ((p->mate_flags & CRAM_M_UNMAP) != 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2844 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2845
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2846 if (((p->flags & BAM_FMREVERSE) != 0) !=
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2847 ((p->mate_flags & CRAM_M_REVERSE) != 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2848 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2849
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2850 // Supplementary reads are just too ill defined
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2851 if ((cr->flags & BAM_FSUPPLEMENTARY) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2852 (p->flags & BAM_FSUPPLEMENTARY))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2853 goto detached;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2854
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2855 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2856 * The fields below are unused when encoding this read as it is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2857 * no longer detached. In theory they may get referred to when
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2858 * processing a 3rd or 4th read in this template?, so we set them
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2859 * here just to be sure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2860 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2861 * They do not need cram_stats_add() calls those as they are
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2862 * not emitted.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2863 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2864 cr->mate_pos = p->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2865 cr->tlen = sign*(aright-aleft+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2866 cr->mate_flags =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2867 ((p->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2868 ((p->flags & BAM_FMREVERSE) == BAM_FMREVERSE) * CRAM_M_REVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2869
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2870 // Decrement statistics aggregated earlier
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2871 cram_stats_del(c->stats[DS_NP], p->mate_pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2872 cram_stats_del(c->stats[DS_MF], p->mate_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2873 cram_stats_del(c->stats[DS_TS], p->tlen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2874 cram_stats_del(c->stats[DS_NS], p->mate_ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2875
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2876 /* Similarly we could correct the p-> values too, but these will no
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2877 * longer have any code that refers back to them as the new 'p'
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2878 * for this template is our current 'cr'.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2879 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2880 //p->mate_pos = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2881 //p->mate_flags =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2882 // ((cr->flags & BAM_FMUNMAP) == BAM_FMUNMAP) * CRAM_M_UNMAP +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2883 // ((cr->flags & BAM_FMREVERSE) == BAM_FMREVERSE)* CRAM_M_REVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2884 //p->tlen = p->apos - cr->aend;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2885
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2886 // Clear detached from cr flags
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2887 cr->cram_flags &= ~CRAM_FLAG_DETACHED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2888 cram_stats_add(c->stats[DS_CF], cr->cram_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2889
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2890 // Clear detached from p flags and set downstream
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2891 cram_stats_del(c->stats[DS_CF], p->cram_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2892 p->cram_flags &= ~CRAM_FLAG_DETACHED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2893 p->cram_flags |= CRAM_FLAG_MATE_DOWNSTREAM;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2894 cram_stats_add(c->stats[DS_CF], p->cram_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2895
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2896 p->mate_line = rnum - (kh_val(s->pair[sec], k) + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2897 cram_stats_add(c->stats[DS_NF], p->mate_line);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2898
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2899 kh_val(s->pair[sec], k) = rnum;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2900 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2901 detached:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2902 //fprintf(stderr, "unpaired\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2903
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2904 /* Derive mate flags from this flag */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2905 cr->mate_flags = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2906 if (bam_flag(b) & BAM_FMUNMAP)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2907 cr->mate_flags |= CRAM_M_UNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2908 if (bam_flag(b) & BAM_FMREVERSE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2909 cr->mate_flags |= CRAM_M_REVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2910
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2911 cram_stats_add(c->stats[DS_MF], cr->mate_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2912
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2913 cr->mate_pos = MAX(bam_mate_pos(b)+1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2914 cram_stats_add(c->stats[DS_NP], cr->mate_pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2915
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2916 cr->tlen = bam_ins_size(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2917 cram_stats_add(c->stats[DS_TS], cr->tlen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2918
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2919 cr->cram_flags |= CRAM_FLAG_DETACHED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2920 cram_stats_add(c->stats[DS_CF], cr->cram_flags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2921 cram_stats_add(c->stats[DS_NS], bam_mate_ref(b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2922 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2923 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2924
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2925 cr->mqual = bam_map_qual(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2926 cram_stats_add(c->stats[DS_MQ], cr->mqual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2927
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2928 cr->mate_ref_id = bam_mate_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2929
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2930 if (!(bam_flag(b) & BAM_FUNMAP)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2931 if (c->first_base > cr->apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2932 c->first_base = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2933
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2934 if (c->last_base < cr->aend)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2935 c->last_base = cr->aend;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2936 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2937
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2938 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2939 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2940
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2941 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2942 * Write iterator: put BAM format sequences into a CRAM file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2943 * We buffer up a container's worth of data at a time.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2944 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2945 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2946 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2947 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2948 int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2949 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2950
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2951 if (!fd->ctr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2952 fd->ctr = cram_new_container(fd->seqs_per_slice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2953 fd->slices_per_container);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2954 if (!fd->ctr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2955 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2956 fd->ctr->record_counter = fd->record_counter;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2957 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2958 c = fd->ctr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2959
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2960 if (!c->slice || c->curr_rec == c->max_rec ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2961 (bam_ref(b) != c->curr_ref && c->curr_ref >= -1)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2962 int slice_rec, curr_rec, multi_seq = fd->multi_seq == 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2963 int curr_ref = c->slice ? c->curr_ref : bam_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2964
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2965
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2966 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2967 * Start packing slices when we routinely have under 1/4tr full.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2968 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2969 * This option isn't available if we choose to embed references
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2970 * since we can only have one per slice.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2971 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2972 if (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2973 fd->last_slice && fd->last_slice < c->max_rec/4+10 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2974 !fd->embed_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2975 if (fd->verbose && !c->multi_seq)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2976 fprintf(stderr, "Multi-ref enabled for this container\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2977 multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2978 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2979
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2980 slice_rec = c->slice_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2981 curr_rec = c->curr_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2982
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2983 if (CRAM_MAJOR_VERS(fd->version) == 1 ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2984 c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2985 if (NULL == (c = cram_next_container(fd, b))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2986 if (fd->ctr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2987 // prevent cram_close attempting to flush
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2988 cram_free_container(fd->ctr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2989 fd->ctr = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2990 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2991 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2992 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2993 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2994
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2995 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2996 * Due to our processing order, some things we've already done we
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2997 * cannot easily undo. So when we first notice we should be packing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2998 * multiple sequences per container we emit the small partial
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2999 * container as-is and then start a fresh one in a different mode.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3000 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3001 if (multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3002 fd->multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3003 c->multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3004 c->pos_sorted = 0; // required atm for multi_seq slices
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3005
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3006 if (!c->refs_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3007 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3008 c->refs_used = calloc(fd->refs->nref, sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3009 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3010 if (!c->refs_used)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3011 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3012 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3013 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3014
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3015 fd->last_slice = curr_rec - slice_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3016 c->slice_rec = c->curr_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3017
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3018 // Have we seen this reference before?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3019 if (bam_ref(b) >= 0 && bam_ref(b) != curr_ref && !fd->embed_ref &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3020 !fd->unsorted && multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3021
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3022 if (!c->refs_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3023 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3024 c->refs_used = calloc(fd->refs->nref, sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3025 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3026 if (!c->refs_used)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3027 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3028 } else if (c->refs_used && c->refs_used[bam_ref(b)]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3029 fprintf(stderr, "Unsorted mode enabled\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3030 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3031 fd->unsorted = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3032 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3033 fd->multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3034 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3035 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3036
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3037 c->curr_ref = bam_ref(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3038 if (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3039 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3040
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3041 if (!c->bams) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3042 /* First time through, allocate a set of bam pointers */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3043 pthread_mutex_lock(&fd->bam_list_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3044 if (fd->bl) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3045 spare_bams *spare = fd->bl;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3046 c->bams = spare->bams;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3047 fd->bl = spare->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3048 free(spare);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3049 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3050 c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3051 if (!c->bams)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3052 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3053 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3054 pthread_mutex_unlock(&fd->bam_list_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3055 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3056
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3057 /* Copy or alloc+copy the bam record, for later encoding */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3058 if (c->bams[c->curr_c_rec])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3059 bam_copy1(c->bams[c->curr_c_rec], b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3060 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3061 c->bams[c->curr_c_rec] = bam_dup(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3062
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3063 c->curr_rec++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3064 c->curr_c_rec++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3065 fd->record_counter++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3066
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3067 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3068 }