annotate ezBAMQC/src/htslib/cram/cram_decode.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2012-2014 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 * - In-memory decoding of CRAM data structures.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 * - Iterator for reading CRAM record by record.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #ifdef HAVE_CONFIG_H
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 #include "io_lib_config.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 #include <errno.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 #include <zlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 #include <sys/types.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 #include <sys/stat.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 #include <math.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 #include <ctype.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 #include "cram/cram.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 #include "cram/os.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 #include "cram/md5.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 //Whether CIGAR has just M or uses = and X to indicate match and mismatch
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 //#define USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 * CRAM compression headers
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 * Decodes the Tag Dictionary record in the preservation map
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 * Updates the cram compression header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 * Returns number of bytes decoded on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 int cram_decode_TD(char *cp, cram_block_compression_hdr *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 char *op = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 unsigned char *dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 cram_block *b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 int32_t blk_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 int nTL, i, sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 if (!(b = cram_new_block(0, 0)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 h->TD_blk = b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 /* Decode */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 cp += itf8_get(cp, &blk_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 if (!blk_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 h->nTL = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 h->TL = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 return cp - op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 BLOCK_APPEND(b, cp, blk_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 cp += blk_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 sz = cp - op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 // Force nul termination if missing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 BLOCK_APPEND_CHAR(b, '\0');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 /* Set up TL lookup table */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 dat = BLOCK_DATA(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 // Count
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 nTL++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 while (dat[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 i++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 // Copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 h->nTL = nTL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 if (!(h->TL = calloc(h->nTL, sizeof(unsigned char *))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 h->TL[nTL++] = &dat[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 while (dat[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 i++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 return sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 * Decodes a CRAM block compression header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 * Returns header ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 cram_block *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 char *cp, *cp_copy;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 int32_t map_size, map_count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 if (!hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 if (b->method != RAW) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 if (cram_uncompress_block(b)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 cp = (char *)b->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 cp += itf8_get(cp, &hdr->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 cp += itf8_get(cp, &hdr->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 cp += itf8_get(cp, &hdr->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 cp += itf8_get(cp, &hdr->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 cp += itf8_get(cp, &hdr->num_landmarks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 if (!(hdr->landmark = malloc(hdr->num_landmarks * sizeof(int32_t)))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 for (i = 0; i < hdr->num_landmarks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 cp += itf8_get(cp, &hdr->landmark[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 hdr->preservation_map = kh_init(map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 memset(hdr->rec_encoding_map, 0,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 CRAM_MAP_HASH * sizeof(hdr->rec_encoding_map[0]));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 memset(hdr->tag_encoding_map, 0,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 CRAM_MAP_HASH * sizeof(hdr->tag_encoding_map[0]));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 if (!hdr->preservation_map) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 /* Initialise defaults for preservation map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 hdr->mapped_qs_included = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 hdr->unmapped_qs_included = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174 hdr->unmapped_placed = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 hdr->qs_included = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 hdr->read_names_included = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 hdr->AP_delta = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 memcpy(hdr->substitution_matrix, "CGTNAGTNACTNACGNACGT", 20);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 /* Preservation map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 cp += itf8_get(cp, &map_size); cp_copy = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 cp += itf8_get(cp, &map_count);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 for (i = 0; i < map_count; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 pmap_t hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 cp += 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 switch(CRAM_KEY(cp[-2],cp[-1])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190 case CRAM_KEY('M','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 k = kh_put(map, hdr->preservation_map, "MI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 hdr->mapped_qs_included = hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 case CRAM_KEY('U','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 k = kh_put(map, hdr->preservation_map, "UI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 hdr->unmapped_qs_included = hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 case CRAM_KEY('P','I'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 k = kh_put(map, hdr->preservation_map, "PI", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 hdr->unmapped_placed = hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 case CRAM_KEY('R','N'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 k = kh_put(map, hdr->preservation_map, "RN", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235 hdr->read_names_included = hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 case CRAM_KEY('A','P'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 k = kh_put(map, hdr->preservation_map, "AP", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 hdr->AP_delta = hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 case CRAM_KEY('R','R'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 hd.i = *cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 k = kh_put(map, hdr->preservation_map, "RR", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 fd->no_ref = !hd.i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 case CRAM_KEY('S','M'):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 hdr->substitution_matrix[0][(cp[0]>>6)&3] = 'C';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 hdr->substitution_matrix[0][(cp[0]>>4)&3] = 'G';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 hdr->substitution_matrix[0][(cp[0]>>2)&3] = 'T';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 hdr->substitution_matrix[0][(cp[0]>>0)&3] = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 hdr->substitution_matrix[1][(cp[1]>>6)&3] = 'A';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269 hdr->substitution_matrix[1][(cp[1]>>4)&3] = 'G';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 hdr->substitution_matrix[1][(cp[1]>>2)&3] = 'T';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 hdr->substitution_matrix[1][(cp[1]>>0)&3] = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 hdr->substitution_matrix[2][(cp[2]>>6)&3] = 'A';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 hdr->substitution_matrix[2][(cp[2]>>4)&3] = 'C';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 hdr->substitution_matrix[2][(cp[2]>>2)&3] = 'T';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 hdr->substitution_matrix[2][(cp[2]>>0)&3] = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 hdr->substitution_matrix[3][(cp[3]>>6)&3] = 'A';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279 hdr->substitution_matrix[3][(cp[3]>>4)&3] = 'C';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 hdr->substitution_matrix[3][(cp[3]>>2)&3] = 'G';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 hdr->substitution_matrix[3][(cp[3]>>0)&3] = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283 hdr->substitution_matrix[4][(cp[4]>>6)&3] = 'A';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 hdr->substitution_matrix[4][(cp[4]>>4)&3] = 'C';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 hdr->substitution_matrix[4][(cp[4]>>2)&3] = 'G';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 hdr->substitution_matrix[4][(cp[4]>>0)&3] = 'T';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288 hd.p = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 cp += 5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291 k = kh_put(map, hdr->preservation_map, "SM", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299 case CRAM_KEY('T','D'): {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300 int sz = cram_decode_TD(cp, hdr); // tag dictionary
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 if (sz < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 hd.p = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 cp += sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 k = kh_put(map, hdr->preservation_map, "TD", &r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 if (-1 == r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 kh_val(hdr->preservation_map, k) = hd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 fprintf(stderr, "Unrecognised preservation map key %c%c\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 cp[-2], cp[-1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 // guess byte;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326 if (cp - cp_copy != map_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 /* Record encoding map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 cp += itf8_get(cp, &map_size); cp_copy = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 cp += itf8_get(cp, &map_count);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334 for (i = 0; i < map_count; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 char *key = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336 int32_t encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 int32_t size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 if (!m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
345 cp += 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
346 cp += itf8_get(cp, &encoding);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
347 cp += itf8_get(cp, &size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
348
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
349 // Fill out cram_map purely for cram_dump to dump out.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
350 m->key = (key[0]<<8)|key[1];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
351 m->encoding = encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
352 m->size = size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
353 m->offset = cp - (char *)b->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354 m->codec = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 if (m->encoding == E_NULL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359 //printf("%s codes for %.2s\n", cram_encoding2str(encoding), key);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 * For CRAM1.0 CF and BF are Byte and not Int.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363 * Practically speaking it makes no difference unless we have a
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 * 1.0 format file that stores these in EXTERNAL as only then
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365 * does Byte vs Int matter.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 * Neither this C code nor Java reference implementations did this,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368 * so we gloss over it and treat them as int.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371 if (key[0] == 'B' && key[1] == 'F') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 if (!(hdr->codecs[DS_BF] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 } else if (key[0] == 'C' && key[1] == 'F') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379 if (!(hdr->codecs[DS_CF] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385 } else if (key[0] == 'R' && key[1] == 'I') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 if (!(hdr->codecs[DS_RI] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 } else if (key[0] == 'R' && key[1] == 'L') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 if (!(hdr->codecs[DS_RL] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 } else if (key[0] == 'A' && key[1] == 'P') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 if (!(hdr->codecs[DS_AP] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 } else if (key[0] == 'R' && key[1] == 'G') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 if (!(hdr->codecs[DS_RG] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 } else if (key[0] == 'M' && key[1] == 'F') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 if (!(hdr->codecs[DS_MF] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 } else if (key[0] == 'N' && key[1] == 'S') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 if (!(hdr->codecs[DS_NS] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 } else if (key[0] == 'N' && key[1] == 'P') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 if (!(hdr->codecs[DS_NP] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 } else if (key[0] == 'T' && key[1] == 'S') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 if (!(hdr->codecs[DS_TS] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 } else if (key[0] == 'N' && key[1] == 'F') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 if (!(hdr->codecs[DS_NF] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448 } else if (key[0] == 'T' && key[1] == 'C') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 if (!(hdr->codecs[DS_TC] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 } else if (key[0] == 'T' && key[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 if (!(hdr->codecs[DS_TN] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 } else if (key[0] == 'F' && key[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 if (!(hdr->codecs[DS_FN] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 } else if (key[0] == 'F' && key[1] == 'C') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 if (!(hdr->codecs[DS_FC] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 } else if (key[0] == 'F' && key[1] == 'P') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 if (!(hdr->codecs[DS_FP] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 } else if (key[0] == 'B' && key[1] == 'S') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 if (!(hdr->codecs[DS_BS] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 } else if (key[0] == 'I' && key[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 if (!(hdr->codecs[DS_IN] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 E_BYTE_ARRAY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 } else if (key[0] == 'S' && key[1] == 'C') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 if (!(hdr->codecs[DS_SC] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 E_BYTE_ARRAY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 } else if (key[0] == 'D' && key[1] == 'L') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 if (!(hdr->codecs[DS_DL] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 } else if (key[0] == 'B' && key[1] == 'A') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 if (!(hdr->codecs[DS_BA] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 } else if (key[0] == 'B' && key[1] == 'B') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 if (!(hdr->codecs[DS_BB] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520 E_BYTE_ARRAY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 } else if (key[0] == 'R' && key[1] == 'S') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 if (!(hdr->codecs[DS_RS] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 } else if (key[0] == 'P' && key[1] == 'D') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533 if (!(hdr->codecs[DS_PD] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 } else if (key[0] == 'H' && key[1] == 'C') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540 if (!(hdr->codecs[DS_HC] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546 } else if (key[0] == 'M' && key[1] == 'Q') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547 if (!(hdr->codecs[DS_MQ] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 } else if (key[0] == 'R' && key[1] == 'N') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554 if (!(hdr->codecs[DS_RN] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 E_BYTE_ARRAY_BLOCK,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 } else if (key[0] == 'Q' && key[1] == 'S') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 if (!(hdr->codecs[DS_QS] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562 E_BYTE,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567 } else if (key[0] == 'Q' && key[1] == 'Q') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 if (!(hdr->codecs[DS_QQ] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 E_BYTE_ARRAY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 } else if (key[0] == 'T' && key[1] == 'L') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575 if (!(hdr->codecs[DS_TL] = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576 E_INT,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 } else if (key[0] == 'T' && key[1] == 'M') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582 } else if (key[0] == 'T' && key[1] == 'V') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
583 } else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
584 fprintf(stderr, "Unrecognised key: %.2s\n", key);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
585
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
586 cp += size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
587
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588 m->next = hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589 hdr->rec_encoding_map[CRAM_MAP(key[0], key[1])] = m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 if (cp - cp_copy != map_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 /* Tag encoding map */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597 cp += itf8_get(cp, &map_size); cp_copy = cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 cp += itf8_get(cp, &map_count);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599 for (i = 0; i < map_count; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600 int32_t encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 int32_t size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 cram_map *m = malloc(sizeof(*m)); // FIXME: use pooled_alloc
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 char *key = cp+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 if (!m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610 m->key = (key[0]<<16)|(key[1]<<8)|key[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612 cp += 4; // Strictly ITF8, but this suffices
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 cp += itf8_get(cp, &encoding);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 cp += itf8_get(cp, &size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 m->encoding = encoding;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617 m->size = size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618 m->offset = cp - (char *)b->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 if (!(m->codec = cram_decoder_init(encoding, cp, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620 E_BYTE_ARRAY_BLOCK, fd->version))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622 free(m);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 cp += size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628 m->next = hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 hdr->tag_encoding_map[CRAM_MAP(key[0],key[1])] = m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 if (cp - cp_copy != map_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 cram_free_compression_header(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 return hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640 * Note we also need to scan through the record encoding map to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 * see which data series share the same block, either external or
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 * CORE. For example if we need the BF data series but MQ and CF
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 * are also encoded in the same block then we need to add those in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644 * as a dependency in order to correctly decode BF.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 int cram_dependent_data_series(cram_fd *fd,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 cram_block_compression_hdr *hdr,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 cram_slice *s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652 int *block_used;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653 int core_used = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 static int i_to_id[] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656 DS_BF, DS_AP, DS_FP, DS_RL, DS_DL, DS_NF, DS_BA, DS_QS,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657 DS_FC, DS_FN, DS_BS, DS_IN, DS_RG, DS_MQ, DS_TL, DS_RN,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658 DS_NS, DS_NP, DS_TS, DS_MF, DS_CF, DS_RI, DS_RS, DS_PD,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659 DS_HC, DS_SC, DS_BB, DS_QQ,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 uint32_t orig_ds;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664 * Set the data_series bit field based on fd->required_fields
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665 * contents.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667 if (fd->required_fields && fd->required_fields != INT_MAX) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 hdr->data_series = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 if (fd->required_fields & SAM_QNAME)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 hdr->data_series |= CRAM_RN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673 if (fd->required_fields & SAM_FLAG)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674 hdr->data_series |= CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
676 if (fd->required_fields & SAM_RNAME)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
677 hdr->data_series |= CRAM_RI | CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
678
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
679 if (fd->required_fields & SAM_POS)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
680 hdr->data_series |= CRAM_AP | CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
681
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
682 if (fd->required_fields & SAM_MAPQ)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
683 hdr->data_series |= CRAM_MQ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
684
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
685 if (fd->required_fields & SAM_CIGAR)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
686 hdr->data_series |= CRAM_CIGAR;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
687
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
688 if (fd->required_fields & SAM_RNEXT)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
689 hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_RI | CRAM_NS |CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
690
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
691 if (fd->required_fields & SAM_PNEXT)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
692 hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_NP | CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
693
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
694 if (fd->required_fields & SAM_TLEN)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
695 hdr->data_series |= CRAM_CF | CRAM_NF | CRAM_AP | CRAM_TS |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
696 CRAM_BF | CRAM_MF | CRAM_RI | CRAM_CIGAR;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
697
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
698 if (fd->required_fields & SAM_SEQ)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
699 hdr->data_series |= CRAM_SEQ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
700
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
701 if (!(fd->required_fields & SAM_AUX))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
702 // No easy way to get MD/NM without other tags at present
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
703 fd->decode_md = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
704
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
705 if (fd->required_fields & SAM_QUAL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
706 hdr->data_series |= CRAM_SEQ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
707
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
708 if (fd->required_fields & SAM_AUX)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
709 hdr->data_series |= CRAM_RG | CRAM_TL | CRAM_aux;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
710
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
711 if (fd->required_fields & SAM_RGAUX)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
712 hdr->data_series |= CRAM_RG | CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
713
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
714 // Always uncompress CORE block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
715 if (cram_uncompress_block(s->block[0]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
716 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
717 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
718 hdr->data_series = CRAM_ALL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
719
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
720 for (i = 0; i < s->hdr->num_blocks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
721 if (cram_uncompress_block(s->block[i]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
722 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
723 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
724
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
725 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
726 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
727
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
728 block_used = calloc(s->hdr->num_blocks+1, sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
729 if (!block_used)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
730 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
731
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
732 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
733 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
734 * Also set data_series based on code prerequisites. Eg if we need
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
735 * CRAM_QS then we also need to know CRAM_RL so we know how long it
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
736 * is, or if we need FC/FP then we also need FN (number of features).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
737 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
738 * It's not reciprocal though. We may be needing to decode FN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
739 * but have no need to decode FC, FP and cigar ops.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
740 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
741 if (hdr->data_series & CRAM_RS) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
742 if (hdr->data_series & CRAM_PD) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
743 if (hdr->data_series & CRAM_HC) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
744 if (hdr->data_series & CRAM_QS) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
745 if (hdr->data_series & CRAM_IN) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
746 if (hdr->data_series & CRAM_SC) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
747 if (hdr->data_series & CRAM_BS) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
748 if (hdr->data_series & CRAM_DL) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
749 if (hdr->data_series & CRAM_BA) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
750 if (hdr->data_series & CRAM_BB) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
751 if (hdr->data_series & CRAM_QQ) hdr->data_series |= CRAM_FC|CRAM_FP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
752
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
753 // cram_decode_seq() needs seq[] array
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
754 if (hdr->data_series & (CRAM_SEQ|CRAM_CIGAR)) hdr->data_series |= CRAM_RL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
755
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
756 if (hdr->data_series & CRAM_FP) hdr->data_series |= CRAM_FC;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
757 if (hdr->data_series & CRAM_FC) hdr->data_series |= CRAM_FN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
758 if (hdr->data_series & CRAM_aux) hdr->data_series |= CRAM_TL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
759 if (hdr->data_series & CRAM_MF) hdr->data_series |= CRAM_CF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
760 if (hdr->data_series & CRAM_MQ) hdr->data_series |= CRAM_BF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
761 if (hdr->data_series & CRAM_BS) hdr->data_series |= CRAM_RI;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
762 if (hdr->data_series & (CRAM_MF |CRAM_NS |CRAM_NP |CRAM_TS |CRAM_NF))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
763 hdr->data_series |= CRAM_CF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
764 if (!hdr->read_names_included && hdr->data_series & CRAM_RN)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
765 hdr->data_series |= CRAM_CF | CRAM_NF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
766 if (hdr->data_series & (CRAM_BA | CRAM_QS | CRAM_BB | CRAM_QQ))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
767 hdr->data_series |= CRAM_BF | CRAM_CF | CRAM_RL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
768
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
769 orig_ds = hdr->data_series;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
770
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
771 // Find which blocks are in use.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
772 for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
773 int bnum1, bnum2, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
774 cram_codec *c = hdr->codecs[i_to_id[i]];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
775
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
776 if (!(hdr->data_series & (1<<i)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
777 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
778
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
779 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
780 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
781
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
782 bnum1 = cram_codec_to_id(c, &bnum2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
783
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
784 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
785 switch (bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
786 case -2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
787 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
788
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
789 case -1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
790 core_used = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
791 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
792
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
793 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
794 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
795 if (s->block[j]->content_type == EXTERNAL &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
796 s->block[j]->content_id == bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
797 block_used[j] = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
798 if (cram_uncompress_block(s->block[j])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
799 free(block_used);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
800 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
801 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
802 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
803 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
804 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
805 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
806
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
807 if (bnum2 == -2 || bnum1 == bnum2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
808 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
809
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
810 bnum1 = bnum2; // 2nd pass
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
811 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
812 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
813
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
814 // Tags too
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
815 if ((fd->required_fields & SAM_AUX) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
816 (hdr->data_series & CRAM_aux)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
817 for (i = 0; i < CRAM_MAP_HASH; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
818 int bnum1, bnum2, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
819 cram_map *m = hdr->tag_encoding_map[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
820
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
821 while (m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
822 cram_codec *c = m->codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
823 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
824 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
825
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
826 bnum1 = cram_codec_to_id(c, &bnum2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
827
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
828 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
829 switch (bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
830 case -2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
831 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
832
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
833 case -1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
834 core_used = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
835 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
836
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
837 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
838 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
839 if (s->block[j]->content_type == EXTERNAL &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
840 s->block[j]->content_id == bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
841 block_used[j] = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
842 if (cram_uncompress_block(s->block[j])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
843 free(block_used);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
844 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
845 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
846 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
847 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
848 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
849 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
850
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
851 if (bnum2 == -2 || bnum1 == bnum2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
852 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
853
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
854 bnum1 = bnum2; // 2nd pass
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
855 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
856
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
857 m = m->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
858 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
859 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
860 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
861
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
862 // We now know which blocks are in used, so repeat and find
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
863 // which other data series need to be added.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
864 for (i = 0; i < sizeof(i_to_id)/sizeof(*i_to_id); i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
865 int bnum1, bnum2, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
866 cram_codec *c = hdr->codecs[i_to_id[i]];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
868 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
869 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
870
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
871 bnum1 = cram_codec_to_id(c, &bnum2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
872
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
873 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
874 switch (bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
875 case -2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
876 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
877
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
878 case -1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
879 if (core_used) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
880 //printf(" + data series %08x:\n", 1<<i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
881 hdr->data_series |= 1<<i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
882 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
883 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
884
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
885 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
886 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
887 if (s->block[j]->content_type == EXTERNAL &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
888 s->block[j]->content_id == bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
889 if (block_used[j]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
890 //printf(" + data series %08x:\n", 1<<i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
891 hdr->data_series |= 1<<i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
892 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
893 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
894 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
895 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
896 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
897
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
898 if (bnum2 == -2 || bnum1 == bnum2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
899 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
900
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
901 bnum1 = bnum2; // 2nd pass
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
902 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
903 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
904
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
905 // Tags too
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
906 for (i = 0; i < CRAM_MAP_HASH; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
907 int bnum1, bnum2, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
908 cram_map *m = hdr->tag_encoding_map[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
909
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
910 while (m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
911 cram_codec *c = m->codec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
912 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
913 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
914
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
915 bnum1 = cram_codec_to_id(c, &bnum2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
916
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
917 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
918 switch (bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
919 case -2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
920 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
921
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
922 case -1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
923 //printf(" + data series %08x:\n", CRAM_aux);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
924 hdr->data_series |= CRAM_aux;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
925 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
926
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
927 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
928 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
929 if (s->block[j]->content_type &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
930 s->block[j]->content_id == bnum1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
931 if (block_used[j]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
932 //printf(" + data series %08x:\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
933 // CRAM_aux);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
934 hdr->data_series |= CRAM_aux;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
935 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
936 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
937 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
938 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
939 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
940
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
941 if (bnum2 == -2 || bnum1 == bnum2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
942 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
943
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
944 bnum1 = bnum2; // 2nd pass
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
945 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
946
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
947 m = m->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
948 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
949 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
950 } while (orig_ds != hdr->data_series);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
951
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
952 free(block_used);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
953 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
954 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
955
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
956 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
957 * CRAM slices
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
958 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
959
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
960 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
961 * Decodes a CRAM (un)mapped slice header block.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
962 * Returns slice header ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
963 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
964 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
965 cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
966 cram_block_slice_hdr *hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
967 char *cp = (char *)b->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
968 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
969
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
970 if (b->content_type != MAPPED_SLICE &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
971 b->content_type != UNMAPPED_SLICE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
972 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
973
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
974 if (!(hdr = calloc(1, sizeof(*hdr))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
975 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
976
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
977 hdr->content_type = b->content_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
978
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
979 if (b->content_type == MAPPED_SLICE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
980 cp += itf8_get(cp, &hdr->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
981 cp += itf8_get(cp, &hdr->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
982 cp += itf8_get(cp, &hdr->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
983 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
984 cp += itf8_get(cp, &hdr->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
985 hdr->record_counter = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
986 if (CRAM_MAJOR_VERS(fd->version) == 2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
987 int32_t i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
988 cp += itf8_get(cp, &i32);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
989 hdr->record_counter = i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
990 } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
991 cp += ltf8_get(cp, &hdr->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
992 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
993
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
994 cp += itf8_get(cp, &hdr->num_blocks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
995
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
996 cp += itf8_get(cp, &hdr->num_content_ids);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
997 hdr->block_content_ids = malloc(hdr->num_content_ids * sizeof(int32_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
998 if (!hdr->block_content_ids) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
999 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1000 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1001 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1002
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1003 for (i = 0; i < hdr->num_content_ids; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1004 cp += itf8_get(cp, &hdr->block_content_ids[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1005 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1006
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1007 if (b->content_type == MAPPED_SLICE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1008 cp += itf8_get(cp, &hdr->ref_base_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1009 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1010
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1011 if (CRAM_MAJOR_VERS(fd->version) != 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1012 memcpy(hdr->md5, cp, 16);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1013 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1014 memset(hdr->md5, 0, 16);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1015 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1016
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1017 return hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1018 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1019
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1020
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1021 #if 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1022 /* Returns the number of bits set in val; it the highest bit used */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1023 static int nbits(int v) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1024 static const int MultiplyDeBruijnBitPosition[32] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1025 1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1026 9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1027 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1028
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1029 v |= v >> 1; // first up to set all bits 1 after the first 1 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1030 v |= v >> 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1031 v |= v >> 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1032 v |= v >> 8;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1033 v |= v >> 16;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1034
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1035 // DeBruijn magic to find top bit
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1036 return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1037 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1038 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1039
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1040 #if 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1041 static int sort_freqs(const void *vp1, const void *vp2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1042 const int i1 = *(const int *)vp1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1043 const int i2 = *(const int *)vp2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1044 return i1-i2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1045 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1046 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1047
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1048 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1049 * Primary CRAM sequence decoder
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1050 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1051
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1052 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1053 * Internal part of cram_decode_slice().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1054 * Generates the sequence, quality and cigar components.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1055 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1056 static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1057 cram_block *blk, cram_record *cr, SAM_hdr *bfd,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1058 int cf, char *seq, char *qual) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1059 int prev_pos = 0, f, r = 0, out_sz = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1060 int seq_pos = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1061 int cig_len = 0, ref_pos = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1062 int32_t fn, i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1063 enum cigar_op cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1064 uint32_t *cigar = s->cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1065 uint32_t ncigar = s->ncigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1066 uint32_t cigar_alloc = s->cigar_alloc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1067 uint32_t nm = 0, md_dist = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1068 int orig_aux = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1069 int decode_md = fd->decode_md && s->ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1070 uint32_t ds = c->comp_hdr->data_series;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1071
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1072 if ((ds & CRAM_QS) && !(cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1073 memset(qual, 30, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1074 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1075
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1076 if (decode_md) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1077 orig_aux = BLOCK_SIZE(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1078 BLOCK_APPEND(s->aux_blk, "MDZ", 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1079 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1080
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1081 if (ds & CRAM_FN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1082 if (!c->comp_hdr->codecs[DS_FN]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1083 r |= c->comp_hdr->codecs[DS_FN]->decode(s,c->comp_hdr->codecs[DS_FN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1084 blk, (char *)&fn, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1085 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1086 fn = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1087 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1088
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1089 ref_pos--; // count from 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1090 cr->cigar = ncigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1091
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1092 if (!(ds & (CRAM_FC | CRAM_FP)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1093 goto skip_cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1094
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1095 for (f = 0; f < fn; f++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1096 int32_t pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1097 char op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1098
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1099 if (ncigar+2 >= cigar_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1100 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1101 s->cigar = cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1102 if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1103 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1105
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1106 if (ds & CRAM_FC) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1107 if (!c->comp_hdr->codecs[DS_FC]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1108 r |= c->comp_hdr->codecs[DS_FC]->decode(s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1109 c->comp_hdr->codecs[DS_FC],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1110 blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1111 &op, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1112 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1113
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1114 if (!(ds & CRAM_FP))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1115 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1117 if (!c->comp_hdr->codecs[DS_FP]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1118 r |= c->comp_hdr->codecs[DS_FP]->decode(s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1119 c->comp_hdr->codecs[DS_FP],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1120 blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1121 (char *)&pos, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1122 pos += prev_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1123
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1124 if (pos > seq_pos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1125 if (pos > cr->len+1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1126 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1127
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1128 if (s->ref && cr->ref_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1129 if (ref_pos + pos - seq_pos > bfd->ref[cr->ref_id].len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1130 static int whinged = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1131 if (!whinged)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1132 fprintf(stderr, "Ref pos outside of ref "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1133 "sequence boundary\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1134 whinged = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1135 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1136 memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1137 pos - seq_pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1138 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1139 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1140 #ifdef USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1141 if (cig_len && cig_op != BAM_CBASE_MATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1142 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1143 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1144 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1145 cig_op = BAM_CBASE_MATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1146 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1147 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1148 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1149 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1150 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1151 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1152 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1153 cig_len += pos - seq_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1154 ref_pos += pos - seq_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1155 md_dist += pos - seq_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1156 seq_pos = pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1157 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1158
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1159 prev_pos = pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1161 if (!(ds & CRAM_FC))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1162 goto skip_cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1163
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1164 if (!(ds & CRAM_FC))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1165 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1166
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1167 switch(op) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1168 case 'S': { // soft clip: IN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1169 int32_t out_sz2 = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1170
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1171 if (cig_len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1172 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1173 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1174 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1175 if (ds & CRAM_IN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1176 switch (CRAM_MAJOR_VERS(fd->version)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1177 case 1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1178 r |= c->comp_hdr->codecs[DS_IN]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1179 ? c->comp_hdr->codecs[DS_IN]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1180 ->decode(s, c->comp_hdr->codecs[DS_IN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1181 blk, &seq[pos-1], &out_sz2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1182 : (seq[pos-1] = 'N', out_sz2 = 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1183 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1184
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1185 case 2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1186 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1187 r |= c->comp_hdr->codecs[DS_SC]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1188 ? c->comp_hdr->codecs[DS_SC]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1189 ->decode(s, c->comp_hdr->codecs[DS_SC],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1190 blk, &seq[pos-1], &out_sz2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1191 : (seq[pos-1] = 'N', out_sz2 = 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1192 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1193
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1194 // default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1195 // r |= c->comp_hdr->codecs[DS_BB]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1196 // ? c->comp_hdr->codecs[DS_BB]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1197 // ->decode(s, c->comp_hdr->codecs[DS_BB],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1198 // blk, &seq[pos-1], &out_sz2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1199 // : (seq[pos-1] = 'N', out_sz2 = 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1200 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1201 cigar[ncigar++] = (out_sz2<<4) + BAM_CSOFT_CLIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1202 cig_op = BAM_CSOFT_CLIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1203 seq_pos += out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1204 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1205 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1206 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1207
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1208 case 'X': { // Substitution; BS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1209 unsigned char base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1210 #ifdef USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1211 if (cig_len && cig_op != BAM_CBASE_MISMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1212 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1213 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1214 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1215 if (ds & CRAM_BS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1216 if (!c->comp_hdr->codecs[DS_BS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1217 r |= c->comp_hdr->codecs[DS_BS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1218 ->decode(s, c->comp_hdr->codecs[DS_BS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1219 (char *)&base, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1220 seq[pos-1] = 'N'; // FIXME look up BS=base value
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1221 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1222 cig_op = BAM_CBASE_MISMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1223 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1224 int ref_base;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1225 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1226 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1227 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1228 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1229 if (ds & CRAM_BS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1230 if (!c->comp_hdr->codecs[DS_BS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1231 r |= c->comp_hdr->codecs[DS_BS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1232 ->decode(s, c->comp_hdr->codecs[DS_BS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1233 (char *)&base, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1234 if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1235 seq[pos-1] = 'N';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1236 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1237 ref_base = fd->L1[(uc)s->ref[ref_pos - s->ref_start +1]];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1238 seq[pos-1] = c->comp_hdr->
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1239 substitution_matrix[ref_base][base];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1240 if (decode_md) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1241 BLOCK_APPEND_UINT(s->aux_blk, md_dist);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1242 BLOCK_APPEND_CHAR(s->aux_blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1243 s->ref[ref_pos-s->ref_start +1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1244 md_dist = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1245 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1246 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1247 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1248 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1249 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1250 nm++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1251 cig_len++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1252 seq_pos++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1253 ref_pos++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1254 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1255 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1256
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1257 case 'D': { // Deletion; DL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1258 if (cig_len && cig_op != BAM_CDEL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1259 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1260 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1261 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1262 if (ds & CRAM_DL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1263 if (!c->comp_hdr->codecs[DS_DL]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1264 r |= c->comp_hdr->codecs[DS_DL]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1265 ->decode(s, c->comp_hdr->codecs[DS_DL], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1266 (char *)&i32, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1267 if (decode_md) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1268 BLOCK_APPEND_UINT(s->aux_blk, md_dist);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1269 BLOCK_APPEND_CHAR(s->aux_blk, '^');
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1270 BLOCK_APPEND(s->aux_blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1271 &s->ref[ref_pos - s->ref_start +1],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1272 i32);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1273 md_dist = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1274 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1275 cig_op = BAM_CDEL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1276 cig_len += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1277 ref_pos += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1278 nm += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1279 //printf(" %d: DL = %d (ret %d)\n", f, i32, r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1280 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1281 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1282 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1283
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1284 case 'I': { // Insertion (several bases); IN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1285 int32_t out_sz2 = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1286
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1287 if (cig_len && cig_op != BAM_CINS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1288 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1289 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1290 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1291
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1292 if (ds & CRAM_IN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1293 if (!c->comp_hdr->codecs[DS_IN]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1294 r |= c->comp_hdr->codecs[DS_IN]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1295 ->decode(s, c->comp_hdr->codecs[DS_IN], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1296 &seq[pos-1], &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1297 cig_op = BAM_CINS;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1298 cig_len += out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1299 seq_pos += out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1300 nm += out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1301 //printf(" %d: IN(I) = %.*s (ret %d, out_sz %d)\n", f, out_sz2, dat, r, out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1302 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1303 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1304 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1305
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1306 case 'i': { // Insertion (single base); BA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1307 if (cig_len && cig_op != BAM_CINS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1308 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1309 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1310 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1311 if (ds & CRAM_BA) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1312 if (!c->comp_hdr->codecs[DS_BA]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1313 r |= c->comp_hdr->codecs[DS_BA]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1314 ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1315 (char *)&seq[pos-1], &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1316 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1317 cig_op = BAM_CINS;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1318 cig_len++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1319 seq_pos++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1320 nm++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1321 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1322 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1323
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1324 case 'b': { // Several bases
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1325 int32_t len = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1326
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1327 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1328 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1329 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1330 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1331
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1332 if (ds & CRAM_BB) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1333 if (!c->comp_hdr->codecs[DS_BB]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1334 r |= c->comp_hdr->codecs[DS_BB]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1335 ->decode(s, c->comp_hdr->codecs[DS_BB], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1336 (char *)&seq[pos-1], &len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1337 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1338
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1339 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1340
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1341 cig_len+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1342 seq_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1343 ref_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1344 //prev_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1345 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1346 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1347
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1348 case 'q': { // Several quality values
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1349 int32_t len = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1350
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1351 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1352 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1353 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1354 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1355
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1356 if (ds & CRAM_QQ) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1357 if (!c->comp_hdr->codecs[DS_QQ]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1358 r |= c->comp_hdr->codecs[DS_QQ]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1359 ->decode(s, c->comp_hdr->codecs[DS_QQ], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1360 (char *)&qual[pos-1], &len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1361 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1362
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1363 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1364
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1365 cig_len+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1366 seq_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1367 ref_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1368 //prev_pos+=len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1369 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1370 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1371
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1372 case 'B': { // Read base; BA, QS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1373 #ifdef USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1374 if (cig_len && cig_op != BAM_CBASE_MISMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1375 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1376 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1377 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1378 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1379 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1380 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1381 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1382 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1383 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1384 if (ds & CRAM_BA) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1385 if (!c->comp_hdr->codecs[DS_BA]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1386 r |= c->comp_hdr->codecs[DS_BA]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1387 ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1388 (char *)&seq[pos-1], &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1389 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1390 if (ds & CRAM_QS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1391 if (!c->comp_hdr->codecs[DS_QS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1392 r |= c->comp_hdr->codecs[DS_QS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1393 ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1394 (char *)&qual[pos-1], &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1395 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1396 #ifdef USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1397 cig_op = BAM_CBASE_MISMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1398 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1399 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1400 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1401 cig_len++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1402 seq_pos++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1403 ref_pos++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1404 //printf(" %d: BA/QS(B) = %c/%d (ret %d)\n", f, i32, qc, r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1405 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1406 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1407
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1408 case 'Q': { // Quality score; QS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1409 if (ds & CRAM_QS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1410 if (!c->comp_hdr->codecs[DS_QS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1411 r |= c->comp_hdr->codecs[DS_QS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1412 ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1413 (char *)&qual[pos-1], &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1414 //printf(" %d: QS = %d (ret %d)\n", f, qc, r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1415 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1416 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1417 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1418
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1419 case 'H': { // hard clip; HC
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1420 if (cig_len && cig_op != BAM_CHARD_CLIP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1421 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1422 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1423 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1424 if (ds & CRAM_HC) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1425 if (!c->comp_hdr->codecs[DS_HC]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1426 r |= c->comp_hdr->codecs[DS_HC]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1427 ->decode(s, c->comp_hdr->codecs[DS_HC], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1428 (char *)&i32, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1429 cig_op = BAM_CHARD_CLIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1430 cig_len += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1431 nm += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1432 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1433 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1434 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1435
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1436 case 'P': { // padding; PD
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1437 if (cig_len && cig_op != BAM_CPAD) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1438 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1439 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1440 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1441 if (ds & CRAM_PD) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1442 if (!c->comp_hdr->codecs[DS_PD]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1443 r |= c->comp_hdr->codecs[DS_PD]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1444 ->decode(s, c->comp_hdr->codecs[DS_PD], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1445 (char *)&i32, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1446 cig_op = BAM_CPAD;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1447 cig_len += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1448 nm += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1449 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1450 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1451 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1452
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1453 case 'N': { // Ref skip; RS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1454 if (cig_len && cig_op != BAM_CREF_SKIP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1455 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1456 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1457 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1458 if (ds & CRAM_RS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1459 if (!c->comp_hdr->codecs[DS_RS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1460 r |= c->comp_hdr->codecs[DS_RS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1461 ->decode(s, c->comp_hdr->codecs[DS_RS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1462 (char *)&i32, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1463 cig_op = BAM_CREF_SKIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1464 cig_len += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1465 ref_pos += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1466 nm += i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1467 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1468 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1469 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1470
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1471 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1472 abort();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1473 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1474 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1475
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1476 if (!(ds & CRAM_FC))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1477 goto skip_cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1478
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1479 /* An implement match op for any unaccounted for bases */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1480 if ((ds & CRAM_FN) && cr->len >= seq_pos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1481 if (s->ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1482 if (ref_pos + cr->len - seq_pos + 1 > bfd->ref[cr->ref_id].len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1483 static int whinged = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1484 if (!whinged)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1485 fprintf(stderr, "Ref pos outside of ref sequence boundary\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1486 whinged = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1487 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1488 memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1489 cr->len - seq_pos + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1490 ref_pos += cr->len - seq_pos + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1491 md_dist += cr->len - seq_pos + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1492 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1493 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1494
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1495 if (ncigar+1 >= cigar_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1496 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1497 s->cigar = cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1498 if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1499 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1500 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1501 #ifdef USE_X
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1502 if (cig_len && cig_op != BAM_CBASE_MATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1503 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1504 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1505 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1506 cig_op = BAM_CBASE_MATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1507 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1508 if (cig_len && cig_op != BAM_CMATCH) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1509 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1510 cig_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1511 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1512 cig_op = BAM_CMATCH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1513 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1514 cig_len += cr->len - seq_pos+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1515 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1516
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1517 skip_cigar:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1518
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1519 if ((ds & CRAM_FN) && decode_md) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1520 BLOCK_APPEND_UINT(s->aux_blk, md_dist);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1521 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1522
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1523 if (cig_len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1524 if (ncigar >= cigar_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1525 cigar_alloc = cigar_alloc ? cigar_alloc*2 : 1024;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1526 s->cigar = cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1527 if (!(cigar = realloc(cigar, cigar_alloc * sizeof(*cigar))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1528 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1529 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1530
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1531 cigar[ncigar++] = (cig_len<<4) + cig_op;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1532 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1533
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1534 cr->ncigar = ncigar - cr->cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1535 cr->aend = ref_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1536
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1537 //printf("2: %.*s %d .. %d\n", cr->name_len, DSTRING_STR(name_ds) + cr->name, cr->apos, ref_pos);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1538
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1539 if (ds & CRAM_MQ) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1540 if (!c->comp_hdr->codecs[DS_MQ]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1541 r |= c->comp_hdr->codecs[DS_MQ]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1542 ->decode(s, c->comp_hdr->codecs[DS_MQ], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1543 (char *)&cr->mqual, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1544 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1545 cr->mqual = 40;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1546 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1547
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1548 if ((ds & CRAM_QS) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1549 int32_t out_sz2 = cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1550
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1551 if (ds & CRAM_QS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1552 if (!c->comp_hdr->codecs[DS_QS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1553 r |= c->comp_hdr->codecs[DS_QS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1554 ->decode(s, c->comp_hdr->codecs[DS_QS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1555 qual, &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1556 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1557 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1558
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1559 s->cigar = cigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1560 s->cigar_alloc = cigar_alloc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1561 s->ncigar = ncigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1562
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1563 if (decode_md) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1564 char buf[7];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1565 BLOCK_APPEND_CHAR(s->aux_blk, '\0'); // null terminate MD:Z:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1566 cr->aux_size += BLOCK_SIZE(s->aux_blk) - orig_aux;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1567 buf[0] = 'N'; buf[1] = 'M'; buf[2] = 'I';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1568 buf[3] = (nm>> 0) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1569 buf[4] = (nm>> 8) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1570 buf[5] = (nm>>16) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1571 buf[6] = (nm>>24) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1572 BLOCK_APPEND(s->aux_blk, buf, 7);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1573 cr->aux_size += 7;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1574 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1575
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1576 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1577 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1578
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1579 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1580 * Quick and simple hash lookup for cram_map arrays
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1581 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1582 static cram_map *map_find(cram_map **map, unsigned char *key, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1583 cram_map *m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1584
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1585 m = map[CRAM_MAP(key[0],key[1])];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1586 while (m && m->key != id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1587 m= m->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1588
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1589 return m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1590 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1591
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1592 //#define map_find(M,K,I) M[CRAM_MAP(K[0],K[1])];while (m && m->key != I);m= m->next
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1593
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1594
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1595 static int cram_decode_aux_1_0(cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1596 cram_block *blk, cram_record *cr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1597 int i, r = 0, out_sz = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1598 unsigned char ntags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1599
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1600 if (!c->comp_hdr->codecs[DS_TC]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1601 r |= c->comp_hdr->codecs[DS_TC]->decode(s, c->comp_hdr->codecs[DS_TC], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1602 (char *)&ntags, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1603 cr->ntags = ntags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1604
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1605 //printf("TC=%d\n", cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1606 cr->aux_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1607 cr->aux = BLOCK_SIZE(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1608
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1609 for (i = 0; i < cr->ntags; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1610 int32_t id, out_sz = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1611 unsigned char tag_data[3];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1612 cram_map *m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1613
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1614 //printf("Tag %d/%d\n", i+1, cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1615 if (!c->comp_hdr->codecs[DS_TN]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1616 r |= c->comp_hdr->codecs[DS_TN]->decode(s, c->comp_hdr->codecs[DS_TN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1617 blk, (char *)&id, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1618 if (out_sz == 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1619 tag_data[0] = ((char *)&id)[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1620 tag_data[1] = ((char *)&id)[1];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1621 tag_data[2] = ((char *)&id)[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1622 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1623 tag_data[0] = (id>>16) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1624 tag_data[1] = (id>>8) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1625 tag_data[2] = id & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1626 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1627
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1628 m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1629 if (!m)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1630 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1631 BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1632
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1633 if (!m->codec) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1634 r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1635
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1636 cr->aux_size += out_sz + 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1637 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1638
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1639 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1640 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1641
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1642 static int cram_decode_aux(cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1643 cram_block *blk, cram_record *cr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1644 int i, r = 0, out_sz = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1645 int32_t TL = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1646 unsigned char *TN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1647 uint32_t ds = c->comp_hdr->data_series;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1648
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1649 if (!(ds & (CRAM_TL|CRAM_aux))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1650 cr->aux = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1651 cr->aux_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1652 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1653 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1654
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1655 if (!c->comp_hdr->codecs[DS_TL]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1656 r |= c->comp_hdr->codecs[DS_TL]->decode(s, c->comp_hdr->codecs[DS_TL], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1657 (char *)&TL, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1658 if (r || TL < 0 || TL >= c->comp_hdr->nTL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1659 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1660
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1661 TN = c->comp_hdr->TL[TL];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1662 cr->ntags = strlen((char *)TN)/3; // optimise to remove strlen
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1663
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1664 //printf("TC=%d\n", cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1665 cr->aux_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1666 cr->aux = BLOCK_SIZE(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1667
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1668 if (!(ds & CRAM_aux))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1669 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1670
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1671 for (i = 0; i < cr->ntags; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1672 int32_t id, out_sz = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1673 unsigned char tag_data[3];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1674 cram_map *m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1675
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1676 //printf("Tag %d/%d\n", i+1, cr->ntags);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1677 tag_data[0] = *TN++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1678 tag_data[1] = *TN++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1679 tag_data[2] = *TN++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1680 id = (tag_data[0]<<16) | (tag_data[1]<<8) | tag_data[2];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1681
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1682 m = map_find(c->comp_hdr->tag_encoding_map, tag_data, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1683 if (!m)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1684 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1685 BLOCK_APPEND(s->aux_blk, (char *)tag_data, 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1686
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1687 if (!m->codec) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1688 r |= m->codec->decode(s, m->codec, blk, (char *)s->aux_blk, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1689 cr->aux_size += out_sz + 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1690 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1691
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1692 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1693 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1694
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1695 /* Resolve mate pair cross-references between recs within this slice */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1696 static void cram_decode_slice_xref(cram_slice *s, int required_fields) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1697 int rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1698
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1699 if (!(required_fields & (SAM_RNEXT | SAM_PNEXT | SAM_TLEN))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1700 for (rec = 0; rec < s->hdr->num_records; rec++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1701 cram_record *cr = &s->crecs[rec];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1702
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1703 cr->tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1704 cr->mate_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1705 cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1706 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1707
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1708 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1709 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1710
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1711 for (rec = 0; rec < s->hdr->num_records; rec++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1712 cram_record *cr = &s->crecs[rec];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1713
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1714 if (cr->mate_line >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1715 if (cr->mate_line < s->hdr->num_records) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1716 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1717 * On the first read, loop through computing lengths.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1718 * It's not perfect as we have one slice per reference so we
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1719 * cannot detect when TLEN should be zero due to seqs that
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1720 * map to multiple references.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1721 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1722 * We also cannot set tlen correct when it spans a slice for
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1723 * other reasons. This may make tlen too small. Should we
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1724 * fix this by forcing TLEN to be stored verbatim in such cases?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1725 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1726 * Or do we just admit defeat and output 0 for tlen? It's the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1727 * safe option...
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1728 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1729 if (cr->tlen == INT_MIN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1730 int id1 = rec, id2 = rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1731 int aleft = cr->apos, aright = cr->aend;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1732 int tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1733 int ref = cr->ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1734
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1735 // number of segments starting at the same point.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1736 int left_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1737
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1738 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1739 if (aleft > s->crecs[id2].apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1740 aleft = s->crecs[id2].apos, left_cnt = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1741 else if (aleft == s->crecs[id2].apos)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1742 left_cnt++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1743 if (aright < s->crecs[id2].aend)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1744 aright = s->crecs[id2].aend;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1745 if (s->crecs[id2].mate_line == -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1746 s->crecs[id2].mate_line = rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1747 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1748 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1749 assert(s->crecs[id2].mate_line > id2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1750 id2 = s->crecs[id2].mate_line;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1751
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1752 if (s->crecs[id2].ref_id != ref)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1753 ref = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1754 } while (id2 != id1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1755
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1756 if (ref != -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1757 tlen = aright - aleft + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1758 id1 = id2 = rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1759
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1760 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1761 * When we have two seqs with identical start and
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1762 * end coordinates, set +/- tlen based on 1st/last
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1763 * bit flags instead, as a tie breaker.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1764 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1765 if (s->crecs[id2].apos == aleft) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1766 if (left_cnt == 1 ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1767 (s->crecs[id2].flags & BAM_FREAD1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1768 s->crecs[id2].tlen = tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1769 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1770 s->crecs[id2].tlen = -tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1771 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1772 s->crecs[id2].tlen = -tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1773 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1774
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1775 id2 = s->crecs[id2].mate_line;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1776 while (id2 != id1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1777 if (s->crecs[id2].apos == aleft) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1778 if (left_cnt == 1 ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1779 (s->crecs[id2].flags & BAM_FREAD1))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1780 s->crecs[id2].tlen = tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1781 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1782 s->crecs[id2].tlen = -tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1783 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1784 s->crecs[id2].tlen = -tlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1785 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1786 id2 = s->crecs[id2].mate_line;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1787 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1788 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1789 id1 = id2 = rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1790
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1791 s->crecs[id2].tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1792 id2 = s->crecs[id2].mate_line;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1793 while (id2 != id1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1794 s->crecs[id2].tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1795 id2 = s->crecs[id2].mate_line;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1796 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1797 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1798 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1799
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1800 cr->mate_pos = s->crecs[cr->mate_line].apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1801 cr->mate_ref_id = s->crecs[cr->mate_line].ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1802
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1803 // paired
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1804 cr->flags |= BAM_FPAIRED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1805
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1806 // set mate unmapped if needed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1807 if (s->crecs[cr->mate_line].flags & BAM_FUNMAP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1808 cr->flags |= BAM_FMUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1809 cr->tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1810 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1811 if (cr->flags & BAM_FUNMAP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1812 cr->tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1813 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1814
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1815 // set mate reversed if needed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1816 if (s->crecs[cr->mate_line].flags & BAM_FREVERSE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1817 cr->flags |= BAM_FMREVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1818 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1819 fprintf(stderr, "Mate line out of bounds: %d vs [0, %d]\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1820 cr->mate_line, s->hdr->num_records-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1821 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1822
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1823 /* FIXME: construct read names here too if needed */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1824 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1825 if (cr->mate_flags & CRAM_M_REVERSE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1826 cr->flags |= BAM_FPAIRED | BAM_FMREVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1827 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1828 if (cr->mate_flags & CRAM_M_UNMAP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1829 cr->flags |= BAM_FMUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1830 //cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1831 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1832 if (!(cr->flags & BAM_FPAIRED))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1833 cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1834 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1835
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1836 if (cr->tlen == INT_MIN)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1837 cr->tlen = 0; // Just incase
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1838 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1839 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1840
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1841 static char *md5_print(unsigned char *md5, char *out) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1842 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1843 for (i = 0; i < 16; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1844 out[i*2+0] = "0123456789abcdef"[md5[i]>>4];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1845 out[i*2+1] = "0123456789abcdef"[md5[i]&15];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1846 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1847 out[32] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1848
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1849 return out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1850 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1851
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1852 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1853 * Decode an entire slice from container blocks. Fills out s->crecs[] array.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1854 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1855 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1856 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1857 int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1858 SAM_hdr *bfd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1859 cram_block *blk = s->block[0];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1860 int32_t bf, ref_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1861 unsigned char cf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1862 int out_sz, r = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1863 int rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1864 char *seq = NULL, *qual = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1865 int unknown_rg = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1866 int embed_ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1867 char **refs = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1868 uint32_t ds;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1869
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1870 if (cram_dependent_data_series(fd, c->comp_hdr, s) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1871 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1872
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1873 ds = c->comp_hdr->data_series;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1874
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1875 blk->bit = 7; // MSB first
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1876
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1877 /* Look for unknown RG, added as last by Java CRAM? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1878 if (bfd->nrg > 0 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1879 !strcmp(bfd->rg[bfd->nrg-1].name, "UNKNOWN"))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1880 unknown_rg = bfd->nrg-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1881
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1882 if (blk->content_type != CORE)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1883 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1884
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1885 if (s->crecs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1886 free(s->crecs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1887 if (!(s->crecs = malloc(s->hdr->num_records * sizeof(*s->crecs))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1888 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1889
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1890 ref_id = s->hdr->ref_seq_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1891 embed_ref = s->hdr->ref_base_id >= 0 ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1892
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1893 if (ref_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1894 if (embed_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1895 cram_block *b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1896 if (s->hdr->ref_base_id < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1897 fprintf(stderr, "No reference specified and "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1898 "no embedded reference is available.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1899 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1900 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1901 if (!s->block_by_id ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1902 !(b = s->block_by_id[s->hdr->ref_base_id]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1903 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1904 cram_uncompress_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1905 s->ref = (char *)BLOCK_DATA(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1906 s->ref_start = s->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1907 s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1908 } else if (!fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1909 //// Avoid Java cramtools bug by loading entire reference seq
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1910 //s->ref = cram_get_ref(fd, s->hdr->ref_seq_id, 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1911 //s->ref_start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1912
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1913 if (fd->required_fields & SAM_SEQ)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1914 s->ref =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1915 cram_get_ref(fd, s->hdr->ref_seq_id,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1916 s->hdr->ref_seq_start,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1917 s->hdr->ref_seq_start + s->hdr->ref_seq_span -1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1918 s->ref_start = s->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1919 s->ref_end = s->hdr->ref_seq_start + s->hdr->ref_seq_span-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1920
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1921 /* Sanity check */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1922 if (s->ref_start < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1923 fprintf(stderr, "Slice starts before base 1.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1924 s->ref_start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1925 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1926 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1927 pthread_mutex_lock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1928 if ((fd->required_fields & SAM_SEQ) &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1929 s->ref_end > fd->refs->ref_id[ref_id]->length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1930 fprintf(stderr, "Slice ends beyond reference end.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1931 s->ref_end = fd->refs->ref_id[ref_id]->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1932 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1933 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1934 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1935 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1936 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1937
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1938 if ((fd->required_fields & SAM_SEQ) &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1939 s->ref == NULL && s->hdr->ref_seq_id >= 0 && !fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1940 fprintf(stderr, "Unable to fetch reference #%d %d..%d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1941 s->hdr->ref_seq_id, s->hdr->ref_seq_start,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1942 s->hdr->ref_seq_start + s->hdr->ref_seq_span-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1943 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1944 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1945
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1946 if (CRAM_MAJOR_VERS(fd->version) != 1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1947 && (fd->required_fields & SAM_SEQ)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1948 && s->hdr->ref_seq_id >= 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1949 && !fd->ignore_md5
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1950 && memcmp(s->hdr->md5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1951 MD5_CTX md5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1952 unsigned char digest[16];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1953
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1954 if (s->ref && s->hdr->ref_seq_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1955 int start, len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1956
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1957 if (s->hdr->ref_seq_start >= s->ref_start) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1958 start = s->hdr->ref_seq_start - s->ref_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1959 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1960 fprintf(stderr, "Slice starts before base 1.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1961 start = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1962 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1963
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1964 if (s->hdr->ref_seq_span <= s->ref_end - s->ref_start + 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1965 len = s->hdr->ref_seq_span;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1966 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1967 fprintf(stderr, "Slice ends beyond reference end.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1968 len = s->ref_end - s->ref_start + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1969 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1970
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1971 MD5_Init(&md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1972 if (start + len > s->ref_end - s->ref_start + 1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1973 len = s->ref_end - s->ref_start + 1 - start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1974 if (len >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1975 MD5_Update(&md5, s->ref + start, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1976 MD5_Final(digest, &md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1977 } else if (!s->ref && s->hdr->ref_base_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1978 cram_block *b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1979 if (s->block_by_id && (b = s->block_by_id[s->hdr->ref_base_id])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1980 MD5_Init(&md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1981 MD5_Update(&md5, b->data, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1982 MD5_Final(digest, &md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1983 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1984 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1985
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1986 if ((!s->ref && s->hdr->ref_base_id < 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1987 || memcmp(digest, s->hdr->md5, 16) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1988 char M[33];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1989 fprintf(stderr, "ERROR: md5sum reference mismatch for ref "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1990 "%d pos %d..%d\n", ref_id, s->ref_start, s->ref_end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1991 fprintf(stderr, "CRAM: %s\n", md5_print(s->hdr->md5, M));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1992 fprintf(stderr, "Ref : %s\n", md5_print(digest, M));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1993 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1994 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1995 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1996
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1997 if (ref_id == -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1998 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1999 pthread_mutex_lock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2000 refs = calloc(fd->refs->nref, sizeof(char *));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2001 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2002 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2003 if (!refs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2004 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2005 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2006
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2007 for (rec = 0; rec < s->hdr->num_records; rec++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2008 cram_record *cr = &s->crecs[rec];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2009
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2010 //fprintf(stderr, "Decode seq %d, %d/%d\n", rec, blk->byte, blk->bit);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2011
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2012 cr->s = s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2013
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2014 out_sz = 1; /* decode 1 item */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2015 if (ds & CRAM_BF) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2016 if (!c->comp_hdr->codecs[DS_BF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2017 r |= c->comp_hdr->codecs[DS_BF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2018 ->decode(s, c->comp_hdr->codecs[DS_BF], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2019 (char *)&bf, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2020 if (bf < 0 ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2021 bf >= sizeof(fd->bam_flag_swap)/sizeof(*fd->bam_flag_swap))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2022 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2023 bf = fd->bam_flag_swap[bf];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2024 cr->flags = bf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2025 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2026 cr->flags = bf = 0x4; // unmapped
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2027 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2028
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2029 if (ds & CRAM_CF) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2030 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2031 /* CF is byte in 1.0, int32 in 2.0 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2032 if (!c->comp_hdr->codecs[DS_CF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2033 r |= c->comp_hdr->codecs[DS_CF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2034 ->decode(s, c->comp_hdr->codecs[DS_CF], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2035 (char *)&cf, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2036 cr->cram_flags = cf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2037 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2038 if (!c->comp_hdr->codecs[DS_CF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2039 r |= c->comp_hdr->codecs[DS_CF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2040 ->decode(s, c->comp_hdr->codecs[DS_CF], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2041 (char *)&cr->cram_flags,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2042 &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2043 cf = cr->cram_flags;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2044 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2045 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2046
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2047 if (CRAM_MAJOR_VERS(fd->version) != 1 && ref_id == -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2048 if (ds & CRAM_RI) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2049 if (!c->comp_hdr->codecs[DS_RI]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2050 r |= c->comp_hdr->codecs[DS_RI]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2051 ->decode(s, c->comp_hdr->codecs[DS_RI], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2052 (char *)&cr->ref_id, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2053 if ((fd->required_fields & (SAM_SEQ|SAM_TLEN))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2054 && cr->ref_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2055 if (!fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2056 if (!refs[cr->ref_id])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2057 refs[cr->ref_id] = cram_get_ref(fd, cr->ref_id,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2058 1, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2059 s->ref = refs[cr->ref_id];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2060 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2061 s->ref_start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2062 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2063 pthread_mutex_lock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2064 s->ref_end = fd->refs->ref_id[cr->ref_id]->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2065 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2066 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2067 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2068 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2069 cr->ref_id = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2070 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2071 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2072 cr->ref_id = ref_id; // Forced constant in CRAM 1.0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2073 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2074
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2075
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2076 if (ds & CRAM_RL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2077 if (!c->comp_hdr->codecs[DS_RL]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2078 r |= c->comp_hdr->codecs[DS_RL]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2079 ->decode(s, c->comp_hdr->codecs[DS_RL], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2080 (char *)&cr->len, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2081 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2082
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2083 if (ds & CRAM_AP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2084 if (!c->comp_hdr->codecs[DS_AP]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2085 r |= c->comp_hdr->codecs[DS_AP]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2086 ->decode(s, c->comp_hdr->codecs[DS_AP], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2087 (char *)&cr->apos, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2088 if (c->comp_hdr->AP_delta)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2089 cr->apos += s->last_apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2090 s->last_apos= cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2091 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2092 cr->apos = c->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2093 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2094
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2095 if (ds & CRAM_RG) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2096 if (!c->comp_hdr->codecs[DS_RG]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2097 r |= c->comp_hdr->codecs[DS_RG]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2098 ->decode(s, c->comp_hdr->codecs[DS_RG], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2099 (char *)&cr->rg, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2100 if (cr->rg == unknown_rg)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2101 cr->rg = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2102 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2103 cr->rg = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2105
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2106 cr->name_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2107
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2108 if (c->comp_hdr->read_names_included) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2109 int32_t out_sz2 = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2110
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2111 // Read directly into name cram_block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2112 cr->name = BLOCK_SIZE(s->name_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2113 if (ds & CRAM_RN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2114 if (!c->comp_hdr->codecs[DS_RN]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2115 r |= c->comp_hdr->codecs[DS_RN]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2116 ->decode(s, c->comp_hdr->codecs[DS_RN], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2117 (char *)s->name_blk, &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2118 cr->name_len = out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2119 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2120 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2121
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2122 cr->mate_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2123 cr->mate_line = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2124 cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2125 if ((ds & CRAM_CF) && (cf & CRAM_FLAG_DETACHED)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2126 if (ds & CRAM_MF) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2127 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2128 /* MF is byte in 1.0, int32 in 2.0 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2129 unsigned char mf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2130 if (!c->comp_hdr->codecs[DS_MF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2131 r |= c->comp_hdr->codecs[DS_MF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2132 ->decode(s, c->comp_hdr->codecs[DS_MF],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2133 blk, (char *)&mf, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2134 cr->mate_flags = mf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2135 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2136 if (!c->comp_hdr->codecs[DS_MF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2137 r |= c->comp_hdr->codecs[DS_MF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2138 ->decode(s, c->comp_hdr->codecs[DS_MF],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2139 blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2140 (char *)&cr->mate_flags,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2141 &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2142 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2143 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2144 cr->mate_flags = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2145 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2147 if (!c->comp_hdr->read_names_included) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2148 int32_t out_sz2 = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2149
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2150 // Read directly into name cram_block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2151 cr->name = BLOCK_SIZE(s->name_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2152 if (ds & CRAM_RN) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2153 if (!c->comp_hdr->codecs[DS_RN]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2154 r |= c->comp_hdr->codecs[DS_RN]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2155 ->decode(s, c->comp_hdr->codecs[DS_RN],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2156 blk, (char *)s->name_blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2157 &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2158 cr->name_len = out_sz2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2159 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2160 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2161
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2162 if (ds & CRAM_NS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2163 if (!c->comp_hdr->codecs[DS_NS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2164 r |= c->comp_hdr->codecs[DS_NS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2165 ->decode(s, c->comp_hdr->codecs[DS_NS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2166 (char *)&cr->mate_ref_id, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2167 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2168
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2169 // Skip as mate_ref of "*" is legit. It doesn't mean unmapped, just unknown.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2170 // if (cr->mate_ref_id == -1 && cr->flags & 0x01) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2171 // /* Paired, but unmapped */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2172 // cr->flags |= BAM_FMUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2173 // }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2175 if (ds & CRAM_NP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2176 if (!c->comp_hdr->codecs[DS_NP]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2177 r |= c->comp_hdr->codecs[DS_NP]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2178 ->decode(s, c->comp_hdr->codecs[DS_NP], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2179 (char *)&cr->mate_pos, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2180 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2181
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2182 if (ds & CRAM_TS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2183 if (!c->comp_hdr->codecs[DS_TS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2184 r |= c->comp_hdr->codecs[DS_TS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2185 ->decode(s, c->comp_hdr->codecs[DS_TS], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2186 (char *)&cr->tlen, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2187 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2188 cr->tlen = INT_MIN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2189 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2190 } else if ((ds & CRAM_CF) && (cf & CRAM_FLAG_MATE_DOWNSTREAM)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2191 if (ds & CRAM_NF) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2192 if (!c->comp_hdr->codecs[DS_NF]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2193 r |= c->comp_hdr->codecs[DS_NF]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2194 ->decode(s, c->comp_hdr->codecs[DS_NF], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2195 (char *)&cr->mate_line, &out_sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2196 cr->mate_line += rec + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2197
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2198 //cr->name_len = sprintf(name, "%d", name_id++);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2199 //cr->name = DSTRING_LEN(name_ds);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2200 //dstring_nappend(name_ds, name, cr->name_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2201
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2202 cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2203 cr->tlen = INT_MIN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2204 cr->mate_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2205 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2206 cr->mate_flags = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2207 cr->tlen = INT_MIN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2208 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2209 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2210 cr->mate_flags = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2211 cr->tlen = INT_MIN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2212 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2213 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2214 else if (!name[0]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2215 //name[0] = '?'; name[1] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2216 //cr->name_len = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2217 //cr->name= DSTRING_LEN(s->name_ds);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2218 //dstring_nappend(s->name_ds, "?", 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2219
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2220 cr->mate_ref_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2221 cr->tlen = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2222 cr->mate_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2223 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2224 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2225
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2226 /* Auxiliary tags */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2227 if (CRAM_MAJOR_VERS(fd->version) == 1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2228 r |= cram_decode_aux_1_0(c, s, blk, cr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2229 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2230 r |= cram_decode_aux(c, s, blk, cr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2231
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2232 /* Fake up dynamic string growth and appending */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2233 if (ds & CRAM_RL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2234 cr->seq = BLOCK_SIZE(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2235 BLOCK_GROW(s->seqs_blk, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2236 seq = (char *)BLOCK_END(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2237 BLOCK_SIZE(s->seqs_blk) += cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2238
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2239 if (!seq)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2240 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2241
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2242 cr->qual = BLOCK_SIZE(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2243 BLOCK_GROW(s->qual_blk, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2244 qual = (char *)BLOCK_END(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2245 BLOCK_SIZE(s->qual_blk) += cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2246
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2247 if (!s->ref)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2248 memset(seq, '=', cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2249 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2250
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2251 if (!(bf & BAM_FUNMAP)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2252 /* Decode sequence and generate CIGAR */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2253 if (ds & (CRAM_SEQ | CRAM_MQ)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2254 r |= cram_decode_seq(fd, c, s, blk, cr, bfd, cf, seq, qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2255 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2256 cr->cigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2257 cr->ncigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2258 cr->aend = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2259 cr->mqual = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2260 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2261 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2262 int out_sz2 = cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2263
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2264 //puts("Unmapped");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2265 cr->cigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2266 cr->ncigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2267 cr->aend = cr->apos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2268 cr->mqual = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2269
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2270 if (ds & CRAM_BA) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2271 if (!c->comp_hdr->codecs[DS_BA]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2272 r |= c->comp_hdr->codecs[DS_BA]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2273 ->decode(s, c->comp_hdr->codecs[DS_BA], blk,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2274 (char *)seq, &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2275 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2276
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2277 if ((ds & CRAM_CF) && (cf & CRAM_FLAG_PRESERVE_QUAL_SCORES)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2278 out_sz2 = cr->len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2279 if (ds & CRAM_QS) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2280 if (!c->comp_hdr->codecs[DS_QS]) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2281 r |= c->comp_hdr->codecs[DS_QS]
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2282 ->decode(s, c->comp_hdr->codecs[DS_QS],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2283 blk, qual, &out_sz2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2284 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2285 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2286 if (ds & CRAM_RL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2287 memset(qual, 30, cr->len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2288 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2289 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2290 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2291
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2292 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2293 if (refs) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2294 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2295 for (i = 0; i < fd->refs->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2296 if (refs[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2297 cram_ref_decr(fd->refs, i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2298 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2299 free(refs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2300 } else if (ref_id >= 0 && s->ref != fd->ref_free) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2301 cram_ref_decr(fd->refs, ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2302 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2303 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2304
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2305 /* Resolve mate pair cross-references between recs within this slice */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2306 cram_decode_slice_xref(s, fd->required_fields);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2307
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2308 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2309 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2310
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2311 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2312 cram_fd *fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2313 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2314 cram_slice *s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2315 SAM_hdr *h;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2316 int exit_code;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2317 } cram_decode_job;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2318
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2319 void *cram_decode_slice_thread(void *arg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2320 cram_decode_job *j = (cram_decode_job *)arg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2321
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2322 j->exit_code = cram_decode_slice(j->fd, j->c, j->s, j->h);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2323
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2324 return j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2325 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2326
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2327 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2328 * Spawn a multi-threaded version of cram_decode_slice().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2329 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2330 int cram_decode_slice_mt(cram_fd *fd, cram_container *c, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2331 SAM_hdr *bfd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2332 cram_decode_job *j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2333 int nonblock;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2334
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2335 if (!fd->pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2336 return cram_decode_slice(fd, c, s, bfd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2337
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2338 if (!(j = malloc(sizeof(*j))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2339 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2340
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2341 j->fd = fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2342 j->c = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2343 j->s = s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2344 j->h = bfd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2345
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2346 nonblock = t_pool_results_queue_sz(fd->rqueue) ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2347
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2348 if (-1 == t_pool_dispatch2(fd->pool, fd->rqueue, cram_decode_slice_thread,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2349 j, nonblock)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2350 /* Would block */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2351 fd->job_pending = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2352 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2353 fd->job_pending = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2354 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2355
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2356 // flush too
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2357 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2358 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2359
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2360
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2361 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2362 * CRAM sequence iterators.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2363 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2364
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2365 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2366 * Converts a cram in-memory record into a bam in-memory record. We
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2367 * pass a pointer to a bam_seq_t pointer along with the a pointer to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2368 * the allocated size. These can initially be pointers to NULL and zero.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2369 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2370 * This function will reallocate the bam buffer as required and update
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2371 * (*bam)->alloc accordingly, allowing it to be used within a loop
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2372 * efficiently without needing to allocate new bam objects over and
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2373 * over again.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2374 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2375 * Returns the used size of the bam record on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2376 * -1 on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2377 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2378 static int cram_to_bam(SAM_hdr *bfd, cram_fd *fd, cram_slice *s,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2379 cram_record *cr, int rec, bam_seq_t **bam) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2380 int bam_idx, rg_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2381 char name_a[1024], *name;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2382 int name_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2383 char *aux, *aux_orig;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2384 char *seq, *qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2385
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2386 /* Assign names if not explicitly set */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2387 if (fd->required_fields & SAM_QNAME) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2388 if (cr->name_len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2389 name = (char *)BLOCK_DATA(s->name_blk) + cr->name;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2390 name_len = cr->name_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2391 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2392 name = name_a;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2393 name_len = strlen(fd->prefix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2394 memcpy(name, fd->prefix, name_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2395 name += name_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2396 *name++ = ':';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2397 if (cr->mate_line >= 0 && cr->mate_line < rec)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2398 name = (char *)append_uint64((unsigned char *)name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2399 s->hdr->record_counter +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2400 cr->mate_line + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2401 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2402 name = (char *)append_uint64((unsigned char *)name,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2403 s->hdr->record_counter +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2404 rec + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2405 name_len = name - name_a;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2406 name = name_a;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2407 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2408 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2409 name = "?";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2410 name_len = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2411 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2412
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2413 /* Generate BAM record */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2414 if (cr->rg < -1 || cr->rg >= bfd->nrg)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2415 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2416 rg_len = (cr->rg != -1) ? bfd->rg[cr->rg].name_len + 4 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2417
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2418 if (fd->required_fields & (SAM_SEQ | SAM_QUAL)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2419 if (!BLOCK_DATA(s->seqs_blk))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2420 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2421 seq = (char *)BLOCK_DATA(s->seqs_blk) + cr->seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2422 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2423 seq = "*";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2424 cr->len = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2425 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2426
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2427
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2428 if (fd->required_fields & SAM_QUAL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2429 if (!BLOCK_DATA(s->qual_blk))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2430 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2431 qual = (char *)BLOCK_DATA(s->qual_blk) + cr->qual;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2432 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2433 qual = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2434 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2435
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2436 bam_idx = bam_construct_seq(bam, cr->aux_size + rg_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2437 name, name_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2438 cr->flags,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2439 cr->ref_id,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2440 cr->apos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2441 cr->aend,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2442 cr->mqual,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2443 cr->ncigar, &s->cigar[cr->cigar],
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2444 cr->mate_ref_id,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2445 cr->mate_pos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2446 cr->tlen,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2447 cr->len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2448 seq,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2449 qual);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2450 if (bam_idx == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2451 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2452
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2453 aux = aux_orig = (char *)bam_aux(*bam);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2454
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2455 /* Auxiliary strings */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2456 if (cr->aux_size != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2457 memcpy(aux, BLOCK_DATA(s->aux_blk) + cr->aux, cr->aux_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2458 aux += cr->aux_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2459 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2460
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2461 /* RG:Z: */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2462 if (cr->rg != -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2463 int len = bfd->rg[cr->rg].name_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2464 *aux++ = 'R'; *aux++ = 'G'; *aux++ = 'Z';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2465 memcpy(aux, bfd->rg[cr->rg].name, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2466 aux += len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2467 *aux++ = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2468 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2469
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2470 return bam_idx + (aux - aux_orig);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2471 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2472
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2473 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2474 * Here be dragons! The multi-threading code in this is crufty beyond belief.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2475 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2476 static cram_slice *cram_next_slice(cram_fd *fd, cram_container **cp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2477 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2478 cram_slice *s = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2479
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2480 if (!(c = fd->ctr)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2481 // Load first container.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2482 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2483 if (!(c = fd->ctr = cram_read_container(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2484 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2485 } while (c->length == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2486
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2487 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2488 * The first container may be a result of a sub-range query.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2489 * In which case it may still not be the optimal starting point
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2490 * due to skipped containers/slices in the index.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2491 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2492 if (fd->range.refid != -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2493 while (c->ref_seq_id != -2 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2494 (c->ref_seq_id < fd->range.refid ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2495 c->ref_seq_start + c->ref_seq_span-1 < fd->range.start)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2496 if (0 != cram_seek(fd, c->length, SEEK_CUR))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2497 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2498 cram_free_container(fd->ctr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2499 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2500 if (!(c = fd->ctr = cram_read_container(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2501 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2502 } while (c->length == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2503 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2504
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2505 if (c->ref_seq_id != -2 && c->ref_seq_id != fd->range.refid)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2506 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2507 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2508
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2509 if (!(c->comp_hdr_block = cram_read_block(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2510 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2511 if (c->comp_hdr_block->content_type != COMPRESSION_HEADER)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2512 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2513
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2514 c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2515 if (!c->comp_hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2516 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2517 if (!c->comp_hdr->AP_delta) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2518 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2519 fd->unsorted = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2520 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2521 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2522 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2523
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2524 if ((s = c->slice)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2525 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2526 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2527 s = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2528 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2529
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2530 if (c->curr_slice == c->max_slice) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2531 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2532 c = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2533 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2534
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2535 /* Sorry this is so contorted! */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2536 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2537 if (fd->job_pending) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2538 cram_decode_job *j = (cram_decode_job *)fd->job_pending;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2539 c = j->c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2540 s = j->s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2541 free(fd->job_pending);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2542 fd->job_pending = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2543 } else if (!fd->ooc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2544 empty_container:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2545 if (!c || c->curr_slice == c->max_slice) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2546 // new container
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2547 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2548 if (!(c = fd->ctr = cram_read_container(fd))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2549 if (fd->pool) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2550 fd->ooc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2551 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2552 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2553
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2554 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2555 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2556 } while (c->length == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2557 if (fd->ooc)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2558 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2559
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2560 /* Skip containers not yet spanning our range */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2561 if (fd->range.refid != -2 && c->ref_seq_id != -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2562 fd->required_fields |= SAM_POS;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2563
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2564 if (c->ref_seq_id != fd->range.refid) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2565 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2566 fd->ctr = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2567 fd->ooc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2568 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2569 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2570 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2571
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2572 if (c->ref_seq_start > fd->range.end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2573 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2574 fd->ctr = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2575 fd->ooc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2576 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2577 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2578 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2579
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2580 if (c->ref_seq_start + c->ref_seq_span-1 <
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2581 fd->range.start) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2582 c->curr_rec = c->max_rec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2583 c->curr_slice = c->max_slice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2584 cram_seek(fd, c->length, SEEK_CUR);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2585 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2586 c = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2587 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2588 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2589 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2590
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2591 if (!(c->comp_hdr_block = cram_read_block(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2592 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2593 if (c->comp_hdr_block->content_type != COMPRESSION_HEADER)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2594 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2595
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2596 c->comp_hdr =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2597 cram_decode_compression_header(fd, c->comp_hdr_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2598 if (!c->comp_hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2599 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2600
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2601 if (!c->comp_hdr->AP_delta) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2602 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2603 fd->unsorted = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2604 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2605 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2606 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2607
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2608 if (c->num_records == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2609 cram_free_container(c); c = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2610 goto empty_container;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2611 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2612
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2613
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2614 if (!(s = c->slice = cram_read_slice(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2615 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2616 c->curr_slice++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2617 c->curr_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2618 c->max_rec = s->hdr->num_records;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2619
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2620 s->last_apos = s->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2621
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2622 /* Skip slices not yet spanning our range */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2623 if (fd->range.refid != -2 && s->hdr->ref_seq_id != -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2624 if (s->hdr->ref_seq_id != fd->range.refid) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2625 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2626 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2627 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2628 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2629 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2630
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2631 if (s->hdr->ref_seq_start > fd->range.end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2632 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2633 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2634 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2635 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2636 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2637
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2638 if (s->hdr->ref_seq_start + s->hdr->ref_seq_span-1 <
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2639 fd->range.start) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2640 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2641 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2642 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2643 c = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2644 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2645 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2646 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2647 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2648
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2649 /* Test decoding of 1st seq */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2650 if (!c || !s)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2651 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2652
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2653 if (cram_decode_slice_mt(fd, c, s, fd->header) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2654 // if (cram_decode_slice(fd, c, s, fd->header) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2655 fprintf(stderr, "Failure to decode slice\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2656 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2657 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2658 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2659 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2660
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2661 if (!fd->pool || fd->job_pending)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2662 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2663
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2664 // Push it a bit far, to qsize in queue rather than pending arrival,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2665 // as cram tends to be a bit bursty in decode timings.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2666 if (t_pool_results_queue_len(fd->rqueue) > fd->pool->qsize)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2667 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2668 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2669
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2670 if (fd->pool) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2671 t_pool_result *res;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2672 cram_decode_job *j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2673
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2674 // fprintf(stderr, "Thread pool len = %d, %d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2675 // t_pool_results_queue_len(fd->rqueue),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2676 // t_pool_results_queue_sz(fd->rqueue));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2677
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2678 if (fd->ooc && t_pool_results_queue_empty(fd->rqueue))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2679 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2680
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2681 res = t_pool_next_result_wait(fd->rqueue);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2682
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2683 if (!res || !res->data) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2684 fprintf(stderr, "t_pool_next_result failure\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2685 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2686 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2687
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2688 j = (cram_decode_job *)res->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2689 c = j->c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2690 s = j->s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2691
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2692 fd->ctr = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2693
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2694 t_pool_delete_result(res, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2695 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2696
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2697 *cp = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2698 return s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2699 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2700
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2701 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2702 * Read the next cram record and return it.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2703 * Note that to decode cram_record the caller will need to look up some data
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2704 * in the current slice, pointed to by fd->ctr->slice. This is valid until
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2705 * the next call to cram_get_seq (which may invalidate it).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2706 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2707 * Returns record pointer on success (do not free)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2708 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2709 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2710 cram_record *cram_get_seq(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2711 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2712 cram_slice *s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2713
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2714 for (;;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2715 c = fd->ctr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2716 if (c && c->slice && c->curr_rec < c->max_rec) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2717 s = c->slice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2718 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2719 if (!(s = cram_next_slice(fd, &c)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2720 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2721 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2722
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2723 if (fd->range.refid != -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2724 if (s->crecs[c->curr_rec].ref_id < fd->range.refid) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2725 c->curr_rec++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2726 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2727 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2728
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2729 if (s->crecs[c->curr_rec].ref_id != fd->range.refid) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2730 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2731 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2732 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2733 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2734 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2735
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2736 if (s->crecs[c->curr_rec].apos > fd->range.end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2737 fd->eof = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2738 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2739 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2740 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2741 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2742
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2743 if (s->crecs[c->curr_rec].aend < fd->range.start) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2744 c->curr_rec++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2745 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2746 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2747 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2748
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2749 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2750 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2751
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2752 fd->ctr = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2753 c->slice = s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2754 return &s->crecs[c->curr_rec++];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2755 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2756
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2757 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2758 * Read the next cram record and convert it to a bam_seq_t struct.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2759 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2760 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2761 * -1 on EOF or failure (check fd->err)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2762 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2763 int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2764 cram_record *cr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2765 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2766 cram_slice *s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2767
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2768 if (!(cr = cram_get_seq(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2769 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2770
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2771 c = fd->ctr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2772 s = c->slice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2773
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2774 return cram_to_bam(fd->header, fd, s, cr, c->curr_rec-1, bam);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2775 }