annotate ezBAMQC/src/htslib/cram/cram_io.c @ 18:494b5cd02238

bash script
author youngkim
date Wed, 30 Mar 2016 13:39:05 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2012-2014 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 * CRAM I/O primitives.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 * - ITF8 encoding and decoding.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 * - Block based I/O
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 * - Zlib inflating and deflating (memory)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 * - CRAM basic data structure reading and writing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 * - File opening / closing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 * - Reference sequence handling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 * TODO: BLOCK_GROW, BLOCK_RESIZE, BLOCK_APPEND and itf8_put_blk all need
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 * a way to return errors for when malloc fails.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 #ifdef HAVE_CONFIG_H
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 #include "io_lib_config.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 #include <errno.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 #include <zlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 #ifdef HAVE_LIBBZ2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 #include <bzlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 #ifdef HAVE_LIBLZMA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 #include <lzma.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 #include <sys/types.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 #include <sys/stat.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 #include <math.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 #include <ctype.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 #include "cram/cram.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 #include "cram/os.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 #include "cram/md5.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 #include "cram/open_trace_file.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 #include "cram/rANS_static.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 //#define REF_DEBUG
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76 #ifdef REF_DEBUG
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 #include <sys/syscall.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78 #define gettid() (int)syscall(SYS_gettid)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 #define RP(...) fprintf (stderr, __VA_ARGS__)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 #define RP(...)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 #include "htslib/hfile.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 #include "htslib/bgzf.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 #include "htslib/faidx.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 #define TRIAL_SPAN 50
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 #define NTRIALS 3
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 * ITF8 encoding and decoding.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 * Also see the itf8_get and itf8_put macros in cram_io.h
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100 * Reads an integer in ITF-8 encoding from 'cp' and stores it in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 * *val.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 * Returns the number of bytes read on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 int itf8_decode(cram_fd *fd, int32_t *val_p) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 static int nbytes[16] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108 0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 1,1,1,1, // 1000xxxx - 1011xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 2,2, // 1100xxxx - 1101xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 3, // 1110xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 4, // 1111xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 static int nbits[16] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 0x1f, 0x1f, // 1100xxxx - 1101xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 0x0f, // 1110xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 0x0f, // 1111xxxx
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 int32_t val = hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 if (val == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 int i = nbytes[val>>4];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 val &= nbits[val>>4];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 switch(i) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131 case 0:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 case 1:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 return 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 case 2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 return 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 case 3:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 return 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 case 4: // really 3.5 more, why make it different?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 val = (val<<4) | (((unsigned char)hgetc(fd->fp)) & 0x0f);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 return 5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 * Encodes and writes a single integer in ITF-8 format.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 int itf8_encode(cram_fd *fd, int32_t val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 char buf[5];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 int len = itf8_put(buf, val);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 return hwrite(fd->fp, buf, len) == len ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 #ifndef ITF8_MACROS
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Decodes one ITF-8 integer from the memory at 'cp' into *val_p.
 *
 * The leading bits of the first byte select the encoded length (1 to 5
 * bytes).  No bounds checking is performed here: that many bytes are
 * read from 'cp' unconditionally.
 *
 * Returns the number of bytes consumed.
 */
int itf8_get(char *cp, int32_t *val_p) {
    const unsigned char *up = (const unsigned char *)cp;
    int len;

    /*
     * Assemble the value as uint32_t.  With the default promotion to int,
     * shifts such as up[0]<<24 (first byte >= 0xe0) or a top nibble >= 8
     * shifted by 28 overflow a signed int, which is undefined behaviour.
     */
    uint32_t val;

    if (up[0] < 0x80) {
        val = up[0];
        len = 1;
    } else if (up[0] < 0xc0) {
        val = (((uint32_t)up[0] << 8) | up[1]) & 0x3fff;
        len = 2;
    } else if (up[0] < 0xe0) {
        val = (((uint32_t)up[0] << 16) |
               ((uint32_t)up[1] <<  8) |
                (uint32_t)up[2]) & 0x1fffff;
        len = 3;
    } else if (up[0] < 0xf0) {
        val = (((uint32_t)up[0] << 24) |
               ((uint32_t)up[1] << 16) |
               ((uint32_t)up[2] <<  8) |
                (uint32_t)up[3]) & 0x0fffffff;
        len = 4;
    } else {
        // 5-byte form: 4 bits + 3 whole bytes + low nibble of the last byte.
        val = ((uint32_t)(up[0] & 0x0f) << 28) |
              ((uint32_t)up[1] << 20) |
              ((uint32_t)up[2] << 12) |
              ((uint32_t)up[3] <<  4) |
               (uint32_t)(up[4] & 0x0f);
        len = 5;
    }

    *val_p = (int32_t)val;
    return len;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Stores 'val' to memory at 'cp' in ITF-8 format.
 *
 * The value is treated as its 32-bit unsigned bit pattern, so negative
 * numbers always take the 5-byte form.  Working on the unsigned pattern
 * also avoids right-shifting a negative signed value and storing values
 * above 127 through a plain char.
 *
 * Returns the number of bytes written.
 * This is a maximum of 5 bytes.
 */
int itf8_put(char *cp, int32_t val) {
    unsigned char *up = (unsigned char *)cp;
    uint32_t u = (uint32_t)val;

    if (u < 0x80) {                     // 1 byte:  0xxxxxxx
        up[0] = (unsigned char)u;
        return 1;
    }

    if (u < 0x4000) {                   // 2 bytes: 10xxxxxx + 8 bits
        up[0] = (unsigned char)(0x80 | (u >> 8));
        up[1] = (unsigned char)(u & 0xff);
        return 2;
    }

    if (u < 0x200000) {                 // 3 bytes: 110xxxxx + 16 bits
        up[0] = (unsigned char)(0xc0 | (u >> 16));
        up[1] = (unsigned char)((u >> 8) & 0xff);
        up[2] = (unsigned char)(u & 0xff);
        return 3;
    }

    if (u < 0x10000000) {               // 4 bytes: 1110xxxx + 24 bits
        up[0] = (unsigned char)(0xe0 | (u >> 24));
        up[1] = (unsigned char)((u >> 16) & 0xff);
        up[2] = (unsigned char)((u >> 8) & 0xff);
        up[3] = (unsigned char)(u & 0xff);
        return 4;
    }

    // 5 bytes: 1111xxxx + 24 bits + low nibble in the final byte
    up[0] = (unsigned char)(0xf0 | (u >> 28));
    up[1] = (unsigned char)((u >> 20) & 0xff);
    up[2] = (unsigned char)((u >> 12) & 0xff);
    up[3] = (unsigned char)((u >> 4) & 0xff);
    up[4] = (unsigned char)(u & 0x0f);
    return 5;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * 64-bit itf8 variant: stores 'val' to memory at 'cp' in LTF-8 format.
 *
 * The first byte starts with one 1 bit per following byte, then a 0 bit
 * (omitted in the 9-byte form, whose first byte is 0xff), then as many of
 * the value's top bits as still fit.  The remaining bytes hold the rest
 * of the value, most significant byte first.
 *
 * The value is treated as its 64-bit unsigned bit pattern, so negative
 * numbers always take the 9-byte form and no signed value is ever
 * right-shifted.
 *
 * Returns the number of bytes written (1 to 9).
 */
int ltf8_put(char *cp, int64_t val) {
    unsigned char *up = (unsigned char *)cp;
    uint64_t u = (uint64_t)val;

    // An encoding with 'extra' following bytes carries 7*(extra+1) value
    // bits for extra = 0..7; anything larger needs the 9-byte form.
    int extra = 0;
    while (extra < 8 && u >= ((uint64_t)1 << (7 * (extra + 1))))
        extra++;

    // Length prefix: 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff.
    unsigned char first = (unsigned char)((0xff00u >> extra) & 0xff);

    // The 9-byte form has no value bits in its first byte (and a shift by
    // 64 would be undefined), so only merge them in for shorter forms.
    if (extra < 8)
        first |= (unsigned char)(u >> (8 * extra));
    *up++ = first;

    for (int i = extra - 1; i >= 0; i--)
        *up++ = (unsigned char)((u >> (8 * i)) & 0xff);

    return extra + 1;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Decodes one LTF-8 (64-bit ITF-8 variant) integer from the memory at
 * 'cp' into *val_p.
 *
 * The count of leading 1 bits in the first byte (at most 8) is the
 * number of bytes that follow it; whatever bits remain in the first byte
 * below the terminating 0 bit are the most significant bits of the value.
 * No bounds checking is performed on 'cp'.
 *
 * Returns the number of bytes consumed (1 to 9).
 */
int ltf8_get(char *cp, int64_t *val_p) {
    const unsigned char *bytes = (const unsigned char *)cp;

    // Count the leading 1 bits of the first byte.
    int extra = 0;
    while (extra < 8 && (bytes[0] & (0x80 >> extra)))
        extra++;

    // Value bits held in the first byte; the 8- and 9-byte forms have none.
    uint64_t acc = 0;
    if (extra < 7)
        acc = (uint64_t)(bytes[0] & (0xff >> (extra + 1)));

    // Following bytes are appended most significant first.
    for (int i = 1; i <= extra; i++)
        acc = (acc << 8) | (uint64_t)bytes[i];

    *val_p = (int64_t)acc;
    return extra + 1;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371 int ltf8_decode(cram_fd *fd, int64_t *val_p) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 int c = hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 int64_t val = (unsigned char)c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 if (c == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 if (val < 0x80) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379 return 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 } else if (val < 0xc0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 *val_p = val & (((1LL<<(6+8)))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 return 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386 } else if (val < 0xe0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 *val_p = val & ((1LL<<(5+2*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 return 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 } else if (val < 0xf0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 *val_p = val & ((1LL<<(4+3*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 return 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 } else if (val < 0xf8) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 *val_p = val & ((1LL<<(3+4*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 return 5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 } else if (val < 0xfc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 *val_p = val & ((1LL<<(2+5*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 return 6;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 } else if (val < 0xfe) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423 *val_p = val & ((1LL<<(1+6*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424 return 7;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 } else if (val < 0xff) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 *val_p = val & ((1LL<<(7*8))-1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 return 8;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 val = (val<<8) | (unsigned char)hgetc(fd->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 *val_p = val;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 return 9;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 * Pushes a value in ITF8 format onto the end of a block.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 * This shouldn't be used for high-volume data as it is not the fastest
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 * method.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 * Returns the number of bytes written
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 int itf8_put_blk(cram_block *blk, int val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 char buf[5];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 int sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 sz = itf8_put(buf, val);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464 BLOCK_APPEND(blk, buf, sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 return sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 * Decodes a 32-bit little endian value from fd and stores in val.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 * Returns the number of bytes read on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 int int32_decode(cram_fd *fd, int32_t *val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 int32_t i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 if (4 != hread(fd->fp, &i, 4))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 *val = le_int4(i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480 return 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 * Encodes a 32-bit little endian value 'val' and writes to fd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 * Returns the number of bytes written on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 int int32_encode(cram_fd *fd, int32_t val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 val = le_int4(val);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 if (4 != hwrite(fd->fp, &val, 4))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 return 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 /* As int32_decoded/encode, but from/to blocks instead of cram_fd */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 int int32_get(cram_block *b, int32_t *val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 if (b->uncomp_size - BLOCK_SIZE(b) < 4)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 *val =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 b->data[b->byte ] |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 (b->data[b->byte+1] << 8) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 (b->data[b->byte+2] << 16) |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 (b->data[b->byte+3] << 24);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 BLOCK_SIZE(b) += 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 return 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 /* As int32_decoded/encode, but from/to blocks instead of cram_fd */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 int int32_put(cram_block *b, int32_t val) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 unsigned char cp[4];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 cp[0] = ( val & 0xff);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 cp[1] = ((val>>8) & 0xff);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 cp[2] = ((val>>16) & 0xff);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 cp[3] = ((val>>24) & 0xff);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 BLOCK_APPEND(b, cp, 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520 return b->data ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 * zlib compression code - from Gap5's tg_iface_g.c
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 * They're static here as they're only used within the cram_compress_block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 * and cram_uncompress_block functions, which are the external interface.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 z_stream s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 unsigned char *data = NULL; /* Uncompressed output */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 int data_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 int err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 /* Starting point at uncompressed size, and scale after that */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535 data = malloc(data_alloc = csize*1.2+100);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 if (!data)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 /* Initialise zlib stream */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540 s.zalloc = Z_NULL; /* use default allocation functions */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 s.zfree = Z_NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 s.opaque = Z_NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 s.next_in = (unsigned char *)cdata;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544 s.avail_in = csize;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 s.total_in = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546 s.next_out = data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547 s.avail_out = data_alloc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 s.total_out = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 //err = inflateInit(&s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551 err = inflateInit2(&s, 15 + 32);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 if (err != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 fprintf(stderr, "zlib inflateInit error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554 free(data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 /* Decode to 'data' array */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559 for (;s.avail_in;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 unsigned char *data_tmp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 int alloc_inc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 s.next_out = &data[s.total_out];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 err = inflate(&s, Z_NO_FLUSH);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 if (err == Z_STREAM_END)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 if (err != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 fprintf(stderr, "zlib inflate error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 /* More to come, so realloc based on growth so far */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 alloc_inc = (double)s.avail_in/s.total_in * s.total_out + 100;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575 data = realloc((data_tmp = data), data_alloc += alloc_inc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576 if (!data) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 free(data_tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580 s.avail_out += alloc_inc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582 inflateEnd(&s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
583
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
584 *size = s.total_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
585 return (char *)data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
586 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
587
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588 static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589 int level, int strat) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 z_stream s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 unsigned char *cdata = NULL; /* Compressed output */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 int cdata_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593 int cdata_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 int err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 cdata = malloc(cdata_alloc = size*1.05+100);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597 if (!cdata)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599 cdata_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 /* Initialise zlib stream */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 s.zalloc = Z_NULL; /* use default allocation functions */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 s.zfree = Z_NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604 s.opaque = Z_NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 s.next_in = (unsigned char *)data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 s.avail_in = size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 s.total_in = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 s.next_out = cdata;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609 s.avail_out = cdata_alloc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610 s.total_out = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611 s.data_type = Z_BINARY;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 if (err != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615 fprintf(stderr, "zlib deflateInit2 error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 /* Encode to 'cdata' array */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620 for (;s.avail_in;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 s.next_out = &cdata[cdata_pos];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622 s.avail_out = cdata_alloc - cdata_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 if (cdata_alloc - cdata_pos <= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 fprintf(stderr, "Deflate produced larger output than expected. Abort\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627 err = deflate(&s, Z_NO_FLUSH);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628 cdata_pos = cdata_alloc - s.avail_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 if (err != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 fprintf(stderr, "zlib deflate error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 if (deflate(&s, Z_FINISH) != Z_STREAM_END) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635 fprintf(stderr, "zlib deflate error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637 *cdata_size = s.total_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 if (deflateEnd(&s) != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640 fprintf(stderr, "zlib deflate error: %s\n", s.msg);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 return (char *)cdata;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 #ifdef HAVE_LIBLZMA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 /* ------------------------------------------------------------------------ */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648 * Data compression routines using liblzma (xz)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 * On a test set this shrunk the main db from 136157104 bytes to 114796168, but
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 * caused tg_index to grow from 2m43.707s to 15m3.961s. Exporting as bfastq
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652 * went from 18.3s to 36.3s. So decompression suffers too, but not as bad
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653 * as compression times.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 * For now we disable this functionality. If it's to be reenabled make sure you
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656 * improve the mem_inflate implementation as it's just a test hack at the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657 * moment.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660 static char *lzma_mem_deflate(char *data, size_t size, size_t *cdata_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 int level) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662 char *out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663 size_t out_size = lzma_stream_buffer_bound(size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664 *cdata_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 out = malloc(out_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 /* Single call compression */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669 if (LZMA_OK != lzma_easy_buffer_encode(level, LZMA_CHECK_CRC32, NULL,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 (uint8_t *)data, size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 (uint8_t *)out, cdata_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672 out_size))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675 return out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
676 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
677
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
678 static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
679 lzma_stream strm = LZMA_STREAM_INIT;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
680 size_t out_size = 0, out_pos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
681 char *out = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
682 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
683
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
684 /* Initiate the decoder */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
685 if (LZMA_OK != lzma_stream_decoder(&strm, 50000000, 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
686 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
687
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
688 /* Decode loop */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
689 strm.avail_in = csize;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
690 strm.next_in = (uint8_t *)cdata;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
691
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
692 for (;strm.avail_in;) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
693 if (strm.avail_in > out_size - out_pos) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
694 out_size += strm.avail_in * 4 + 32768;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
695 out = realloc(out, out_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
696 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
697 strm.avail_out = out_size - out_pos;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
698 strm.next_out = (uint8_t *)&out[out_pos];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
699
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
700 r = lzma_code(&strm, LZMA_RUN);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
701 if (LZMA_OK != r && LZMA_STREAM_END != r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
702 fprintf(stderr, "r=%d\n", r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
703 fprintf(stderr, "mem=%"PRId64"d\n", (int64_t)lzma_memusage(&strm));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
704 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
705 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
706
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
707 out_pos = strm.total_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
708
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
709 if (r == LZMA_STREAM_END)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
710 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
711 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
712
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
713 /* finish up any unflushed data; necessary? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
714 r = lzma_code(&strm, LZMA_FINISH);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
715 if (r != LZMA_OK && r != LZMA_STREAM_END) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
716 fprintf(stderr, "r=%d\n", r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
717 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
718 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
719
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
720 out = realloc(out, strm.total_out);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
721 *size = strm.total_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
722
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
723 lzma_end(&strm);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
724
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
725 return out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
726 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
727 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
728
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
729 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
730 * CRAM blocks - the dynamically growable data block. We have code to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
731 * create, update, (un)compress and read/write.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
732 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
733 * These are derived from the deflate_interlaced.c blocks, but with the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
734 * CRAM extension of content types and IDs.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
735 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
736
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
737 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
738 * Allocates a new cram_block structure with a specified content_type and
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
739 * id.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
740 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
741 * Returns block pointer on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
742 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
743 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
744 cram_block *cram_new_block(enum cram_content_type content_type,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
745 int content_id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
746 cram_block *b = malloc(sizeof(*b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
747 if (!b)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
748 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
749 b->method = b->orig_method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
750 b->content_type = content_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
751 b->content_id = content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
752 b->comp_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
753 b->uncomp_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
754 b->data = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
755 b->alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
756 b->byte = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
757 b->bit = 7; // MSB
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
758
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
759 return b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
760 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
761
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
762 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
763 * Reads a block from a cram file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
764 * Returns cram_block pointer on success.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
765 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
766 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
767 cram_block *cram_read_block(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
768 cram_block *b = malloc(sizeof(*b));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
769 if (!b)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
770 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
771
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
772 //fprintf(stderr, "Block at %d\n", (int)ftell(fd->fp));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
773
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
774 if (-1 == (b->method = hgetc(fd->fp))) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
775 if (-1 == (b->content_type= hgetc(fd->fp))) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
776 if (-1 == itf8_decode(fd, &b->content_id)) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
777 if (-1 == itf8_decode(fd, &b->comp_size)) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
778 if (-1 == itf8_decode(fd, &b->uncomp_size)) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
779
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
780 // fprintf(stderr, " method %d, ctype %d, cid %d, csize %d, ucsize %d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
781 // b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
782
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
783 if (b->method == RAW) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
784 b->alloc = b->uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
785 if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
786 if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
787 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
788 free(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
789 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
790 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
791 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
792 b->alloc = b->comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
793 if (!(b->data = malloc(b->comp_size))) { free(b); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
794 if (b->comp_size != hread(fd->fp, b->data, b->comp_size)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
795 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
796 free(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
797 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
798 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
799 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
800
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
801 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
802 unsigned char dat[100], *cp = dat;;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
803 uint32_t crc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
804
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
805
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
806 if (-1 == int32_decode(fd, (int32_t *)&b->crc32)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
807 free(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
808 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
809 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
810
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
811 *cp++ = b->method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
812 *cp++ = b->content_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
813 cp += itf8_put(cp, b->content_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
814 cp += itf8_put(cp, b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
815 cp += itf8_put(cp, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
816 crc = crc32(0L, dat, cp-dat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
817 crc = crc32(crc, b->data ? b->data : (uc *)"", b->alloc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
818
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
819 if (crc != b->crc32) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
820 fprintf(stderr, "Block CRC32 failure\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
821 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
822 free(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
823 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
824 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
825 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
826
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
827 b->orig_method = b->method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
828 b->idx = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
829 b->byte = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
830 b->bit = 7; // MSB
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
831
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
832 return b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
833 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
834
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
835 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
836 * Writes a CRAM block.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
837 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
838 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
839 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
840 int cram_write_block(cram_fd *fd, cram_block *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
841 assert(b->method != RAW || (b->comp_size == b->uncomp_size));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
842
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
843 if (hputc(b->method, fd->fp) == EOF) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
844 if (hputc(b->content_type, fd->fp) == EOF) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
845 if (itf8_encode(fd, b->content_id) == -1) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
846 if (itf8_encode(fd, b->comp_size) == -1) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
847 if (itf8_encode(fd, b->uncomp_size) == -1) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
848
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
849 if (b->method == RAW) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
850 if (b->uncomp_size != hwrite(fd->fp, b->data, b->uncomp_size))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
851 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
852 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
853 if (b->comp_size != hwrite(fd->fp, b->data, b->comp_size))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
854 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
855 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
856
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
857 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
858 unsigned char dat[100], *cp = dat;;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
859 uint32_t crc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
860
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
861 *cp++ = b->method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
862 *cp++ = b->content_type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
863 cp += itf8_put(cp, b->content_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
864 cp += itf8_put(cp, b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
865 cp += itf8_put(cp, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
866 crc = crc32(0L, dat, cp-dat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
868 if (b->method == RAW) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
869 b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
870 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
871 b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
872 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
873
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
874 if (-1 == int32_encode(fd, b->crc32))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
875 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
876 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
877
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
878 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
879 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
880
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
881 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
882 * Frees a CRAM block, deallocating internal data too.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
883 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
884 void cram_free_block(cram_block *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
885 if (!b)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
886 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
887 if (b->data)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
888 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
889 free(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
890 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
891
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
892 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
893 * Uncompresses a CRAM block, if compressed.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
894 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
895 int cram_uncompress_block(cram_block *b) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
896 char *uncomp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
897 size_t uncomp_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
898
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
899 if (b->uncomp_size == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
900 // blank block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
901 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
902 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
903 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
904
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
905 switch (b->method) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
906 case RAW:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
907 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
908
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
909 case GZIP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
910 uncomp = zlib_mem_inflate((char *)b->data, b->comp_size, &uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
911 if (!uncomp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
912 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
913 if ((int)uncomp_size != b->uncomp_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
914 free(uncomp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
915 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
916 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
917 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
918 b->data = (unsigned char *)uncomp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
919 b->alloc = uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
920 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
921 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
922
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
923 #ifdef HAVE_LIBBZ2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
924 case BZIP2: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
925 unsigned int usize = b->uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
926 if (!(uncomp = malloc(usize)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
927 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
928 if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
929 (char *)b->data, b->comp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
930 0, 0)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
931 free(uncomp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
932 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
933 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
934 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
935 b->data = (unsigned char *)uncomp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
936 b->alloc = usize;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
937 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
938 b->uncomp_size = usize; // Just incase it differs
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
939 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
940 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
941 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
942 case BZIP2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
943 fprintf(stderr, "Bzip2 compression is not compiled into this "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
944 "version.\nPlease rebuild and try again.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
945 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
946 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
947
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
948 #ifdef HAVE_LIBLZMA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
949 case LZMA:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
950 uncomp = lzma_mem_inflate((char *)b->data, b->comp_size, &uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
951 if (!uncomp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
952 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
953 if ((int)uncomp_size != b->uncomp_size)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
954 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
955 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
956 b->data = (unsigned char *)uncomp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
957 b->alloc = uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
958 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
959 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
960 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
961 case LZMA:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
962 fprintf(stderr, "Lzma compression is not compiled into this "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
963 "version.\nPlease rebuild and try again.\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
964 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
965 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
966 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
967
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
968 case RANS: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
969 unsigned int usize = b->uncomp_size, usize2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
970 uncomp = (char *)rans_uncompress(b->data, b->comp_size, &usize2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
971 assert(usize == usize2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
972 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
973 b->data = (unsigned char *)uncomp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
974 b->alloc = usize2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
975 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
976 b->uncomp_size = usize2; // Just incase it differs
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
977 //fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
978 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
979 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
980
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
981 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
982 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
983 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
984
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
985 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
986 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
987
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
988 static char *cram_compress_by_method(char *in, size_t in_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
989 size_t *out_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
990 enum cram_block_method method,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
991 int level, int strat) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
992 switch (method) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
993 case GZIP:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
994 return zlib_mem_deflate(in, in_size, out_size, level, strat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
995
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
996 case BZIP2: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
997 #ifdef HAVE_LIBBZ2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
998 unsigned int comp_size = in_size*1.01 + 600;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
999 char *comp = malloc(comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1000 if (!comp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1001 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1002
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1003 if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1004 in, in_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1005 level, 0, 30)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1006 free(comp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1007 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1008 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1009 *out_size = comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1010 return comp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1011 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1012 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1013 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1014 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1015
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1016 case LZMA:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1017 #ifdef HAVE_LIBLZMA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1018 return lzma_mem_deflate(in, in_size, out_size, level);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1019 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1020 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1021 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1022
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1023 case RANS0: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1024 unsigned int out_size_i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1025 unsigned char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1026 cp = rans_compress((unsigned char *)in, in_size, &out_size_i, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1027 *out_size = out_size_i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1028 return (char *)cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1029 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1030
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1031 case RANS1: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1032 unsigned int out_size_i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1033 unsigned char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1034
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1035 cp = rans_compress((unsigned char *)in, in_size, &out_size_i, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1036 *out_size = out_size_i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1037 return (char *)cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1038 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1039
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1040 case RAW:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1041 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1042
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1043 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1044 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1045 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1046
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1047 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1048 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1049
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1050
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1051 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1052 * Compresses a block using one of two different zlib strategies. If we only
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1053 * want one choice set strat2 to be -1.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1054 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1055 * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1056 * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1057 * significantly faster.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1058 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1059 int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1060 int method, int level) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1061
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1062 char *comp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1063 size_t comp_size = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1064 int strat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1065
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1066 //fprintf(stderr, "IN: block %d, sz %d\n", b->content_id, b->uncomp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1067
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1068 if (method == RAW || level == 0 || b->uncomp_size == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1069 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1070 b->comp_size = b->uncomp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1071 //fprintf(stderr, "Skip block id %d\n", b->content_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1072 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1073 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1074
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1075 if (metrics) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1076 pthread_mutex_lock(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1077 if (metrics->trial > 0 || --metrics->next_trial <= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1078 size_t sz_best = INT_MAX;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1079 size_t sz_gz_rle = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1080 size_t sz_gz_def = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1081 size_t sz_rans0 = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1082 size_t sz_rans1 = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1083 size_t sz_bzip2 = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1084 size_t sz_lzma = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1085 int method_best = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1086 char *c_best = NULL, *c = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1087
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1088 if (metrics->revised_method)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1089 method = metrics->revised_method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1090 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1091 metrics->revised_method = method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1092
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1093 if (metrics->next_trial == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1094 metrics->next_trial = TRIAL_SPAN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1095 metrics->trial = NTRIALS;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1096 metrics->sz_gz_rle /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1097 metrics->sz_gz_def /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1098 metrics->sz_rans0 /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1099 metrics->sz_rans1 /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1100 metrics->sz_bzip2 /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1101 metrics->sz_lzma /= 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1102 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1103
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1104 pthread_mutex_unlock(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1105
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1106 if (method & (1<<GZIP_RLE)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1107 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1108 &sz_gz_rle, GZIP, 1, Z_RLE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1109 if (c && sz_best > sz_gz_rle) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1110 sz_best = sz_gz_rle;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1111 method_best = GZIP_RLE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1112 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1113 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1114 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1115 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1116 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1117 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1118 sz_gz_rle = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1119 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1120
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1121 //fprintf(stderr, "Block %d; %d->%d\n", b->content_id, b->uncomp_size, sz_gz_rle);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1122 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1123
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1124 if (method & (1<<GZIP)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1125 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1126 &sz_gz_def, GZIP, level,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1127 Z_FILTERED);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1128 if (c && sz_best > sz_gz_def) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1129 sz_best = sz_gz_def;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1130 method_best = GZIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1131 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1132 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1133 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1134 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1135 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1136 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1137 sz_gz_def = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1138 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1139
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1140 //fprintf(stderr, "Block %d; %d->%d\n", b->content_id, b->uncomp_size, sz_gz_def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1141 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1142
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1143 if (method & (1<<RANS0)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1144 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1145 &sz_rans0, RANS0, 0, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1146 if (c && sz_best > sz_rans0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1147 sz_best = sz_rans0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1148 method_best = RANS0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1149 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1150 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1151 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1152 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1153 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1154 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1155 sz_rans0 = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1156 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1157 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1158
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1159 if (method & (1<<RANS1)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1160 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1161 &sz_rans1, RANS1, 0, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1162 if (c && sz_best > sz_rans1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1163 sz_best = sz_rans1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1164 method_best = RANS1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1165 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1166 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1167 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1168 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1169 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1170 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1171 sz_rans1 = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1172 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1173 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1174
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1175 if (method & (1<<BZIP2)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1176 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1177 &sz_bzip2, BZIP2, level, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1178 if (c && sz_best > sz_bzip2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1179 sz_best = sz_bzip2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1180 method_best = BZIP2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1181 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1182 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1183 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1184 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1185 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1186 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1187 sz_bzip2 = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1188 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1189 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1190
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1191 if (method & (1<<LZMA)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1192 c = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1193 &sz_lzma, LZMA, level, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1194 if (c && sz_best > sz_lzma) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1195 sz_best = sz_lzma;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1196 method_best = LZMA;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1197 if (c_best)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1198 free(c_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1199 c_best = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1200 } else if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1201 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1202 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1203 sz_lzma = b->uncomp_size*2+1000;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1204 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1205 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1206
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1207 //fprintf(stderr, "sz_best = %d\n", sz_best);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1208
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1209 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1210 b->data = (unsigned char *)c_best;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1211 //printf("method_best = %s\n", cram_block_method2str(method_best));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1212 b->method = method_best == GZIP_RLE ? GZIP : method_best;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1213 b->comp_size = sz_best;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1214
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1215 pthread_mutex_lock(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1216 metrics->sz_gz_rle += sz_gz_rle;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1217 metrics->sz_gz_def += sz_gz_def;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1218 metrics->sz_rans0 += sz_rans0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1219 metrics->sz_rans1 += sz_rans1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1220 metrics->sz_bzip2 += sz_bzip2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1221 metrics->sz_lzma += sz_lzma;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1222 if (--metrics->trial == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1223 int best_method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1224 int best_sz = INT_MAX;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1225
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1226 // Scale methods by cost
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1227 if (fd->level <= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1228 metrics->sz_rans1 *= 1.02;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1229 metrics->sz_gz_def *= 1.04;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1230 metrics->sz_bzip2 *= 1.08;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1231 metrics->sz_lzma *= 1.10;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1232 } else if (fd->level <= 6) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1233 metrics->sz_rans1 *= 1.01;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1234 metrics->sz_gz_def *= 1.02;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1235 metrics->sz_bzip2 *= 1.03;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1236 metrics->sz_lzma *= 1.05;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1237 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1238
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1239 if (method & (1<<GZIP_RLE) && best_sz > metrics->sz_gz_rle)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1240 best_sz = metrics->sz_gz_rle, best_method = GZIP_RLE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1241
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1242 if (method & (1<<GZIP) && best_sz > metrics->sz_gz_def)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1243 best_sz = metrics->sz_gz_def, best_method = GZIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1244
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1245 if (method & (1<<RANS0) && best_sz > metrics->sz_rans0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1246 best_sz = metrics->sz_rans0, best_method = RANS0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1247
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1248 if (method & (1<<RANS1) && best_sz > metrics->sz_rans1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1249 best_sz = metrics->sz_rans1, best_method = RANS1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1250
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1251 if (method & (1<<BZIP2) && best_sz > metrics->sz_bzip2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1252 best_sz = metrics->sz_bzip2, best_method = BZIP2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1253
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1254 if (method & (1<<LZMA) && best_sz > metrics->sz_lzma)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1255 best_sz = metrics->sz_lzma, best_method = LZMA;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1256
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1257 if (best_method == GZIP_RLE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1258 metrics->method = GZIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1259 metrics->strat = Z_RLE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1260 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1261 metrics->method = best_method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1262 metrics->strat = Z_FILTERED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1263 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1264
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1265 // If we see at least MAXFAIL trials in a row for a specific
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1266 // compression method with more than MAXDELTA aggregate
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1267 // size then we drop this from the list of methods used
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1268 // for this block type.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1269 #define MAXDELTA 0.20
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1270 #define MAXFAILS 4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1271 if (best_method == GZIP_RLE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1272 metrics->gz_rle_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1273 metrics->gz_rle_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1274 } else if (best_sz < metrics->sz_gz_rle) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1275 double r = (double)metrics->sz_gz_rle / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1276 if (++metrics->gz_rle_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1277 (metrics->gz_rle_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1278 method &= ~(1<<GZIP_RLE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1279 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1280
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1281 if (best_method == GZIP) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1282 metrics->gz_def_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1283 metrics->gz_def_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1284 } else if (best_sz < metrics->sz_gz_def) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1285 double r = (double)metrics->sz_gz_def / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1286 if (++metrics->gz_def_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1287 (metrics->gz_def_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1288 method &= ~(1<<GZIP);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1289 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1290
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1291 if (best_method == RANS0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1292 metrics->rans0_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1293 metrics->rans0_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1294 } else if (best_sz < metrics->sz_rans0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1295 double r = (double)metrics->sz_rans0 / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1296 if (++metrics->rans0_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1297 (metrics->rans0_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1298 method &= ~(1<<RANS0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1299 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1300
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1301 if (best_method == RANS1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1302 metrics->rans1_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1303 metrics->rans1_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1304 } else if (best_sz < metrics->sz_rans1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1305 double r = (double)metrics->sz_rans1 / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1306 if (++metrics->rans1_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1307 (metrics->rans1_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1308 method &= ~(1<<RANS1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1309 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1310
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1311 if (best_method == BZIP2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1312 metrics->bzip2_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1313 metrics->bzip2_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1314 } else if (best_sz < metrics->sz_bzip2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1315 double r = (double)metrics->sz_bzip2 / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1316 if (++metrics->bzip2_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1317 (metrics->bzip2_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1318 method &= ~(1<<BZIP2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1319 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1320
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1321 if (best_method == LZMA) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1322 metrics->lzma_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1323 metrics->lzma_extra = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1324 } else if (best_sz < metrics->sz_lzma) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1325 double r = (double)metrics->sz_lzma / best_sz - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1326 if (++metrics->lzma_cnt >= MAXFAILS &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1327 (metrics->lzma_extra += r) >= MAXDELTA)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1328 method &= ~(1<<LZMA);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1329 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1330
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1331 //if (method != metrics->revised_method)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1332 // fprintf(stderr, "%d: method from %x to %x\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1333 // b->content_id, metrics->revised_method, method);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1334 metrics->revised_method = method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1335 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1336 pthread_mutex_unlock(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1337 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1338 strat = metrics->strat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1339 method = metrics->method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1340
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1341 pthread_mutex_unlock(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1342 comp = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1343 &comp_size, method,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1344 level, strat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1345 if (!comp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1346 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1347 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1348 b->data = (unsigned char *)comp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1349 b->comp_size = comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1350 b->method = method;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1351 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1352
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1353 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1354 // no cached metrics, so just do zlib?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1355 comp = cram_compress_by_method((char *)b->data, b->uncomp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1356 &comp_size, GZIP, level, Z_FILTERED);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1357 if (!comp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1358 fprintf(stderr, "Compression failed!\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1359 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1360 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1361 free(b->data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1362 b->data = (unsigned char *)comp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1363 b->comp_size = comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1364 b->method = GZIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1365 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1366
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1367 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1368 fprintf(stderr, "Compressed block ID %d from %d to %d by method %s\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1369 b->content_id, b->uncomp_size, b->comp_size,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1370 cram_block_method2str(b->method));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1371
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1372 if (b->method == RANS1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1373 b->method = RANS0; // Spec just has RANS (not 0/1) with auto-sensing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1374
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1375 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1376 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1377
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1378 cram_metrics *cram_new_metrics(void) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1379 cram_metrics *m = calloc(1, sizeof(*m));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1380 if (!m)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1381 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1382 m->trial = NTRIALS-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1383 m->next_trial = TRIAL_SPAN;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1384 m->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1385 m->strat = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1386 m->revised_method = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1387
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1388 return m;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1389 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1390
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1391 char *cram_block_method2str(enum cram_block_method m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1392 switch(m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1393 case RAW: return "RAW";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1394 case GZIP: return "GZIP";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1395 case BZIP2: return "BZIP2";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1396 case LZMA: return "LZMA";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1397 case RANS0: return "RANS0";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1398 case RANS1: return "RANS1";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1399 case GZIP_RLE: return "GZIP_RLE";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1400 case ERROR: break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1401 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1402 return "?";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1403 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1404
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1405 char *cram_content_type2str(enum cram_content_type t) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1406 switch (t) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1407 case FILE_HEADER: return "FILE_HEADER";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1408 case COMPRESSION_HEADER: return "COMPRESSION_HEADER";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1409 case MAPPED_SLICE: return "MAPPED_SLICE";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1410 case UNMAPPED_SLICE: return "UNMAPPED_SLICE";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1411 case EXTERNAL: return "EXTERNAL";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1412 case CORE: return "CORE";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1413 case CT_ERROR: break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1414 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1415 return "?";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1416 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1417
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Closes fp, but first flushes it and fsync()s its descriptor so that
 * a failure to get the data written is reported to the caller.
 *
 * Two failures are tolerated: fflush() failing with EBADF, and fsync()
 * failing with EINVAL (eg when fp is a pipe rather than a real file).
 * Any other flush or sync failure still closes fp and reports an error.
 *
 * Returns 0 on success
 *        -1 on a flush or sync failure; otherwise the fclose() result.
 */
int paranoid_fclose(FILE *fp) {
    int flush_failed = (-1 == fflush(fp)) && (errno != EBADF);

    if (!flush_failed) {
        // Clear errno so the EINVAL test only reflects this fsync() call.
        errno = 0;
        if (-1 != fsync(fileno(fp)) || errno == EINVAL)
            return fclose(fp);
    }

    // Hard failure: still release the stream, but report the error.
    fclose(fp);
    return -1;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1440
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1441 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1442 * Reference sequence handling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1443 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1444 * These revolve around the refs_t structure, which may potentially be
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1445 * shared between multiple cram_fd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1446 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1447 * We start with refs_create() to allocate an empty refs_t and then
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1448 * populate it with @SQ line data using refs_from_header(). This is done on
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1449 * cram_open(). Also at start up we can call cram_load_reference() which
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1450 * is used with "scramble -r foo.fa". This replaces the fd->refs with the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1451 * new one specified. In either case refs2id() is then called which
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1452 * maps ref_entry names to @SQ ids (refs_t->ref_id[]).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1453 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1454 * Later, possibly within a thread, we will want to know the actual ref
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1455 * seq itself, obtained by calling cram_get_ref(). This may use the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1456 * UR: or M5: fields or the filename specified in the original
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1457 * cram_load_reference() call.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1458 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1459 * Given the potential for multi-threaded reference usage, we have
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1460 * reference counting (sorry for the confusing double use of "ref") to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1461 * track the number of callers interested in any specific reference.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1462 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1463
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1464 void refs_free(refs_t *r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1465 RP("refs_free()\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1466
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1467 if (--r->count > 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1468 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1469
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1470 if (!r)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1471 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1472
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1473 if (r->pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1474 string_pool_destroy(r->pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1475
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1476 if (r->h_meta) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1477 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1478
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1479 for (k = kh_begin(r->h_meta); k != kh_end(r->h_meta); k++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1480 ref_entry *e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1481
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1482 if (!kh_exist(r->h_meta, k))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1483 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1484 if (!(e = kh_val(r->h_meta, k)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1485 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1486 if (e->seq)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1487 free(e->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1488 free(e);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1489 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1490
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1491 kh_destroy(refs, r->h_meta);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1492 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1493
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1494 if (r->ref_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1495 free(r->ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1496
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1497 if (r->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1498 bgzf_close(r->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1499
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1500 pthread_mutex_destroy(&r->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1501
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1502 free(r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1503 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1504
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1505 static refs_t *refs_create(void) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1506 refs_t *r = calloc(1, sizeof(*r));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1507
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1508 RP("refs_create()\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1509
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1510 if (!r)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1511 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1512
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1513 if (!(r->pool = string_pool_create(8192)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1514 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1515
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1516 r->ref_id = NULL; // see refs2id() to populate.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1517 r->count = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1518 r->last = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1519 r->last_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1520
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1521 if (!(r->h_meta = kh_init(refs)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1522 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1523
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1524 pthread_mutex_init(&r->lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1525
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1526 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1527
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1528 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1529 refs_free(r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1530 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1531 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1532
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1533 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1534 * Opens a reference fasta file as a BGZF stream, allowing for
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1535 * compressed files. It automatically builds a .fai file if
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1536 * required and if compressed a .gzi bgzf index too.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1537 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1538 * Returns a BGZF handle on success;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1539 * NULL on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1540 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1541 static BGZF *bgzf_open_ref(char *fn, char *mode) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1542 BGZF *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1543 char fai_file[PATH_MAX];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1544
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1545 snprintf(fai_file, PATH_MAX, "%s.fai", fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1546 if (access(fai_file, R_OK) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1547 if (fai_build(fn) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1548 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1549
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1550 if (!(fp = bgzf_open(fn, mode))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1551 perror(fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1552 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1553 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1554
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1555 if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1556 fprintf(stderr, "Unable to load .gzi index '%s.gzi'\n", fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1557 bgzf_close(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1558 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1559 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1560
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1561 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1562 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1563
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1564 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1565 * Loads a FAI file for a reference.fasta.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1566 * "is_err" indicates whether failure to load is worthy of emitting an
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1567 * error message. In some cases (eg with embedded references) we
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1568 * speculatively load, just incase, and silently ignore errors.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1569 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1570 * Returns the refs_t struct on success (maybe newly allocated);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1571 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1572 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1573 static refs_t *refs_load_fai(refs_t *r_orig, char *fn, int is_err) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1574 struct stat sb;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1575 FILE *fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1576 char fai_fn[PATH_MAX];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1577 char line[8192];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1578 refs_t *r = r_orig;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1579 size_t fn_l = strlen(fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1580 int id = 0, id_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1581
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1582 RP("refs_load_fai %s\n", fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1583
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1584 if (!r)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1585 if (!(r = refs_create()))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1586 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1587
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1588 /* Open reference, for later use */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1589 if (stat(fn, &sb) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1590 if (is_err)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1591 perror(fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1592 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1593 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1594
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1595 if (r->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1596 if (bgzf_close(r->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1597 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1598 r->fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1599
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1600 if (!(r->fn = string_dup(r->pool, fn)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1601 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1602
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1603 if (fn_l > 4 && strcmp(&fn[fn_l-4], ".fai") == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1604 r->fn[fn_l-4] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1605
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1606 if (!(r->fp = bgzf_open_ref(r->fn, "r")))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1607 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1608
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1609 /* Parse .fai file and load meta-data */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1610 sprintf(fai_fn, "%.*s.fai", PATH_MAX-5, r->fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1611
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1612 if (stat(fai_fn, &sb) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1613 if (is_err)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1614 perror(fai_fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1615 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1616 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1617 if (!(fp = fopen(fai_fn, "r"))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1618 if (is_err)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1619 perror(fai_fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1620 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1621 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1622 while (fgets(line, 8192, fp) != NULL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1623 ref_entry *e = malloc(sizeof(*e));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1624 char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1625 int n;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1626 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1627
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1628 if (!e)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1629 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1630
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1631 // id
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1632 for (cp = line; *cp && !isspace(*cp); cp++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1633 ;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1634 *cp++ = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1635 e->name = string_dup(r->pool, line);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1636
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1637 // length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1638 while (*cp && isspace(*cp))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1639 cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1640 e->length = strtoll(cp, &cp, 10);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1641
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1642 // offset
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1643 while (*cp && isspace(*cp))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1644 cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1645 e->offset = strtoll(cp, &cp, 10);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1646
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1647 // bases per line
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1648 while (*cp && isspace(*cp))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1649 cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1650 e->bases_per_line = strtol(cp, &cp, 10);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1651
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1652 // line length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1653 while (*cp && isspace(*cp))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1654 cp++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1655 e->line_length = strtol(cp, &cp, 10);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1656
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1657 // filename
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1658 e->fn = r->fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1659
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1660 e->count = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1661 e->seq = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1662
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1663 k = kh_put(refs, r->h_meta, e->name, &n);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1664 if (-1 == n) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1665 free(e);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1666 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1667 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1668
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1669 if (n) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1670 kh_val(r->h_meta, k) = e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1671 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1672 ref_entry *re = kh_val(r->h_meta, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1673 if (re && (re->count != 0 || re->length != 0)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1674 /* Keep old */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1675 free(e);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1676 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1677 /* Replace old */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1678 if (re)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1679 free(re);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1680 kh_val(r->h_meta, k) = e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1681 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1682 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1683
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1684 if (id >= id_alloc) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1685 int x;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1686
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1687 id_alloc = id_alloc ?id_alloc*2 : 16;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1688 r->ref_id = realloc(r->ref_id, id_alloc * sizeof(*r->ref_id));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1689
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1690 for (x = id; x < id_alloc; x++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1691 r->ref_id[x] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1692 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1693 r->ref_id[id] = e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1694 r->nref = ++id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1695 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1696
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1697 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1698
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1699 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1700 if (fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1701 fclose(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1702
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1703 if (!r_orig)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1704 refs_free(r);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1705
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1706 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1707 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1708
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1709 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1710 * Indexes references by the order they appear in a BAM file. This may not
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1711 * necessarily be the same order they appear in the fasta reference file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1712 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1713 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1714 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1715 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1716 int refs2id(refs_t *r, SAM_hdr *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1717 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1718
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1719 if (r->ref_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1720 free(r->ref_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1721 if (r->last)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1722 r->last = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1723
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1724 r->ref_id = calloc(h->nref, sizeof(*r->ref_id));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1725 if (!r->ref_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1726 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1727
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1728 r->nref = h->nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1729 for (i = 0; i < h->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1730 khint_t k = kh_get(refs, r->h_meta, h->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1731 if (k != kh_end(r->h_meta)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1732 r->ref_id[i] = kh_val(r->h_meta, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1733 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1734 fprintf(stderr, "Unable to find ref name '%s'\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1735 h->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1736 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1737 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1738
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1739 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1740 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1741
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1742 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1743 * Generates refs_t entries based on @SQ lines in the header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1744 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1745 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1746 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1747 static int refs_from_header(refs_t *r, cram_fd *fd, SAM_hdr *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1748 int i, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1749
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1750 if (!h || h->nref == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1751 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1752
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1753 //fprintf(stderr, "refs_from_header for %p mode %c\n", fd, fd->mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1754
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1755 /* Existing refs are fine, as long as they're compatible with the hdr. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1756 if (!(r->ref_id = realloc(r->ref_id, (r->nref + h->nref) * sizeof(*r->ref_id))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1757 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1758
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1759 /* Copy info from h->ref[i] over to r */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1760 for (i = 0, j = r->nref; i < h->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1761 SAM_hdr_type *ty;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1762 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1763 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1764 int n;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1765
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1766 k = kh_get(refs, r->h_meta, h->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1767 if (k != kh_end(r->h_meta))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1768 // Ref already known about
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1769 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1770
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1771 if (!(r->ref_id[j] = calloc(1, sizeof(ref_entry))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1772 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1773
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1774 if (!h->ref[j].name)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1775 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1776
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1777 r->ref_id[j]->name = string_dup(r->pool, h->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1778 r->ref_id[j]->length = 0; // marker for not yet loaded
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1779
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1780 /* Initialise likely filename if known */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1781 if ((ty = sam_hdr_find(h, "SQ", "SN", h->ref[i].name))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1782 if ((tag = sam_hdr_find_key(h, ty, "M5", NULL))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1783 r->ref_id[j]->fn = string_dup(r->pool, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1784 //fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1785 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1786 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1787
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1788 k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1789 if (n <= 0) // already exists or error
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1790 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1791 kh_val(r->h_meta, k) = r->ref_id[j];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1792
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1793 j++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1794 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1795 r->nref = j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1796
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1797 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1798 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1799
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1800 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1801 * Attaches a header to a cram_fd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1802 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1803 * This should be used when creating a new cram_fd for writing where
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1804 * we have an SAM_hdr already constructed (eg from a file we've read
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1805 * in).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1806 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1807 int cram_set_header(cram_fd *fd, SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1808 if (fd->header)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1809 sam_hdr_free(fd->header);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1810 fd->header = hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1811 return refs_from_header(fd->refs, fd, hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1812 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1813
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Converts a directory and a filename into an expanded path, replacing %s
 * in directory with the filename and %[0-9]+s with portions of the filename
 * Any remaining parts of filename are added to the end with /%s.
 *
 * Each substitution consumes the characters it copies from "fn", so
 * "%2s/%2s/%s" with fn "abcdef" yields "ab/cd/ef".  A '%' that does not
 * start one of the two recognised forms is copied through literally,
 * together with the character following it; a '%' at the very end of
 * "dir" is copied on its own.
 *
 * "path" receives the NUL-terminated result.  No size is passed in, so
 * the caller must supply a buffer large enough for strlen(dir) +
 * strlen(fn) + 2 bytes.  "path" must not overlap "dir" or "fn".
 */
void expand_cache_path(char *path, char *dir, char *fn) {
    char *start = path;
    char *cp;
    size_t len;

    while ((cp = strchr(dir, '%'))) {
        /* Literal text ahead of the '%' */
        len = (size_t)(cp - dir);
        memcpy(path, dir, len);
        path += len;

        cp++;
        if (*cp == 's') {
            /* %s: everything that is left of the filename */
            len = strlen(fn);
            memcpy(path, fn, len);
            path += len;
            fn += len;
            cp++;
        } else if (*cp >= '0' && *cp <= '9') {
            char *endp;
            long l;
            size_t avail = strlen(fn);

            /* l cannot be negative: the text starts with a digit */
            l = strtol(cp, &endp, 10);
            len = ((unsigned long)l < avail) ? (size_t)l : avail;
            if (*endp == 's') {
                /* %<n>s: at most n characters of the filename */
                memcpy(path, fn, len);
                path += len;
                fn += len;
                cp = endp+1;
            } else {
                /* Not a substitution; emit "%<digit>" and carry on */
                *path++ = '%';
                *path++ = *cp++;
            }
        } else {
            *path++ = '%';
            /* Do not step over the terminator when dir ends in '%' */
            if (*cp)
                *path++ = *cp++;
        }
        dir = cp;
    }

    /* Whatever follows the last '%' */
    len = strlen(dir);
    memcpy(path, dir, len);
    path += len;

    /* Only look at path[-1] once something has actually been written */
    if (*fn && (path == start || path[-1] != '/'))
        *path++ = '/';
    strcpy(path, fn);
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1859
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Make the directory containing path and any prefix directories.
 *
 * "path" is cut at its last '/' for the duration of the call and
 * restored before returning, so it must point to writable storage.
 * A path without any '/' is left alone.  Missing parents are created
 * by recursing on the shortened path.
 *
 * "mode" is given to mkdir() and then applied again with chmod() to each
 * directory this call creates.  chmod() is never applied to something
 * that already existed.
 *
 * Failures are not reported; this is a best-effort helper.
 */
void mkdir_prefix(char *path, int mode) {
    char *cp = strrchr(path, '/');
    if (!cp)
        return;

    *cp = 0;
    if (!is_directory(path)) {
        int made = (mkdir(path, mode) == 0);

        if (!made) {
            /* Probably a missing parent: build those, then try again */
            mkdir_prefix(path, mode);
            made = (mkdir(path, mode) == 0);
        }

        /* Only touch the permissions of a directory we just created */
        if (made)
            chmod(path, mode);
    }
    *cp = '/';
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1885
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Return the cache directory to use, based on the first of these
 * environment variables to be set to a non-empty value:
 * XDG_CACHE_HOME, HOME, TMPDIR, TEMP.  "/tmp" is returned when none of
 * them qualifies.
 *
 * "*extra" is set to a suffix to be appended to the returned directory:
 * "/.cache" when HOME was chosen, otherwise the empty string.
 */
static const char *get_cache_basedir(const char **extra) {
    static const struct {
        const char *var;
        const char *suffix;
    } candidates[] = {
        { "XDG_CACHE_HOME", ""        },
        { "HOME",           "/.cache" },
        { "TMPDIR",         ""        },
        { "TEMP",           ""        },
    };
    size_t i;

    *extra = "";

    for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
        const char *value = getenv(candidates[i].var);

        if (value && *value) {
            *extra = candidates[i].suffix;
            return value;
        }
    }

    return "/tmp";
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1909
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1910 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1911 * Queries the M5 string from the header and attempts to populate the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1912 * reference from this using the REF_PATH environment.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1913 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1914 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1915 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1916 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1917 static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1918 char *ref_path = getenv("REF_PATH");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1919 SAM_hdr_type *ty;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1920 SAM_hdr_tag *tag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1921 char path[PATH_MAX], path_tmp[PATH_MAX], cache[PATH_MAX];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1922 char *local_cache = getenv("REF_CACHE");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1923 mFILE *mf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1924
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1925 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1926 fprintf(stderr, "cram_populate_ref on fd %p, id %d\n", fd, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1927
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1928 if (!ref_path || *ref_path == '\0') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1929 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1930 * If we have no ref path, we use the EBI server.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1931 * However to avoid spamming it we require a local ref cache too.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1932 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1933 ref_path = "http://www.ebi.ac.uk:80/ena/cram/md5/%s";
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1934 if (!local_cache || *local_cache == '\0') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1935 const char *extra;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1936 const char *base = get_cache_basedir(&extra);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1937 snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1938 local_cache = cache;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1939 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1940 fprintf(stderr, "Populating local cache: %s\n", local_cache);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1941 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1942 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1943
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1944 if (!r->name)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1945 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1946
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1947 if (!(ty = sam_hdr_find(fd->header, "SQ", "SN", r->name)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1948 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1949
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1950 if (!(tag = sam_hdr_find_key(fd->header, ty, "M5", NULL)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1951 goto no_M5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1952
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1953 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1954 fprintf(stderr, "Querying ref %s\n", tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1955
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1956 /* Use cache if available */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1957 if (local_cache && *local_cache) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1958 struct stat sb;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1959 BGZF *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1960
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1961 expand_cache_path(path, local_cache, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1962
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1963 if (0 == stat(path, &sb) && (fp = bgzf_open(path, "r"))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1964 r->length = sb.st_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1965 r->offset = r->line_length = r->bases_per_line = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1966
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1967 r->fn = string_dup(fd->refs->pool, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1968
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1969 if (fd->refs->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1970 if (bgzf_close(fd->refs->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1971 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1972 fd->refs->fp = fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1973 fd->refs->fn = r->fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1974
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1975 // Fall back to cram_get_ref() where it'll do the actual
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1976 // reading of the file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1977 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1978 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1979 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1980
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1981 /* Otherwise search */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1982 if ((mf = open_path_mfile(tag->str+3, ref_path, NULL))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1983 size_t sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1984 r->seq = mfsteal(mf, &sz);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1985 r->length = sz;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1986 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1987 refs_t *refs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1988 char *fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1989
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1990 no_M5:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1991 /* Failed to find in search path or M5 cache, see if @SQ UR: tag? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1992 if (!(tag = sam_hdr_find_key(fd->header, ty, "UR", NULL)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1993 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1994
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1995 fn = (strncmp(tag->str+3, "file:", 5) == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1996 ? tag->str+8
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1997 : tag->str+3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1998
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1999 if (fd->refs->fp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2000 if (bgzf_close(fd->refs->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2001 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2002 fd->refs->fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2003 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2004 if (!(refs = refs_load_fai(fd->refs, fn, 0)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2005 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2006 fd->refs = refs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2007 if (fd->refs->fp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2008 if (bgzf_close(fd->refs->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2009 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2010 fd->refs->fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2011 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2012
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2013 if (!fd->refs->fn)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2014 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2015
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2016 if (-1 == refs2id(fd->refs, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2017 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2018 if (!fd->refs->ref_id || !fd->refs->ref_id[id])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2019 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2020
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2021 // Local copy already, so fall back to cram_get_ref().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2022 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2023 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2024
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2025 /* Populate the local disk cache if required */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2026 if (local_cache && *local_cache) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2027 FILE *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2028 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2029
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2030 expand_cache_path(path, local_cache, tag->str+3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2031 if (fd->verbose)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2032 fprintf(stderr, "Path='%s'\n", path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2033 mkdir_prefix(path, 01777);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2034
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2035 i = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2036 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2037 sprintf(path_tmp, "%s.tmp_%d", path, /*getpid(),*/ i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2038 i++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2039 fp = fopen(path_tmp, "wx");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2040 } while (fp == NULL && errno == EEXIST);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2041 if (!fp) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2042 perror(path_tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2043
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2044 // Not fatal - we have the data already so keep going.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2045 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2046 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2047
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2048 if (r->length != fwrite(r->seq, 1, r->length, fp)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2049 perror(path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2050 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2051 if (-1 == paranoid_fclose(fp)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2052 unlink(path_tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2053 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2054 if (0 == chmod(path_tmp, 0444))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2055 rename(path_tmp, path);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2056 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2057 unlink(path_tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2058 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2059 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2060
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2061 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2062 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2063
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2064 static void cram_ref_incr_locked(refs_t *r, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2065 RP("%d INC REF %d, %d %p\n", gettid(), id, (int)(id>=0?r->ref_id[id]->count+1:-999), id>=0?r->ref_id[id]->seq:(char *)1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2066
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2067 if (id < 0 || !r->ref_id[id]->seq)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2068 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2069
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2070 if (r->last_id == id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2071 r->last_id = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2072
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2073 ++r->ref_id[id]->count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2074 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2075
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2076 void cram_ref_incr(refs_t *r, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2077 pthread_mutex_lock(&r->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2078 cram_ref_incr_locked(r, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2079 pthread_mutex_unlock(&r->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2080 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2081
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2082 static void cram_ref_decr_locked(refs_t *r, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2083 RP("%d DEC REF %d, %d %p\n", gettid(), id, (int)(id>=0?r->ref_id[id]->count-1:-999), id>=0?r->ref_id[id]->seq:(char *)1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2084
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2085 if (id < 0 || !r->ref_id[id]->seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2086 assert(r->ref_id[id]->count >= 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2087 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2088 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2089
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2090 if (--r->ref_id[id]->count <= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2091 assert(r->ref_id[id]->count == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2092 if (r->last_id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2093 if (r->ref_id[r->last_id]->count <= 0 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2094 r->ref_id[r->last_id]->seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2095 RP("%d FREE REF %d (%p)\n", gettid(),
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2096 r->last_id, r->ref_id[r->last_id]->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2097 free(r->ref_id[r->last_id]->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2098 r->ref_id[r->last_id]->seq = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2099 r->ref_id[r->last_id]->length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2100 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2101 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2102 r->last_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2103 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2104 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2105
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2106 void cram_ref_decr(refs_t *r, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2107 pthread_mutex_lock(&r->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2108 cram_ref_decr_locked(r, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2109 pthread_mutex_unlock(&r->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2110 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2111
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2112 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2113 * Used by cram_ref_load and cram_ref_get. The file handle will have
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2114 * already been opened, so we can catch it. The ref_entry *e informs us
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2115 * of whether this is a multi-line fasta file or a raw MD5 style file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2116 * Either way we create a single contiguous sequence.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2117 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2118 * Returns all or part of a reference sequence on success (malloced);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2119 * NULL on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2120 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2121 static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2122 off_t offset, len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2123 char *seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2124
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2125 if (end < start)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2126 end = start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2127
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2128 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2129 * Compute locations in file. This is trivial for the MD5 files, but
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2130 * is still necessary for the fasta variants.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2131 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2132 offset = e->line_length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2133 ? e->offset + (start-1)/e->bases_per_line * e->line_length +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2134 (start-1) % e->bases_per_line
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2135 : start-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2136
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2137 len = (e->line_length
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2138 ? e->offset + (end-1)/e->bases_per_line * e->line_length +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2139 (end-1) % e->bases_per_line
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2140 : end-1) - offset + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2141
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2142 if (bgzf_useek(fp, offset, SEEK_SET) < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2143 perror("bgzf_useek() on reference file");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2144 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2145 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2147 if (len == 0 || !(seq = malloc(len))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2148 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2149 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2150
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2151 if (len != bgzf_read(fp, seq, len)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2152 perror("bgzf_read() on reference file");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2153 free(seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2154 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2155 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2156
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2157 /* Strip white-space if required. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2158 if (len != end-start+1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2159 int i, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2160 char *cp = seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2161 char *cp_to;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2162
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2163 for (i = j = 0; i < len; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2164 if (cp[i] >= '!' && cp[i] <= '~')
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2165 cp[j++] = cp[i] & ~0x20;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2166 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2167 cp_to = cp+j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2168
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2169 if (cp_to - seq != end-start+1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2170 fprintf(stderr, "Malformed reference file?\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2171 free(seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2172 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2173 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2174 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2175 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2176 for (i = 0; i < len; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2177 seq[i] = seq[i] & ~0x20; // uppercase in ASCII
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2178 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2179 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2180
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2181 return seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2182 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2183
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2184 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2185 * Load the entire reference 'id'.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2186 * This also increments the reference count by 1.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2187 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2188 * Returns ref_entry on success;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2189 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2190 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2191 ref_entry *cram_ref_load(refs_t *r, int id) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2192 ref_entry *e = r->ref_id[id];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2193 int start = 1, end = e->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2194 char *seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2195
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2196 if (e->seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2197 return e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2198 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2199
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2200 assert(e->count == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2201
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2202 if (r->last) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2203 #ifdef REF_DEBUG
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2204 int idx = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2205 for (idx = 0; idx < r->nref; idx++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2206 if (r->last == r->ref_id[idx])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2207 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2208 RP("%d cram_ref_load DECR %d\n", gettid(), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2209 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2210 assert(r->last->count > 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2211 if (--r->last->count <= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2212 RP("%d FREE REF %d (%p)\n", gettid(), id, r->ref_id[id]->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2213 if (r->last->seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2214 free(r->last->seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2215 r->last->seq = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2216 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2217 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2218 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2219
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2220 /* Open file if it's not already the current open reference */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2221 if (strcmp(r->fn, e->fn) || r->fp == NULL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2222 if (r->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2223 if (bgzf_close(r->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2224 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2225 r->fn = e->fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2226 if (!(r->fp = bgzf_open_ref(r->fn, "r")))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2227 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2228 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2229
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2230 RP("%d Loading ref %d (%d..%d)\n", gettid(), id, start, end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2231
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2232 if (!(seq = load_ref_portion(r->fp, e, start, end))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2233 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2234 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2235
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2236 RP("%d Loaded ref %d (%d..%d) = %p\n", gettid(), id, start, end, seq);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2237
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2238 RP("%d INC REF %d, %d\n", gettid(), id, (int)(e->count+1));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2239 e->seq = seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2240 e->count++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2241
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2242 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2243 * Also keep track of last used ref so incr/decr loops on the same
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2244 * sequence don't cause load/free loops.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2245 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2246 RP("%d cram_ref_load INCR %d => %d\n", gettid(), id, e->count+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2247 r->last = e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2248 e->count++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2249
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2250 return e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2251 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2252
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2253 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2254 * Returns a portion of a reference sequence from start to end inclusive.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2255 * The returned pointer is owned by either the cram_file fd or by the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2256 * internal refs_t structure and should not be freed by the caller.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2257 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2258 * The difference is whether or not this refs_t is in use by just the one
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2259 * cram_fd or by multiples, or whether we have multiple threads accessing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2260 * references. In either case fd->shared_ref will be true and we start using
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2261 * reference counting to track the number of users of a specific reference
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2262 * sequence.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2263 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2264 * Otherwise the ref seq returned is allocated as part of cram_fd itself
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2265 * and will be freed up on the next call to cram_get_ref or cram_close.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2266 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2267 * To return the entire reference sequence, specify start as 1 and end
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2268 * as 0.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2269 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2270 * To cease using a reference, call cram_ref_decr().
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2271 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2272 * Returns reference on success,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2273 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2274 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2275 char *cram_get_ref(cram_fd *fd, int id, int start, int end) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2276 ref_entry *r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2277 char *seq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2278 int ostart = start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2279
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2280 if (id == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2281 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2282
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2283 /* FIXME: axiomatic query of r->seq being true?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2284 * Or shortcut for unsorted data where we load once and never free?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2285 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2286
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2287 //fd->shared_ref = 1; // hard code for now to simplify things
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2288
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2289 pthread_mutex_lock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2290
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2291 RP("%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd, id, start, end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2292
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2293 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2294 * Unsorted data implies we want to fetch an entire reference at a time.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2295 * We just deal with this at the moment by claiming we're sharing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2296 * references instead, which has the same requirement.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2297 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2298 if (fd->unsorted)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2299 fd->shared_ref = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2300
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2301
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2302 /* Sanity checking: does this ID exist? */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2303 if (id >= fd->refs->nref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2304 fprintf(stderr, "No reference found for id %d\n", id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2305 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2306 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2307 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2308
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2309 if (!fd->refs || !fd->refs->ref_id[id]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2310 fprintf(stderr, "No reference found for id %d\n", id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2311 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2312 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2313 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2314
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2315 if (!(r = fd->refs->ref_id[id])) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2316 fprintf(stderr, "No reference found for id %d\n", id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2317 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2318 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2319 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2320
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2321
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2322 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2323 * It has an entry, but may not have been populated yet.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2324 * Any manually loaded .fai files have their lengths known.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2325 * A ref entry computed from @SQ lines (M5 or UR field) will have
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2326 * r->length == 0 unless it's been loaded once and verified that we have
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2327 * an on-disk filename for it.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2328 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2329 * 19 Sep 2013: Moved the lock here as the cram_populate_ref code calls
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2330 * open_path_mfile and libcurl, which isn't multi-thread safe unless I
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2331 * rewrite my code to have one curl handle per thread.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2332 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2333 pthread_mutex_lock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2334 if (r->length == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2335 if (cram_populate_ref(fd, id, r) == -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2336 fprintf(stderr, "Failed to populate reference for id %d\n", id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2337 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2338 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2339 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2340 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2341 r = fd->refs->ref_id[id];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2342 if (fd->unsorted)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2343 cram_ref_incr_locked(fd->refs, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2344 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2345
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2346
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2347 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2348 * We now know the filename containing the reference, so check
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2349 * for limits. If it's over half the reference we'll load all of it in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2350 * memory as this will speed up subsequent calls.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2351 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2352 if (end < 1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2353 end = r->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2354 if (end >= r->length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2355 end = r->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2356 assert(start >= 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2357
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2358 if (end - start >= 0.5*r->length || fd->shared_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2359 start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2360 end = r->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2361 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2362
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2363 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2364 * Maybe we have it cached already? If so use it.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2365 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2366 * Alternatively if we don't have the sequence but we're sharing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2367 * references and/or are asking for the entire length of it, then
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2368 * load the full reference into the refs structure and return
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2369 * a pointer to that one instead.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2370 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2371 if (fd->shared_ref || r->seq || (start == 1 && end == r->length)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2372 char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2373
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2374 if (id >= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2375 if (r->seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2376 cram_ref_incr_locked(fd->refs, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2377 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2378 ref_entry *e;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2379 if (!(e = cram_ref_load(fd->refs, id))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2380 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2381 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2382 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2383 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2384
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2385 /* unsorted data implies cache ref indefinitely, to avoid
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2386 * continually loading and unloading.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2387 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2388 if (fd->unsorted)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2389 cram_ref_incr_locked(fd->refs, id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2390 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2391
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2392 fd->ref = NULL; /* We never access it directly */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2393 fd->ref_start = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2394 fd->ref_end = r->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2395 fd->ref_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2396
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2397 cp = fd->refs->ref_id[id]->seq + ostart-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2398 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2399 fd->ref = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2400 cp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2401 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2402
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2403 RP("%d cram_get_ref returning for id %d, count %d\n", gettid(), id, (int)r->count);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2404
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2405 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2406 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2407 return cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2408 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2409
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2410 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2411 * Otherwise we're not sharing, we don't have a copy of it already and
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2412 * we're only asking for a small portion of it.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2413 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2414 * In this case load up just that segment ourselves, freeing any old
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2415 * small segments in the process.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2416 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2417
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2418 /* Unmapped ref ID */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2419 if (id < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2420 if (fd->ref_free) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2421 free(fd->ref_free);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2422 fd->ref_free = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2423 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2424 fd->ref = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2425 fd->ref_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2426 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2427 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2428 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2429 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2430
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2431 /* Open file if it's not already the current open reference */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2432 if (strcmp(fd->refs->fn, r->fn) || fd->refs->fp == NULL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2433 if (fd->refs->fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2434 if (bgzf_close(fd->refs->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2435 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2436 fd->refs->fn = r->fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2437 if (!(fd->refs->fp = bgzf_open_ref(fd->refs->fn, "r"))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2438 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2439 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2440 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2441 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2442 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2443
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2444 if (!(fd->ref = load_ref_portion(fd->refs->fp, r, start, end))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2445 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2446 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2447 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2448 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2449
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2450 if (fd->ref_free)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2451 free(fd->ref_free);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2452
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2453 fd->ref_id = id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2454 fd->ref_start = start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2455 fd->ref_end = end;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2456 fd->ref_free = fd->ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2457 seq = fd->ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2458
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2459 pthread_mutex_unlock(&fd->refs->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2460 pthread_mutex_unlock(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2461
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2462 return seq + ostart - start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2463 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2464
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2465 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2466 * If fd has been opened for reading, it may be permitted to specify 'fn'
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2467 * as NULL and let the code auto-detect the reference by parsing the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2468 * SAM header @SQ lines.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2469 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2470 int cram_load_reference(cram_fd *fd, char *fn) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2471 if (fn) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2472 fd->refs = refs_load_fai(fd->refs, fn,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2473 !(fd->embed_ref && fd->mode == 'r'));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2474 fn = fd->refs ? fd->refs->fn : NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2475 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2476 fd->ref_fn = fn;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2477
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2478 if ((!fd->refs || (fd->refs->nref == 0 && !fn)) && fd->header) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2479 if (fd->refs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2480 refs_free(fd->refs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2481 if (!(fd->refs = refs_create()))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2482 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2483 if (-1 == refs_from_header(fd->refs, fd, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2484 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2485 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2486
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2487 if (fd->header)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2488 if (-1 == refs2id(fd->refs, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2489 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2490
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2491 return fn ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2492 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2493
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2494 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2495 * Containers
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2496 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2497
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2498 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2499 * Creates a new container, specifying the maximum number of slices
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2500 * and records permitted.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2501 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2502 * Returns cram_container ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2503 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2504 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2505 cram_container *cram_new_container(int nrec, int nslice) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2506 cram_container *c = calloc(1, sizeof(*c));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2507 enum cram_DS_ID id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2508
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2509 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2510 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2511
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2512 c->curr_ref = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2513
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2514 c->max_c_rec = nrec * nslice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2515 c->curr_c_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2516
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2517 c->max_rec = nrec;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2518 c->record_counter = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2519 c->num_bases = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2520
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2521 c->max_slice = nslice;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2522 c->curr_slice = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2523
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2524 c->pos_sorted = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2525 c->max_apos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2526 c->multi_seq = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2527
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2528 c->bams = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2529
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2530 if (!(c->slices = (cram_slice **)calloc(nslice, sizeof(cram_slice *))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2531 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2532 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2533
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2534 if (!(c->comp_hdr = cram_new_compression_header()))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2535 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2536 c->comp_hdr_block = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2537
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2538 for (id = DS_RN; id < DS_TN; id++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2539 if (!(c->stats[id] = cram_stats_create())) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2540
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2541 //c->aux_B_stats = cram_stats_create();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2542
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2543 if (!(c->tags_used = kh_init(s_i2i)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2544 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2545 c->refs_used = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2546
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2547 return c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2548
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2549 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2550 if (c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2551 if (c->slices)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2552 free(c->slices);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2553 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2554 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2555 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2556 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2557
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2558 void cram_free_container(cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2559 enum cram_DS_ID id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2560 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2561
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2562 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2563 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2564
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2565 if (c->refs_used)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2566 free(c->refs_used);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2567
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2568 if (c->landmark)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2569 free(c->landmark);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2570
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2571 if (c->comp_hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2572 cram_free_compression_header(c->comp_hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2573
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2574 if (c->comp_hdr_block)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2575 cram_free_block(c->comp_hdr_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2576
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2577 if (c->slices) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2578 for (i = 0; i < c->max_slice; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2579 if (c->slices[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2580 cram_free_slice(c->slices[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2581 free(c->slices);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2582 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2583
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2584 for (id = DS_RN; id < DS_TN; id++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2585 if (c->stats[id]) cram_stats_free(c->stats[id]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2586
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2587 //if (c->aux_B_stats) cram_stats_free(c->aux_B_stats);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2588
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2589 if (c->tags_used) kh_destroy(s_i2i, c->tags_used);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2590
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2591 free(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2592 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2593
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2594 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2595 * Reads a container header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2596 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2597 * Returns cram_container on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2598 * NULL on failure or no container left (fd->err == 0).
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2599 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2600 cram_container *cram_read_container(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2601 cram_container c2, *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2602 int i, s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2603 size_t rd = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2604
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2605 fd->err = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2606 fd->eof = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2607
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2608 memset(&c2, 0, sizeof(c2));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2609 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2610 if ((s = itf8_decode(fd, &c2.length)) == -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2611 fd->eof = fd->empty_container ? 1 : 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2612 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2613 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2614 rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2615 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2616 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2617 if ((s = int32_decode(fd, &c2.length)) == -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2618 if (CRAM_MAJOR_VERS(fd->version) == 2 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2619 CRAM_MINOR_VERS(fd->version) == 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2620 fd->eof = 1; // EOF blocks arrived in v2.1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2621 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2622 fd->eof = fd->empty_container ? 1 : 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2623 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2624 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2625 rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2626 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2627 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2628 if ((s = itf8_decode(fd, &c2.ref_seq_id)) == -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2629 if ((s = itf8_decode(fd, &c2.ref_seq_start))== -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2630 if ((s = itf8_decode(fd, &c2.ref_seq_span)) == -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2631 if ((s = itf8_decode(fd, &c2.num_records)) == -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2632
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2633 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2634 c2.record_counter = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2635 c2.num_bases = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2636 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2637 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2638 if ((s = ltf8_decode(fd, &c2.record_counter)) == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2639 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2640 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2641 rd += s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2642 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2643 int32_t i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2644 if ((s = itf8_decode(fd, &i32)) == -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2645 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2646 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2647 rd += s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2648 c2.record_counter = i32;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2649 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2650
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2651 if ((s = ltf8_decode(fd, &c2.num_bases))== -1)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2652 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2653 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2654 rd += s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2655 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2656 if ((s = itf8_decode(fd, &c2.num_blocks)) == -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2657 if ((s = itf8_decode(fd, &c2.num_landmarks))== -1) return NULL; else rd+=s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2658
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2659 if (!(c = calloc(1, sizeof(*c))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2660 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2661
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2662 *c = c2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2663
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2664 if (!(c->landmark = malloc(c->num_landmarks * sizeof(int32_t))) &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2665 c->num_landmarks) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2666 fd->err = errno;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2667 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2668 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2669 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2670 for (i = 0; i < c->num_landmarks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2671 if ((s = itf8_decode(fd, &c->landmark[i])) == -1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2672 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2673 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2674 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2675 rd += s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2676 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2677 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2678
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2679 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2680 uint32_t crc, i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2681 unsigned char *dat = malloc(50 + 5*(c->num_landmarks)), *cp = dat;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2682 if (!dat) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2683 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2684 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2685 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2686 if (-1 == int32_decode(fd, (int32_t *)&c->crc32))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2687 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2688 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2689 rd+=4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2690
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2691 /* Reencode first as we can't easily access the original byte stream.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2692 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2693 * FIXME: Technically this means this may not be fool proof. We could
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2694 * create a CRAM file using a 2 byte ITF8 value that can fit in a
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2695 * 1 byte field, meaning the encoding is different to the original
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2696 * form and so has a different CRC.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2697 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2698 * The correct implementation would be to have an alternative form
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2699 * of itf8_decode which also squirrels away the raw byte stream
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2700 * during decoding so we can then CRC that.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2701 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2702 *(unsigned int *)cp = le_int4(c->length); cp += 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2703 cp += itf8_put(cp, c->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2704 cp += itf8_put(cp, c->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2705 cp += itf8_put(cp, c->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2706 cp += itf8_put(cp, c->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2707 cp += ltf8_put((char *)cp, c->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2708 cp += itf8_put(cp, c->num_bases);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2709 cp += itf8_put(cp, c->num_blocks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2710 cp += itf8_put(cp, c->num_landmarks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2711 for (i = 0; i < c->num_landmarks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2712 cp += itf8_put(cp, c->landmark[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2713 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2714
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2715 crc = crc32(0L, dat, cp-dat);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2716 if (crc != c->crc32) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2717 fprintf(stderr, "Container header CRC32 failure\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2718 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2719 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2720 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2721 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2722
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2723 c->offset = rd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2724 c->slices = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2725 c->curr_slice = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2726 c->max_slice = c->num_landmarks;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2727 c->slice_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2728 c->curr_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2729 c->max_rec = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2730
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2731 if (c->ref_seq_id == -2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2732 c->multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2733 fd->multi_seq = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2734 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2735
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2736 fd->empty_container =
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2737 (c->num_records == 0 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2738 c->ref_seq_id == -1 &&
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2739 c->ref_seq_start == 0x454f46 /* EOF */) ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2740
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2741 return c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2742 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2743
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2744 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2745 * Writes a container structure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2746 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2747 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2748 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2749 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2750 int cram_write_container(cram_fd *fd, cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2751 char buf_a[1024], *buf = buf_a, *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2752 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2753
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2754 if (55 + c->num_landmarks * 5 >= 1024)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2755 buf = malloc(55 + c->num_landmarks * 5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2756 cp = buf;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2757
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2758 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2759 cp += itf8_put(cp, c->length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2760 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2761 *(int32_t *)cp = le_int4(c->length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2762 cp += 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2763 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2764 if (c->multi_seq) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2765 cp += itf8_put(cp, -2);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2766 cp += itf8_put(cp, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2767 cp += itf8_put(cp, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2768 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2769 cp += itf8_put(cp, c->ref_seq_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2770 cp += itf8_put(cp, c->ref_seq_start);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2771 cp += itf8_put(cp, c->ref_seq_span);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2772 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2773 cp += itf8_put(cp, c->num_records);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2774 if (CRAM_MAJOR_VERS(fd->version) == 2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2775 cp += itf8_put(cp, c->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2776 cp += ltf8_put(cp, c->num_bases);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2777 } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2778 cp += ltf8_put(cp, c->record_counter);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2779 cp += ltf8_put(cp, c->num_bases);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2780 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2781
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2782 cp += itf8_put(cp, c->num_blocks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2783 cp += itf8_put(cp, c->num_landmarks);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2784 for (i = 0; i < c->num_landmarks; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2785 cp += itf8_put(cp, c->landmark[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2786
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2787 if (CRAM_MAJOR_VERS(fd->version) >= 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2788 c->crc32 = crc32(0L, (uc *)buf, cp-buf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2789 cp[0] = c->crc32 & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2790 cp[1] = (c->crc32 >> 8) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2791 cp[2] = (c->crc32 >> 16) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2792 cp[3] = (c->crc32 >> 24) & 0xff;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2793 cp += 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2794 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2795
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2796 if (cp-buf != hwrite(fd->fp, buf, cp-buf)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2797 if (buf != buf_a)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2798 free(buf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2799 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2800 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2801
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2802 if (buf != buf_a)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2803 free(buf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2804
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2805 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2806 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2807
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2808 // common component shared by cram_flush_container{,_mt}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2809 static int cram_flush_container2(cram_fd *fd, cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2810 int i, j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2811
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2812 //fprintf(stderr, "Writing container %d, sum %u\n", c->record_counter, sum);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2813
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2814 /* Write the container struct itself */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2815 if (0 != cram_write_container(fd, c))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2816 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2817
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2818 /* And the compression header */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2819 if (0 != cram_write_block(fd, c->comp_hdr_block))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2820 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2821
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2822 /* Followed by the slice blocks */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2823 for (i = 0; i < c->curr_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2824 cram_slice *s = c->slices[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2825
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2826 if (0 != cram_write_block(fd, s->hdr_block))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2827 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2828
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2829 for (j = 0; j < s->hdr->num_blocks; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2830 if (0 != cram_write_block(fd, s->block[j]))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2831 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2832 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2833 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2834
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2835 return hflush(fd->fp) == 0 ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2836 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2837
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2838 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2839 * Flushes a completely or partially full container to disk, writing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2840 * container structure, header and blocks. This also calls the encoder
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2841 * functions.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2842 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2843 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2844 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2845 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2846 int cram_flush_container(cram_fd *fd, cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2847 /* Encode the container blocks and generate compression header */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2848 if (0 != cram_encode_container(fd, c))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2849 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2850
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2851 return cram_flush_container2(fd, c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2852 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2853
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2854 typedef struct {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2855 cram_fd *fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2856 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2857 } cram_job;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2858
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2859 void *cram_flush_thread(void *arg) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2860 cram_job *j = (cram_job *)arg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2861
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2862 /* Encode the container blocks and generate compression header */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2863 if (0 != cram_encode_container(j->fd, j->c)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2864 fprintf(stderr, "cram_encode_container failed\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2865 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2866 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2868 return arg;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2869 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2870
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2871 static int cram_flush_result(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2872 int i, ret = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2873 t_pool_result *r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2874
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2875 while ((r = t_pool_next_result(fd->rqueue))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2876 cram_job *j = (cram_job *)r->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2877 cram_container *c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2878
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2879 if (!j) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2880 t_pool_delete_result(r, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2881 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2882 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2883
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2884 fd = j->fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2885 c = j->c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2886
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2887 if (0 != cram_flush_container2(fd, c))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2888 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2889
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2890 /* Free the container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2891 for (i = 0; i < c->max_slice; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2892 cram_free_slice(c->slices[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2893 c->slices[i] = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2894 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2895
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2896 c->slice = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2897 c->curr_slice = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2898
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2899 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2900
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2901 ret |= hflush(fd->fp) == 0 ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2902
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2903 t_pool_delete_result(r, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2904 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2905
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2906 return ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2907 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2908
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2909 int cram_flush_container_mt(cram_fd *fd, cram_container *c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2910 cram_job *j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2911
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2912 if (!fd->pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2913 return cram_flush_container(fd, c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2914
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2915 if (!(j = malloc(sizeof(*j))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2916 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2917 j->fd = fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2918 j->c = c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2919
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2920 t_pool_dispatch(fd->pool, fd->rqueue, cram_flush_thread, j);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2921
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2922 return cram_flush_result(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2923 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2924
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2925 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2926 * Compression headers; the first part of the container
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2927 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2928
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2929 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2930 * Creates a new blank container compression header
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2931 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2932 * Returns header ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2933 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2934 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2935 cram_block_compression_hdr *cram_new_compression_header(void) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2936 cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2937 if (!hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2938 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2939
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2940 if (!(hdr->TD_blk = cram_new_block(CORE, 0))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2941 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2942 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2943 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2944
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2945 if (!(hdr->TD_hash = kh_init(m_s2i))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2946 cram_free_block(hdr->TD_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2947 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2948 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2949 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2950
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2951 if (!(hdr->TD_keys = string_pool_create(8192))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2952 kh_destroy(m_s2i, hdr->TD_hash);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2953 cram_free_block(hdr->TD_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2954 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2955 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2956 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2957
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2958 return hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2959 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2960
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2961 void cram_free_compression_header(cram_block_compression_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2962 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2963
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2964 if (hdr->landmark)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2965 free(hdr->landmark);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2966
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2967 if (hdr->preservation_map)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2968 kh_destroy(map, hdr->preservation_map);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2969
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2970 for (i = 0; i < CRAM_MAP_HASH; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2971 cram_map *m, *m2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2972 for (m = hdr->rec_encoding_map[i]; m; m = m2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2973 m2 = m->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2974 if (m->codec)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2975 m->codec->free(m->codec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2976 free(m);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2977 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2978 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2979
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2980 for (i = 0; i < CRAM_MAP_HASH; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2981 cram_map *m, *m2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2982 for (m = hdr->tag_encoding_map[i]; m; m = m2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2983 m2 = m->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2984 if (m->codec)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2985 m->codec->free(m->codec);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2986 free(m);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2987 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2988 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2989
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2990 for (i = 0; i < DS_END; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2991 if (hdr->codecs[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2992 hdr->codecs[i]->free(hdr->codecs[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2993 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2994
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2995 if (hdr->TL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2996 free(hdr->TL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2997 if (hdr->TD_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2998 cram_free_block(hdr->TD_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2999 if (hdr->TD_hash)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3000 kh_destroy(m_s2i, hdr->TD_hash);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3001 if (hdr->TD_keys)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3002 string_pool_destroy(hdr->TD_keys);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3003
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3004 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3005 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3006
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3007
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3008 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3009 * Slices and slice headers
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3010 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3011
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3012 void cram_free_slice_header(cram_block_slice_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3013 if (!hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3014 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3015
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3016 if (hdr->block_content_ids)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3017 free(hdr->block_content_ids);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3018
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3019 free(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3020
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3021 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3022 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3023
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3024 void cram_free_slice(cram_slice *s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3025 if (!s)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3026 return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3027
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3028 if (s->hdr_block)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3029 cram_free_block(s->hdr_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3030
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3031 if (s->block) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3032 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3033
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3034 if (s->hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3035 for (i = 0; i < s->hdr->num_blocks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3036 cram_free_block(s->block[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3037 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3038 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3039 free(s->block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3040 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3041
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3042 if (s->block_by_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3043 free(s->block_by_id);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3044
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3045 if (s->hdr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3046 cram_free_slice_header(s->hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3047
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3048 if (s->seqs_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3049 cram_free_block(s->seqs_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3050
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3051 if (s->qual_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3052 cram_free_block(s->qual_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3053
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3054 if (s->name_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3055 cram_free_block(s->name_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3056
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3057 if (s->aux_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3058 cram_free_block(s->aux_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3059
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3060 if (s->aux_OQ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3061 cram_free_block(s->aux_OQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3062
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3063 if (s->aux_BQ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3064 cram_free_block(s->aux_BQ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3065
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3066 if (s->aux_FZ_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3067 cram_free_block(s->aux_FZ_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3068
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3069 if (s->aux_oq_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3070 cram_free_block(s->aux_oq_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3071
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3072 if (s->aux_os_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3073 cram_free_block(s->aux_os_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3074
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3075 if (s->aux_oz_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3076 cram_free_block(s->aux_oz_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3077
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3078 if (s->base_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3079 cram_free_block(s->base_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3080
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3081 if (s->soft_blk)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3082 cram_free_block(s->soft_blk);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3083
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3084 if (s->cigar)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3085 free(s->cigar);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3086
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3087 if (s->crecs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3088 free(s->crecs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3089
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3090 if (s->features)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3091 free(s->features);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3092
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3093 if (s->TN)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3094 free(s->TN);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3095
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3096 if (s->pair_keys)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3097 string_pool_destroy(s->pair_keys);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3098
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3099 if (s->pair[0])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3100 kh_destroy(m_s2i, s->pair[0]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3101 if (s->pair[1])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3102 kh_destroy(m_s2i, s->pair[1]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3103
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3104 free(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3105 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3106
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3107 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3108 * Creates a new empty slice in memory, for subsequent writing to
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3109 * disk.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3110 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3111 * Returns cram_slice ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3112 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3113 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3114 cram_slice *cram_new_slice(enum cram_content_type type, int nrecs) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3115 cram_slice *s = calloc(1, sizeof(*s));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3116 if (!s)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3117 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3118
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3119 if (!(s->hdr = (cram_block_slice_hdr *)calloc(1, sizeof(*s->hdr))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3120 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3121 s->hdr->content_type = type;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3122
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3123 s->hdr_block = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3124 s->block = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3125 s->block_by_id = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3126 s->last_apos = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3127 if (!(s->crecs = malloc(nrecs * sizeof(cram_record)))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3128 s->cigar = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3129 s->cigar_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3130 s->ncigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3131
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3132 if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3133 if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3134 if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3135 if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3136 if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3137 if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3138
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3139 s->features = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3140 s->nfeatures = s->afeatures = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3141
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3142 #ifndef TN_external
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3143 s->TN = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3144 s->nTN = s->aTN = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3145 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3146
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3147 // Volatile keys as we do realloc in dstring
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3148 if (!(s->pair_keys = string_pool_create(8192))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3149 if (!(s->pair[0] = kh_init(m_s2i))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3150 if (!(s->pair[1] = kh_init(m_s2i))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3151
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3152 #ifdef BA_external
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3153 s->BA_len = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3154 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3155
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3156 return s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3157
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3158 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3159 if (s)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3160 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3161
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3162 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3163 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3164
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3165 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3166 * Loads an entire slice.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3167 * FIXME: In 1.0 the native unit of slices within CRAM is broken
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3168 * as slices contain references to objects in other slices.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3169 * To work around this while keeping the slice oriented outer loop
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3170 * we read all slices and stitch them together into a fake large
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3171 * slice instead.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3172 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3173 * Returns cram_slice ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3174 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3175 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3176 cram_slice *cram_read_slice(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3177 cram_block *b = cram_read_block(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3178 cram_slice *s = calloc(1, sizeof(*s));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3179 int i, n, max_id, min_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3180
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3181 if (!b || !s)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3182 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3183
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3184 s->hdr_block = b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3185 switch (b->content_type) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3186 case MAPPED_SLICE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3187 case UNMAPPED_SLICE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3188 if (!(s->hdr = cram_decode_slice_header(fd, b)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3189 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3190 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3191
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3192 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3193 fprintf(stderr, "Unexpected block of type %s\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3194 cram_content_type2str(b->content_type));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3195 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3196 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3197
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3198 s->block = calloc(n = s->hdr->num_blocks, sizeof(*s->block));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3199 if (!s->block)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3200 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3201
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3202 for (max_id = i = 0, min_id = INT_MAX; i < n; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3203 if (!(s->block[i] = cram_read_block(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3204 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3205
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3206 if (s->block[i]->content_type == EXTERNAL) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3207 if (max_id < s->block[i]->content_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3208 max_id = s->block[i]->content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3209 if (min_id > s->block[i]->content_id)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3210 min_id = s->block[i]->content_id;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3211 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3212 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3213 if (min_id >= 0 && max_id < 1024) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3214 if (!(s->block_by_id = calloc(1024, sizeof(s->block[0]))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3215 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3216
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3217 for (i = 0; i < n; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3218 if (s->block[i]->content_type != EXTERNAL)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3219 continue;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3220 s->block_by_id[s->block[i]->content_id] = s->block[i];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3221 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3222 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3223
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3224 /* Initialise encoding/decoding tables */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3225 s->cigar = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3226 s->cigar_alloc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3227 s->ncigar = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3228
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3229 if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3230 if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3231 if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3232 if (!(s->aux_blk = cram_new_block(EXTERNAL, DS_aux))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3233 if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3234 if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC))) goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3235
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3236 s->crecs = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3237
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3238 s->last_apos = s->hdr->ref_seq_start;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3239
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3240 return s;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3241
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3242 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3243 if (b)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3244 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3245 if (s) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3246 s->hdr_block = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3247 cram_free_slice(s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3248 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3249 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3250 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3251
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3252
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3253 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3254 * CRAM file definition (header)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3255 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3256
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3257 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3258 * Reads a CRAM file definition structure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3259 * Returns file_def ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3260 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3261 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3262 cram_file_def *cram_read_file_def(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3263 cram_file_def *def = malloc(sizeof(*def));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3264 if (!def)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3265 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3266
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3267 if (26 != hread(fd->fp, &def->magic[0], 26)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3268 free(def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3269 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3270 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3271
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3272 if (memcmp(def->magic, "CRAM", 4) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3273 free(def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3274 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3275 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3276
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3277 if (def->major_version > 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3278 fprintf(stderr, "CRAM version number mismatch\n"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3279 "Expected 1.x, 2.x or 3.x, got %d.%d\n",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3280 def->major_version, def->minor_version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3281 free(def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3282 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3283 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3284
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3285 fd->first_container += 26;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3286 fd->last_slice = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3287
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3288 return def;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3289 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3290
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3291 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3292 * Writes a cram_file_def structure to cram_fd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3293 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3294 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3295 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3296 int cram_write_file_def(cram_fd *fd, cram_file_def *def) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3297 return (hwrite(fd->fp, &def->magic[0], 26) == 26) ? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3298 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3299
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3300 void cram_free_file_def(cram_file_def *def) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3301 if (def) free(def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3302 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3303
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3304 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3305 * SAM header I/O
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3306 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3307
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3308
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3309 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3310 * Reads the SAM header from the first CRAM data block.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3311 * Also performs minimal parsing to extract read-group
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3312 * and sample information.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3313
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3314 * Returns SAM hdr ptr on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3315 * NULL on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3316 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3317 SAM_hdr *cram_read_SAM_hdr(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3318 int32_t header_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3319 char *header;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3320 SAM_hdr *hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3321
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3322 /* 1.1 onwards stores the header in the first block of a container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3323 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3324 /* Length */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3325 if (-1 == int32_decode(fd, &header_len))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3326 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3327
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3328 /* Alloc and read */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3329 if (NULL == (header = malloc(header_len+1)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3330 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3331
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3332 *header = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3333 if (header_len != hread(fd->fp, header, header_len))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3334 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3335
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3336 fd->first_container += 4 + header_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3337 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3338 cram_container *c = cram_read_container(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3339 cram_block *b;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3340 int i, len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3341
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3342 if (!c)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3343 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3344
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3345 if (c->num_blocks < 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3346 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3347 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3348 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3349
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3350 if (!(b = cram_read_block(fd))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3351 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3352 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3353 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3354 cram_uncompress_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3355
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3356 len = b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3357 itf8_size(b->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3358 itf8_size(b->uncomp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3359 itf8_size(b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3360
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3361 /* Extract header from 1st block */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3362 if (-1 == int32_get(b, &header_len) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3363 b->uncomp_size - 4 < header_len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3364 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3365 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3366 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3367 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3368 if (NULL == (header = malloc(header_len+1))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3369 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3370 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3371 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3372 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3373 memcpy(header, BLOCK_END(b), header_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3374 header[header_len]='\0';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3375 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3376
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3377 /* Consume any remaining blocks */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3378 for (i = 1; i < c->num_blocks; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3379 if (!(b = cram_read_block(fd))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3380 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3381 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3382 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3383 len += b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3384 itf8_size(b->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3385 itf8_size(b->uncomp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3386 itf8_size(b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3387 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3388 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3389
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3390 if (c->length && c->length > len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3391 // Consume padding
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3392 char *pads = malloc(c->length - len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3393 if (!pads) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3394 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3395 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3396 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3397
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3398 if (c->length - len != hread(fd->fp, pads, c->length - len)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3399 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3400 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3401 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3402 free(pads);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3403 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3404
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3405 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3406 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3407
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3408 /* Parse */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3409 hdr = sam_hdr_parse_(header, header_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3410 free(header);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3411
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3412 return hdr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3413 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3414
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Writes an absolute form of 'in' to 'out'.
 * Out must be at least PATH_MAX bytes long.
 *
 * A relative 'in' is prefixed with the current working directory and a
 * '/'.  If 'in' is already absolute, the working directory cannot be
 * obtained, or the joined path would not fit, 'in' is copied as-is
 * (truncated to PATH_MAX-1 characters).
 */
static void full_path(char *out, char *in) {
    size_t cwd_len;

    if (*in != '/' &&
        getcwd(out, PATH_MAX) &&
        (cwd_len = strlen(out)) + 1 + strlen(in) < PATH_MAX) {
        /* cwd is already in out; append "/<in>" after it. */
        sprintf(out + cwd_len, "/%.*s", PATH_MAX - (int)cwd_len, in);

        // FIXME: cope with `pwd`/../../../foo.fa ?
        return;
    }

    /* Absolute input, or the cwd-relative form is unavailable / too long. */
    strncpy(out, in, PATH_MAX);
    out[PATH_MAX-1] = 0;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3439
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3440 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3441 * Writes a CRAM SAM header.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3442 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3443 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3444 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3445 int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3446 int header_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3447 int blank_block = (CRAM_MAJOR_VERS(fd->version) >= 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3448
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3449 /* Write CRAM MAGIC if not yet written. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3450 if (fd->file_def->major_version == 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3451 fd->file_def->major_version = CRAM_MAJOR_VERS(fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3452 fd->file_def->minor_version = CRAM_MINOR_VERS(fd->version);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3453 if (0 != cram_write_file_def(fd, fd->file_def))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3454 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3455 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3456
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3457 /* 1.0 requires an UNKNOWN read-group */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3458 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3459 if (!sam_hdr_find_rg(hdr, "UNKNOWN"))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3460 if (sam_hdr_add(hdr, "RG",
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3461 "ID", "UNKNOWN", "SM", "UNKNOWN", NULL))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3462 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3463 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3464
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3465 /* Fix M5 strings */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3466 if (fd->refs && !fd->no_ref) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3467 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3468 for (i = 0; i < hdr->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3469 SAM_hdr_type *ty;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3470 char *ref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3471
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3472 if (!(ty = sam_hdr_find(hdr, "SQ", "SN", hdr->ref[i].name)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3473 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3474
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3475 if (!sam_hdr_find_key(hdr, ty, "M5", NULL)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3476 char unsigned buf[16], buf2[33];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3477 int j, rlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3478 MD5_CTX md5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3479
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3480 if (!fd->refs ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3481 !fd->refs->ref_id ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3482 !fd->refs->ref_id[i]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3483 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3484 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3485 rlen = fd->refs->ref_id[i]->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3486 MD5_Init(&md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3487 ref = cram_get_ref(fd, i, 1, rlen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3488 if (NULL == ref) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3489 rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3490 MD5_Update(&md5, ref, rlen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3491 MD5_Final(buf, &md5);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3492 cram_ref_decr(fd->refs, i);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3493
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3494 for (j = 0; j < 16; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3495 buf2[j*2+0] = "0123456789abcdef"[buf[j]>>4];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3496 buf2[j*2+1] = "0123456789abcdef"[buf[j]&15];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3497 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3498 buf2[32] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3499 if (sam_hdr_update(hdr, ty, "M5", buf2, NULL))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3500 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3501 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3502
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3503 if (fd->ref_fn) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3504 char ref_fn[PATH_MAX];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3505 full_path(ref_fn, fd->ref_fn);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3506 if (sam_hdr_update(hdr, ty, "UR", ref_fn, NULL))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3507 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3508 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3509 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3510 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3511
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3512 if (sam_hdr_rebuild(hdr))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3513 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3514
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3515 /* Length */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3516 header_len = sam_hdr_length(hdr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3517 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3518 if (-1 == int32_encode(fd, header_len))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3519 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3520
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3521 /* Text data */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3522 if (header_len != hwrite(fd->fp, sam_hdr_str(hdr), header_len))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3523 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3524 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3525 /* Create block(s) inside a container */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3526 cram_block *b = cram_new_block(FILE_HEADER, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3527 cram_container *c = cram_new_container(0, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3528 int padded_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3529 char *pads;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3530 int is_cram_3 = (CRAM_MAJOR_VERS(fd->version) >= 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3531
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3532 if (!b || !c) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3533 if (b) cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3534 if (c) cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3535 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3536 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3537
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3538 int32_put(b, header_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3539 BLOCK_APPEND(b, sam_hdr_str(hdr), header_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3540 BLOCK_UPLEN(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3541
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3542 // Compress header block if V3.0 and above
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3543 if (CRAM_MAJOR_VERS(fd->version) >= 3 && fd->level > 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3544 int method = 1<<GZIP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3545 if (fd->use_bz2)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3546 method |= 1<<BZIP2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3547 if (fd->use_lzma)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3548 method |= 1<<LZMA;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3549 cram_compress_block(fd, b, NULL, method, fd->level);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3550 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3551
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3552 if (blank_block) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3553 c->length = b->comp_size + 2 + 4*is_cram_3 +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3554 itf8_size(b->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3555 itf8_size(b->uncomp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3556 itf8_size(b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3557
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3558 c->num_blocks = 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3559 c->num_landmarks = 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3560 if (!(c->landmark = malloc(2*sizeof(*c->landmark)))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3561 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3562 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3563 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3564 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3565 c->landmark[0] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3566 c->landmark[1] = c->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3567
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3568 // Plus extra storage for uncompressed secondary blank block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3569 padded_length = MIN(c->length*.5, 10000);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3570 c->length += padded_length + 2 + 4*is_cram_3 +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3571 itf8_size(b->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3572 itf8_size(padded_length)*2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3573 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3574 // Pad the block instead.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3575 c->num_blocks = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3576 c->num_landmarks = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3577 if (!(c->landmark = malloc(sizeof(*c->landmark))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3578 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3579 c->landmark[0] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3580
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3581 padded_length = MAX(c->length*1.5, 10000) - c->length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3582
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3583 c->length = b->comp_size + padded_length +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3584 2 + 4*is_cram_3 +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3585 itf8_size(b->content_id) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3586 itf8_size(b->uncomp_size) +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3587 itf8_size(b->comp_size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3588
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3589 if (NULL == (pads = calloc(1, padded_length))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3590 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3591 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3592 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3593 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3594 BLOCK_APPEND(b, pads, padded_length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3595 BLOCK_UPLEN(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3596 free(pads);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3597 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3598
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3599 if (-1 == cram_write_container(fd, c)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3600 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3601 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3602 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3603 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3604
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3605 if (-1 == cram_write_block(fd, b)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3606 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3607 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3608 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3609 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3610
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3611 if (blank_block) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3612 BLOCK_RESIZE(b, padded_length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3613 memset(BLOCK_DATA(b), 0, padded_length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3614 BLOCK_SIZE(b) = padded_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3615 BLOCK_UPLEN(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3616 b->method = RAW;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3617 if (-1 == cram_write_block(fd, b)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3618 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3619 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3620 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3621 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3622 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3623
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3624 cram_free_block(b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3625 cram_free_container(c);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3626 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3627
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3628 if (-1 == refs_from_header(fd->refs, fd, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3629 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3630 if (-1 == refs2id(fd->refs, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3631 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3632
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3633 if (0 != hflush(fd->fp))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3634 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3635
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3636 RP("=== Finishing saving header ===\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3637
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3638 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3639 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3640
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3641 /* ----------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3642 * The top-level cram opening, closing and option handling
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3643 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3644
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3645 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3646 * Initialises the lookup tables. These could be global statics, but they're
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3647 * clumsy to setup in a multi-threaded environment unless we generate
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3648 * verbatim code and include that.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3649 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3650 static void cram_init_tables(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3651 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3652
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3653 memset(fd->L1, 4, 256);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3654 fd->L1['A'] = 0; fd->L1['a'] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3655 fd->L1['C'] = 1; fd->L1['c'] = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3656 fd->L1['G'] = 2; fd->L1['g'] = 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3657 fd->L1['T'] = 3; fd->L1['t'] = 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3658
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3659 memset(fd->L2, 5, 256);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3660 fd->L2['A'] = 0; fd->L2['a'] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3661 fd->L2['C'] = 1; fd->L2['c'] = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3662 fd->L2['G'] = 2; fd->L2['g'] = 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3663 fd->L2['T'] = 3; fd->L2['t'] = 3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3664 fd->L2['N'] = 4; fd->L2['n'] = 4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3665
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3666 if (CRAM_MAJOR_VERS(fd->version) == 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3667 for (i = 0; i < 0x200; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3668 int f = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3669
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3670 if (i & CRAM_FPAIRED) f |= BAM_FPAIRED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3671 if (i & CRAM_FPROPER_PAIR) f |= BAM_FPROPER_PAIR;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3672 if (i & CRAM_FUNMAP) f |= BAM_FUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3673 if (i & CRAM_FREVERSE) f |= BAM_FREVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3674 if (i & CRAM_FREAD1) f |= BAM_FREAD1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3675 if (i & CRAM_FREAD2) f |= BAM_FREAD2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3676 if (i & CRAM_FSECONDARY) f |= BAM_FSECONDARY;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3677 if (i & CRAM_FQCFAIL) f |= BAM_FQCFAIL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3678 if (i & CRAM_FDUP) f |= BAM_FDUP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3679
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3680 fd->bam_flag_swap[i] = f;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3681 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3682
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3683 for (i = 0; i < 0x1000; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3684 int g = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3685
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3686 if (i & BAM_FPAIRED) g |= CRAM_FPAIRED;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3687 if (i & BAM_FPROPER_PAIR) g |= CRAM_FPROPER_PAIR;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3688 if (i & BAM_FUNMAP) g |= CRAM_FUNMAP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3689 if (i & BAM_FREVERSE) g |= CRAM_FREVERSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3690 if (i & BAM_FREAD1) g |= CRAM_FREAD1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3691 if (i & BAM_FREAD2) g |= CRAM_FREAD2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3692 if (i & BAM_FSECONDARY) g |= CRAM_FSECONDARY;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3693 if (i & BAM_FQCFAIL) g |= CRAM_FQCFAIL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3694 if (i & BAM_FDUP) g |= CRAM_FDUP;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3695
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3696 fd->cram_flag_swap[i] = g;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3697 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3698 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3699 /* NOP */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3700 for (i = 0; i < 0x1000; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3701 fd->bam_flag_swap[i] = i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3702 for (i = 0; i < 0x1000; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3703 fd->cram_flag_swap[i] = i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3704 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3705
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3706 memset(fd->cram_sub_matrix, 4, 32*32);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3707 for (i = 0; i < 32; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3708 fd->cram_sub_matrix[i]['A'&0x1f]=0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3709 fd->cram_sub_matrix[i]['C'&0x1f]=1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3710 fd->cram_sub_matrix[i]['G'&0x1f]=2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3711 fd->cram_sub_matrix[i]['T'&0x1f]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3712 fd->cram_sub_matrix[i]['N'&0x1f]=4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3713 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3714 for (i = 0; i < 20; i+=4) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3715 int j;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3716 for (j = 0; j < 20; j++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3717 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3718 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3719 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3720 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3721 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3722 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+0]&0x1f]=0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3723 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+1]&0x1f]=1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3724 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+2]&0x1f]=2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3725 fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+3]&0x1f]=3;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3726 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3727 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3728
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Default CRAM version numbers.  cram_dopen() combines them as
 * major_version * 256 + minor_version to set fd->version for writers.
 */
static int major_version = 2;
static int minor_version = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3732
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3733 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3734 * Opens a CRAM file for read (mode "rb") or write ("wb").
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3735 * The filename may be "-" to indicate stdin or stdout.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3736 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3737 * Returns file handle on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3738 * NULL on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3739 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3740 cram_fd *cram_open(const char *filename, const char *mode) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3741 hFILE *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3742 cram_fd *fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3743 char fmode[3]= { mode[0], '\0', '\0' };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3744
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3745 if (strlen(mode) > 1 && (mode[1] == 'b' || mode[1] == 'c')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3746 fmode[1] = 'b';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3747 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3748
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3749 fp = hopen(filename, fmode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3750 if (!fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3751 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3752
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3753 fd = cram_dopen(fp, filename, mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3754 if (!fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3755 hclose_abruptly(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3756
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3757 return fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3758 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3759
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3760 /* Opens an existing stream for reading or writing.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3761 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3762 * Returns file handle on success;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3763 * NULL on failure.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3764 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3765 cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3766 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3767 char *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3768 cram_fd *fd = calloc(1, sizeof(*fd));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3769 if (!fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3770 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3771
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3772 fd->level = 5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3773 for (i = 0; mode[i]; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3774 if (mode[i] >= '0' && mode[i] <= '9') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3775 fd->level = mode[i] - '0';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3776 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3777 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3778 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3779
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3780 fd->fp = fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3781 fd->mode = *mode;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3782 fd->first_container = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3783
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3784 if (fd->mode == 'r') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3785 /* Reader */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3786
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3787 if (!(fd->file_def = cram_read_file_def(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3788 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3789
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3790 fd->version = fd->file_def->major_version * 256 +
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3791 fd->file_def->minor_version;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3792
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3793 if (!(fd->header = cram_read_SAM_hdr(fd)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3794 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3795
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3796 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3797 /* Writer */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3798 cram_file_def *def = calloc(1, sizeof(*def));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3799 if (!def)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3800 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3801
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3802 fd->file_def = def;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3803
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3804 def->magic[0] = 'C';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3805 def->magic[1] = 'R';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3806 def->magic[2] = 'A';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3807 def->magic[3] = 'M';
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3808 def->major_version = 0; // Indicator to write file def later.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3809 def->minor_version = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3810 memset(def->file_id, 0, 20);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3811 strncpy(def->file_id, filename, 20);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3812
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3813 fd->version = major_version * 256 + minor_version;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3814
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3815 /* SAM header written later along with this file_def */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3816 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3817
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3818 cram_init_tables(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3819
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3820 fd->prefix = strdup((cp = strrchr(filename, '/')) ? cp+1 : filename);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3821 if (!fd->prefix)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3822 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3823 fd->first_base = fd->last_base = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3824 fd->record_counter = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3825
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3826 fd->ctr = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3827 fd->refs = refs_create();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3828 if (!fd->refs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3829 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3830 fd->ref_id = -2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3831 fd->ref = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3832
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3833 fd->decode_md = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3834 fd->verbose = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3835 fd->seqs_per_slice = SEQS_PER_SLICE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3836 fd->slices_per_container = SLICE_PER_CNT;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3837 fd->embed_ref = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3838 fd->no_ref = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3839 fd->ignore_md5 = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3840 fd->use_bz2 = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3841 fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3842 fd->use_lzma = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3843 fd->multi_seq = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3844 fd->unsorted = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3845 fd->shared_ref = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3846
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3847 fd->index = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3848 fd->own_pool = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3849 fd->pool = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3850 fd->rqueue = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3851 fd->job_pending = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3852 fd->ooc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3853 fd->required_fields = INT_MAX;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3854
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3855 for (i = 0; i < DS_END; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3856 fd->m[i] = cram_new_metrics();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3857
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3858 fd->range.refid = -2; // no ref.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3859 fd->eof = 1; // See samtools issue #150
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3860 fd->ref_fn = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3861
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3862 fd->bl = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3863
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3864 /* Initialise dummy refs from the @SQ headers */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3865 if (-1 == refs_from_header(fd->refs, fd, fd->header))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3866 goto err;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3868 return fd;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3869
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3870 err:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3871 if (fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3872 free(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3873
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3874 return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3875 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3876
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3877 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3878 * Seek within a CRAM file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3879 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3880 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3881 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3882 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3883 int cram_seek(cram_fd *fd, off_t offset, int whence) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3884 char buf[65536];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3885
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3886 fd->ooc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3887
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3888 if (hseek(fd->fp, offset, whence) >= 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3889 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3890
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3891 if (!(whence == SEEK_CUR && offset >= 0))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3892 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3893
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3894 /* Couldn't fseek, but we're in SEEK_CUR mode so read instead */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3895 while (offset > 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3896 int len = MIN(65536, offset);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3897 if (len != hread(fd->fp, buf, len))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3898 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3899 offset -= len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3900 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3901
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3902 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3903 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3904
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3905 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3906 * Flushes a CRAM file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3907 * Useful for when writing to stdout without wishing to close the stream.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3908 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3909 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3910 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3911 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3912 int cram_flush(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3913 if (!fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3914 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3915
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3916 if (fd->mode == 'w' && fd->ctr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3917 if(fd->ctr->slice)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3918 fd->ctr->curr_slice++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3919 if (-1 == cram_flush_container_mt(fd, fd->ctr))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3920 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3921 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3922
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3923 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3924 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3925
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3926 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3927 * Closes a CRAM file.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3928 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3929 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3930 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3931 int cram_close(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3932 spare_bams *bl, *next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3933 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3934
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3935 if (!fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3936 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3937
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3938 if (fd->mode == 'w' && fd->ctr) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3939 if(fd->ctr->slice)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3940 fd->ctr->curr_slice++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3941 if (-1 == cram_flush_container_mt(fd, fd->ctr))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3942 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3943 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3944
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3945 if (fd->pool) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3946 t_pool_flush(fd->pool);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3947
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3948 if (0 != cram_flush_result(fd))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3949 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3950
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3951 pthread_mutex_destroy(&fd->metrics_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3952 pthread_mutex_destroy(&fd->ref_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3953 pthread_mutex_destroy(&fd->bam_list_lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3954
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3955 fd->ctr = NULL; // prevent double freeing
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3956
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3957 //fprintf(stderr, "CRAM: destroy queue %p\n", fd->rqueue);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3958
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3959 t_results_queue_destroy(fd->rqueue);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3960 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3961
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3962 if (fd->mode == 'w') {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3963 /* Write EOF block */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3964 if (CRAM_MAJOR_VERS(fd->version) == 3) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3965 if (38 != hwrite(fd->fp,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3966 "\x0f\x00\x00\x00\xff\xff\xff\xff" // Cont HDR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3967 "\x0f\xe0\x45\x4f\x46\x00\x00\x00" // Cont HDR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3968 "\x00\x01\x00" // Cont HDR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3969 "\x05\xbd\xd9\x4f" // CRC32
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3970 "\x00\x01\x00\x06\x06" // Comp.HDR blk
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3971 "\x01\x00\x01\x00\x01\x00" // Comp.HDR blk
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3972 "\xee\x63\x01\x4b", // CRC32
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3973 38))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3974 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3975 } else {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3976 if (30 != hwrite(fd->fp,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3977 "\x0b\x00\x00\x00\xff\xff\xff\xff"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3978 "\x0f\xe0\x45\x4f\x46\x00\x00\x00"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3979 "\x00\x01\x00\x00\x01\x00\x06\x06"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3980 "\x01\x00\x01\x00\x01\x00", 30))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3981 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3982 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3983 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3984
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3985 for (bl = fd->bl; bl; bl = next) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3986 int i, max_rec = fd->seqs_per_slice * fd->slices_per_container;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3987
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3988 next = bl->next;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3989 for (i = 0; i < max_rec; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3990 if (bl->bams[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3991 bam_free(bl->bams[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3992 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3993 free(bl->bams);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3994 free(bl);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3995 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3996
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3997 if (hclose(fd->fp) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3998 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3999
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4000 if (fd->file_def)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4001 cram_free_file_def(fd->file_def);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4002
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4003 if (fd->header)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4004 sam_hdr_free(fd->header);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4005
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4006 free(fd->prefix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4007
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4008 if (fd->ctr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4009 cram_free_container(fd->ctr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4010
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4011 if (fd->refs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4012 refs_free(fd->refs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4013 if (fd->ref_free)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4014 free(fd->ref_free);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4015
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4016 for (i = 0; i < DS_END; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4017 if (fd->m[i])
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4018 free(fd->m[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4019
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4020 if (fd->index)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4021 cram_index_free(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4022
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4023 if (fd->own_pool && fd->pool)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4024 t_pool_destroy(fd->pool, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4025
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4026 free(fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4027 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4028 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4029
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4030 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4031 * Returns 1 if we hit an EOF while reading.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4032 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4033 int cram_eof(cram_fd *fd) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4034 return fd->eof;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4035 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4036
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4037
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4038 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4039 * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4040 * Use this immediately after opening.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4041 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4042 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4043 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4044 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4045 int cram_set_option(cram_fd *fd, enum cram_option opt, ...) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4046 int r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4047 va_list args;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4048
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4049 va_start(args, opt);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4050 r = cram_set_voption(fd, opt, args);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4051 va_end(args);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4052
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4053 return r;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4054 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4055
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4056 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4057 * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4058 * Use this immediately after opening.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4059 *
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4060 * Returns 0 on success
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4061 * -1 on failure
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4062 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4063 int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4064 refs_t *refs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4065
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4066 if (!fd)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4067 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4068
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4069 switch (opt) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4070 case CRAM_OPT_DECODE_MD:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4071 fd->decode_md = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4072 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4073
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4074 case CRAM_OPT_PREFIX:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4075 if (fd->prefix)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4076 free(fd->prefix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4077 if (!(fd->prefix = strdup(va_arg(args, char *))))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4078 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4079 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4080
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4081 case CRAM_OPT_VERBOSITY:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4082 fd->verbose = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4083 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4084
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4085 case CRAM_OPT_SEQS_PER_SLICE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4086 fd->seqs_per_slice = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4087 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4088
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4089 case CRAM_OPT_SLICES_PER_CONTAINER:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4090 fd->slices_per_container = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4091 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4092
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4093 case CRAM_OPT_EMBED_REF:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4094 fd->embed_ref = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4095 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4096
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4097 case CRAM_OPT_NO_REF:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4098 fd->no_ref = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4099 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4100
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4101 case CRAM_OPT_IGNORE_MD5:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4102 fd->ignore_md5 = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4103 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4104
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4105 case CRAM_OPT_USE_BZIP2:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4106 fd->use_bz2 = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4107 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4108
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4109 case CRAM_OPT_USE_RANS:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4110 fd->use_rans = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4111 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4112
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4113 case CRAM_OPT_USE_LZMA:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4114 fd->use_lzma = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4115 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4116
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4117 case CRAM_OPT_SHARED_REF:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4118 fd->shared_ref = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4119 refs = va_arg(args, refs_t *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4120 if (refs != fd->refs) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4121 if (fd->refs)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4122 refs_free(fd->refs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4123 fd->refs = refs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4124 fd->refs->count++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4125 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4126 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4127
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4128 case CRAM_OPT_RANGE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4129 fd->range = *va_arg(args, cram_range *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4130 return cram_seek_to_refpos(fd, &fd->range);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4131
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4132 case CRAM_OPT_REFERENCE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4133 return cram_load_reference(fd, va_arg(args, char *));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4134
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4135 case CRAM_OPT_VERSION: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4136 int major, minor;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4137 char *s = va_arg(args, char *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4138 if (2 != sscanf(s, "%d.%d", &major, &minor)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4139 fprintf(stderr, "Malformed version string %s\n", s);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4140 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4141 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4142 if (!((major == 1 && minor == 0) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4143 (major == 2 && (minor == 0 || minor == 1)) ||
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4144 (major == 3 && minor == 0))) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4145 fprintf(stderr, "Unknown version string; "
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4146 "use 1.0, 2.0, 2.1 or 3.0\n");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4147 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4148 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4149 fd->version = major*256 + minor;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4150
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4151 if (CRAM_MAJOR_VERS(fd->version) >= 3)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4152 fd->use_rans = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4153 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4154 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4155
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4156 case CRAM_OPT_MULTI_SEQ_PER_SLICE:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4157 fd->multi_seq = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4158 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4159
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4160 case CRAM_OPT_NTHREADS: {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4161 int nthreads = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4162 if (nthreads > 1) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4163 if (!(fd->pool = t_pool_init(nthreads*2, nthreads)))
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4164 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4165
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4166 fd->rqueue = t_results_queue_init();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4167 pthread_mutex_init(&fd->metrics_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4168 pthread_mutex_init(&fd->ref_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4169 pthread_mutex_init(&fd->bam_list_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4170 fd->shared_ref = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4171 fd->own_pool = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4172 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4173 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4174 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4175
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4176 case CRAM_OPT_THREAD_POOL:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4177 fd->pool = va_arg(args, t_pool *);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4178 if (fd->pool) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4179 fd->rqueue = t_results_queue_init();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4180 pthread_mutex_init(&fd->metrics_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4181 pthread_mutex_init(&fd->ref_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4182 pthread_mutex_init(&fd->bam_list_lock, NULL);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4183 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4184 fd->shared_ref = 1; // Needed to avoid clobbering ref between threads
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4185 fd->own_pool = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4186
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4187 //fd->qsize = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4188 //fd->decoded = calloc(fd->qsize, sizeof(cram_container *));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4189 //t_pool_dispatch(fd->pool, cram_decoder_thread, fd);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4190 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4191
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4192 case CRAM_OPT_REQUIRED_FIELDS:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4193 fd->required_fields = va_arg(args, int);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4194 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4195
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4196 default:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4197 fprintf(stderr, "Unknown CRAM option code %d\n", opt);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4198 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4199 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4200
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4201 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4202 }