0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
|
4 2011, 2012 Attractive Chaos <attractor@live.co.uk>
|
|
5
|
|
6 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7 of this software and associated documentation files (the "Software"), to deal
|
|
8 in the Software without restriction, including without limitation the rights
|
|
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10 copies of the Software, and to permit persons to whom the Software is
|
|
11 furnished to do so, subject to the following conditions:
|
|
12
|
|
13 The above copyright notice and this permission notice shall be included in
|
|
14 all copies or substantial portions of the Software.
|
|
15
|
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22 THE SOFTWARE.
|
|
23 */
|
|
24
|
|
25 /* The BGZF library was originally written by Bob Handsaker from the Broad
|
|
26 * Institute. It was later improved by the SAMtools developers. */
|
|
27
|
|
28 #ifndef __BGZF_H
|
|
29 #define __BGZF_H
|
|
30
|
|
31 #include <stdint.h>
|
|
32 #include <stdio.h>
|
|
33 #include <zlib.h>
|
|
34 #include <sys/types.h>
|
|
35
|
|
/*
 * Block sizing. When changing either value, keep
 * compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE.
 */
#define BGZF_BLOCK_SIZE     0xff00
#define BGZF_MAX_BLOCK_SIZE 0x10000
|
|
38
|
|
/* Error codes; each value occupies its own bit (1, 2, 4, 8). */
#define BGZF_ERR_ZLIB   1
#define BGZF_ERR_HEADER 2
#define BGZF_ERR_IO     4
#define BGZF_ERR_MISUSE 8
|
|
43
|
|
/**
 * State of one BGZF stream.
 *
 * The three leading bit-fields are declared `signed int` explicitly. Whether
 * a plain `int` bit-field is signed is implementation-defined, so spelling
 * it out guarantees that a negative value stored in compress_level (for
 * example zlib's Z_DEFAULT_COMPRESSION, -1) reads back negative on every
 * compiler. Field order and widths are unchanged, so the layout is the same.
 */
typedef struct {
    signed int errcode:16;        /* NOTE(review): presumably holds BGZF_ERR_* values; confirm */
    signed int is_write:2;        /* NOTE(review): presumably non-zero when opened for writing; confirm */
    signed int compress_level:14; /* NOTE(review): presumably the zlib compression level; confirm */
    int cache_size;
    int block_length;
    int block_offset;             /* low 16 bits form the lower part of bgzf_tell()'s result */
    int64_t block_address;        /* shifted left by 16 to form the upper part of bgzf_tell()'s result */
    void *uncompressed_block;
    void *compressed_block;
    void *cache;                  /* a pointer to a hash table */
    void *fp;                     /* actual file handler; FILE* on writing; FILE* or knetFile* on reading */
    void *mt;                     /* only used for multi-threading */
} BGZF;
|
|
54
|
|
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/*
 * String buffer type. It is defined here only when KSTRING_T has not
 * already been defined by an earlier header.
 * NOTE(review): presumably l is the current length and m the allocated
 * capacity of s; confirm against the kstring implementation.
 */
typedef struct __kstring_t {
    size_t l;
    size_t m;
    char *s;
} kstring_t;
#endif
|
|
62
|
|
63 #ifdef __cplusplus
|
|
64 extern "C" {
|
|
65 #endif
|
|
66
|
|
67 /******************
|
|
68 * Basic routines *
|
|
69 ******************/
|
|
70
|
|
71 /**
|
|
72 * Open an existing file descriptor for reading or writing.
|
|
73 *
|
|
74 * @param fd file descriptor
|
|
75 * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies
|
|
76 * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored.
|
|
77 * @return BGZF file handler; 0 on error
|
|
78 */
|
|
79 BGZF* bgzf_dopen(int fd, const char *mode);
|
|
80
|
|
/* Old name kept for backward compatibility; forwards to bgzf_dopen(). */
#define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode))
|
|
82
|
|
83 /**
|
|
84 * Open the specified file for reading or writing.
|
|
85 */
|
|
86 BGZF* bgzf_open(const char* path, const char *mode);
|
|
87
|
|
88 /**
|
|
89 * Close the BGZF and free all associated resources.
|
|
90 *
|
|
91 * @param fp BGZF file handler
|
|
92 * @return 0 on success and -1 on error
|
|
93 */
|
|
94 int bgzf_close(BGZF *fp);
|
|
95
|
|
96 /**
|
|
97 * Read up to _length_ bytes from the file storing into _data_.
|
|
98 *
|
|
99 * @param fp BGZF file handler
|
|
100 * @param data data array to read into
|
|
101 * @param length size of data to read
|
|
102 * @return number of bytes actually read; 0 on end-of-file and -1 on error
|
|
103 */
|
|
104 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length);
|
|
105
|
|
106 /**
|
|
107 * Write _length_ bytes from _data_ to the file.
|
|
108 *
|
|
109 * @param fp BGZF file handler
|
|
110 * @param data data array to write
|
|
111 * @param length size of data to write
|
|
112 * @return number of bytes actually written; -1 on error
|
|
113 */
|
|
114 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length);
|
|
115
|
|
116 /**
|
|
117 * Write the data in the buffer to the file.
|
|
118 */
|
|
119 int bgzf_flush(BGZF *fp);
|
|
120
|
|
/**
 * Return a virtual file pointer to the current location in the file.
 * No interpretation of the value should be made, other than that a
 * subsequent call to bgzf_seek can be used to position the file at the
 * same point. Return value is non-negative on success.
 *
 * The value is fp->block_address shifted left by 16 bits, OR-ed with the
 * low 16 bits of fp->block_offset.
 *
 * The argument is parenthesized so that any pointer expression (such as
 * `&file` or `files + i`) expands correctly. It is evaluated twice, so it
 * must not have side effects.
 */
#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))
|
|
128
|
|
129 /**
|
|
130 * Set the file to read from the location specified by _pos_.
|
|
131 *
|
|
132 * @param fp BGZF file handler
|
|
133 * @param pos virtual file offset returned by bgzf_tell()
|
|
134 * @param whence must be SEEK_SET
|
|
135 * @return 0 on success and -1 on error
|
|
136 */
|
|
137 int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
|
|
138
|
|
139 /**
|
|
140 * Check if the BGZF end-of-file (EOF) marker is present
|
|
141 *
|
|
142 * @param fp BGZF file handler opened for reading
|
|
143 * @return 1 if EOF is present; 0 if not or on I/O error
|
|
144 */
|
|
145 int bgzf_check_EOF(BGZF *fp);
|
|
146
|
|
/**
 * Test whether a file is in the BGZF format.
 *
 * @param fn  file name
 * @return    1 if _fn_ is BGZF; 0 if it is not or on I/O error
 */
int bgzf_is_bgzf(const char *fn);
|
|
154
|
|
155 /*********************
|
|
156 * Advanced routines *
|
|
157 *********************/
|
|
158
|
|
159 /**
|
|
160 * Set the cache size. Only effective when compiled with -DBGZF_CACHE.
|
|
161 *
|
|
162 * @param fp BGZF file handler
|
|
163 * @param size size of cache in bytes; 0 to disable caching (default)
|
|
164 */
|
|
165 void bgzf_set_cache_size(BGZF *fp, int size);
|
|
166
|
|
167 /**
|
|
168 * Flush the file if the remaining buffer size is smaller than _size_
|
|
169 */
|
|
170 int bgzf_flush_try(BGZF *fp, ssize_t size);
|
|
171
|
|
172 /**
|
|
173 * Read one byte from a BGZF file. It is faster than bgzf_read()
|
|
174 * @param fp BGZF file handler
|
|
175 * @return byte read; -1 on end-of-file or error
|
|
176 */
|
|
177 int bgzf_getc(BGZF *fp);
|
|
178
|
|
179 /**
|
|
180 * Read one line from a BGZF file. It is faster than bgzf_getc()
|
|
181 *
|
|
182 * @param fp BGZF file handler
|
|
183 * @param delim delimitor
|
|
184 * @param str string to write to; must be initialized
|
|
185 * @return length of the string; 0 on end-of-file; negative on error
|
|
186 */
|
|
187 int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
|
|
188
|
|
189 /**
|
|
190 * Read the next BGZF block.
|
|
191 */
|
|
192 int bgzf_read_block(BGZF *fp);
|
|
193
|
|
194 /**
|
|
195 * Enable multi-threading (only effective on writing)
|
|
196 *
|
|
197 * @param fp BGZF file handler; must be opened for writing
|
|
198 * @param n_threads #threads used for writing
|
|
199 * @param n_sub_blks #blocks processed by each thread; a value 64-256 is recommended
|
|
200 */
|
|
201 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);
|
|
202
|
|
203 #ifdef __cplusplus
|
|
204 }
|
|
205 #endif
|
|
206
|
|
207 #endif
|