| 0 | 1 /* The MIT License | 
|  | 2 | 
|  | 3    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology | 
|  | 4                  2011, 2012 Attractive Chaos <attractor@live.co.uk> | 
|  | 5 | 
|  | 6    Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | 7    of this software and associated documentation files (the "Software"), to deal | 
|  | 8    in the Software without restriction, including without limitation the rights | 
|  | 9    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | 10    copies of the Software, and to permit persons to whom the Software is | 
|  | 11    furnished to do so, subject to the following conditions: | 
|  | 12 | 
|  | 13    The above copyright notice and this permission notice shall be included in | 
|  | 14    all copies or substantial portions of the Software. | 
|  | 15 | 
|  | 16    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | 17    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | 18    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|  | 19    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | 20    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | 21    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | 22    THE SOFTWARE. | 
|  | 23 */ | 
|  | 24 | 
|  | 25 /* The BGZF library was originally written by Bob Handsaker from the Broad | 
|  | 26  * Institute. It was later improved by the SAMtools developers. */ | 
|  | 27 | 
|  | 28 #ifndef __BGZF_H | 
|  | 29 #define __BGZF_H | 
|  | 30 | 
|  | 31 #include <stdint.h> | 
|  | 32 #include <stdio.h> | 
|  | 33 #include <zlib.h> | 
|  | 34 #include <sys/types.h> | 
|  | 35 | 
/*
 * Block sizing constants.
 * BGZF_BLOCK_SIZE must be chosen so that
 * compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE.
 */
#define BGZF_BLOCK_SIZE     0xff00
#define BGZF_MAX_BLOCK_SIZE 0x10000
|  | 38 | 
/*
 * Error codes. Each value occupies its own single bit.
 * NOTE(review): presumably these are OR-ed together into BGZF.errcode --
 * confirm against the implementation.
 */
#define BGZF_ERR_ZLIB   1
#define BGZF_ERR_HEADER 2
#define BGZF_ERR_IO     4
#define BGZF_ERR_MISUSE 8
|  | 43 | 
/**
 * State kept for one open BGZF stream.
 *
 * Member order and types are part of the layout; the three leading
 * bit-fields share storage and must stay adjacent.
 */
typedef struct {
	int errcode:16;           /* NOTE(review): presumably a combination of BGZF_ERR_* values -- confirm */
	int is_write:2;           /* NOTE(review): presumably non-zero when opened for writing -- confirm */
	int compress_level:14;
	int cache_size;
	int block_length;
	int block_offset;         /* its low 16 bits form the low 16 bits of bgzf_tell() */
	int64_t block_address;    /* shifted left by 16 to form the upper part of bgzf_tell() */
	void *uncompressed_block;
	void *compressed_block;
	void *cache;              /* a pointer to a hash table */
	void *fp;                 /* actual file handler; FILE* on writing; FILE* or knetFile* on reading */
	void *mt;                 /* only used for multi-threading */
} BGZF;
|  | 54 | 
/*
 * String buffer type taken by bgzf_getline().
 * The definition is skipped when KSTRING_T has already been defined
 * before this point.
 */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
	size_t l;   /* NOTE(review): presumably the current string length -- confirm */
	size_t m;   /* NOTE(review): presumably the allocated capacity -- confirm */
	char *s;    /* character data */
} kstring_t;
#endif
|  | 62 | 
|  | 63 #ifdef __cplusplus | 
|  | 64 extern "C" { | 
|  | 65 #endif | 
|  | 66 | 
|  | 67 	/****************** | 
|  | 68 	 * Basic routines * | 
|  | 69 	 ******************/ | 
|  | 70 | 
|  | 71 	/** | 
|  | 72 	 * Open an existing file descriptor for reading or writing. | 
|  | 73 	 * | 
|  | 74 	 * @param fd    file descriptor | 
|  | 75 	 * @param mode  mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies | 
|  | 76 	 *              the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored. | 
|  | 77 	 * @return      BGZF file handler; 0 on error | 
|  | 78 	 */ | 
|  | 79 	BGZF* bgzf_dopen(int fd, const char *mode); | 
|  | 80 | 
|  | 81 	#define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility | 
|  | 82 | 
|  | 83 	/** | 
|  | 84 	 * Open the specified file for reading or writing. | 
|  | 85 	 */ | 
|  | 86 	BGZF* bgzf_open(const char* path, const char *mode); | 
|  | 87 | 
|  | 88 	/** | 
|  | 89 	 * Close the BGZF and free all associated resources. | 
|  | 90 	 * | 
|  | 91 	 * @param fp    BGZF file handler | 
|  | 92 	 * @return      0 on success and -1 on error | 
|  | 93 	 */ | 
|  | 94 	int bgzf_close(BGZF *fp); | 
|  | 95 | 
|  | 96 	/** | 
|  | 97 	 * Read up to _length_ bytes from the file storing into _data_. | 
|  | 98 	 * | 
|  | 99 	 * @param fp     BGZF file handler | 
|  | 100 	 * @param data   data array to read into | 
|  | 101 	 * @param length size of data to read | 
|  | 102 	 * @return       number of bytes actually read; 0 on end-of-file and -1 on error | 
|  | 103 	 */ | 
|  | 104 	ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length); | 
|  | 105 | 
|  | 106 	/** | 
|  | 107 	 * Write _length_ bytes from _data_ to the file. | 
|  | 108 	 * | 
|  | 109 	 * @param fp     BGZF file handler | 
|  | 110 	 * @param data   data array to write | 
|  | 111 	 * @param length size of data to write | 
|  | 112 	 * @return       number of bytes actually written; -1 on error | 
|  | 113 	 */ | 
|  | 114 	ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length); | 
|  | 115 | 
|  | 116 	/** | 
|  | 117 	 * Write the data in the buffer to the file. | 
|  | 118 	 */ | 
|  | 119 	int bgzf_flush(BGZF *fp); | 
|  | 120 | 
/**
 * Return a virtual file pointer to the current location in the file.
 * No interpretation of the value should be made, other than that a
 * subsequent call to bgzf_seek() can be used to position the file at the
 * same point. The value is non-negative on success.
 *
 * The result places fp->block_address above the low 16 bits and the low
 * 16 bits of fp->block_offset below it.
 *
 * This is a macro: _fp_ is evaluated twice, so pass an expression without
 * side effects. The parameter is parenthesized so that any pointer
 * expression (for example &handle or a conditional) expands correctly.
 */
#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))
|  | 128 | 
|  | 129 	/** | 
|  | 130 	 * Set the file to read from the location specified by _pos_. | 
|  | 131 	 * | 
|  | 132 	 * @param fp     BGZF file handler | 
|  | 133 	 * @param pos    virtual file offset returned by bgzf_tell() | 
|  | 134 	 * @param whence must be SEEK_SET | 
|  | 135 	 * @return       0 on success and -1 on error | 
|  | 136 	 */ | 
|  | 137 	int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence); | 
|  | 138 | 
|  | 139 	/** | 
|  | 140 	 * Check if the BGZF end-of-file (EOF) marker is present | 
|  | 141 	 * | 
|  | 142 	 * @param fp    BGZF file handler opened for reading | 
|  | 143 	 * @return      1 if EOF is present; 0 if not or on I/O error | 
|  | 144 	 */ | 
|  | 145 	int bgzf_check_EOF(BGZF *fp); | 
|  | 146 | 
/**
 * Test whether a file is in the BGZF format.
 *
 * @param fn    file name
 * @return      1 if _fn_ is BGZF; 0 if it is not or on I/O error
 */
int bgzf_is_bgzf(const char *fn);
|  | 154 | 
|  | 155 	/********************* | 
|  | 156 	 * Advanced routines * | 
|  | 157 	 *********************/ | 
|  | 158 | 
|  | 159 	/** | 
|  | 160 	 * Set the cache size. Only effective when compiled with -DBGZF_CACHE. | 
|  | 161 	 * | 
|  | 162 	 * @param fp    BGZF file handler | 
|  | 163 	 * @param size  size of cache in bytes; 0 to disable caching (default) | 
|  | 164 	 */ | 
|  | 165 	void bgzf_set_cache_size(BGZF *fp, int size); | 
|  | 166 | 
|  | 167 	/** | 
|  | 168 	 * Flush the file if the remaining buffer size is smaller than _size_ | 
|  | 169 	 */ | 
|  | 170 	int bgzf_flush_try(BGZF *fp, ssize_t size); | 
|  | 171 | 
|  | 172 	/** | 
|  | 173 	 * Read one byte from a BGZF file. It is faster than bgzf_read() | 
|  | 174 	 * @param fp     BGZF file handler | 
|  | 175 	 * @return       byte read; -1 on end-of-file or error | 
|  | 176 	 */ | 
|  | 177 	int bgzf_getc(BGZF *fp); | 
|  | 178 | 
|  | 179 	/** | 
|  | 180 	 * Read one line from a BGZF file. It is faster than bgzf_getc() | 
|  | 181 	 * | 
|  | 182 	 * @param fp     BGZF file handler | 
|  | 183 	 * @param delim  delimitor | 
|  | 184 	 * @param str    string to write to; must be initialized | 
|  | 185 	 * @return       length of the string; 0 on end-of-file; negative on error | 
|  | 186 	 */ | 
|  | 187 	int bgzf_getline(BGZF *fp, int delim, kstring_t *str); | 
|  | 188 | 
|  | 189 	/** | 
|  | 190 	 * Read the next BGZF block. | 
|  | 191 	 */ | 
|  | 192 	int bgzf_read_block(BGZF *fp); | 
|  | 193 | 
|  | 194 	/** | 
|  | 195 	 * Enable multi-threading (only effective on writing) | 
|  | 196 	 * | 
|  | 197 	 * @param fp          BGZF file handler; must be opened for writing | 
|  | 198 	 * @param n_threads   #threads used for writing | 
|  | 199 	 * @param n_sub_blks  #blocks processed by each thread; a value 64-256 is recommended | 
|  | 200 	 */ | 
|  | 201 	int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); | 
|  | 202 | 
|  | 203 #ifdef __cplusplus | 
|  | 204 } | 
|  | 205 #endif | 
|  | 206 | 
|  | 207 #endif |