/* The MIT License

   Copyright (c) 2008 Genome Research Ltd (GRL).

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/* Contact: Heng Li <lh3@sanger.ac.uk> */

/*
  An example:

#include "khash.h"
KHASH_MAP_INIT_INT(32, char)
int main() {
	int ret, is_missing;
	khiter_t k;
	khash_t(32) *h = kh_init(32);
	k = kh_put(32, h, 5, &ret);
	if (!ret) kh_del(32, h, k);
	kh_value(h, k) = 10;
	k = kh_get(32, h, 10);
	is_missing = (k == kh_end(h));
	k = kh_get(32, h, 5);
	kh_del(32, h, k);
	for (k = kh_begin(h); k != kh_end(h); ++k)
		if (kh_exist(h, k)) kh_value(h, k) = 1;
	kh_destroy(32, h);
	return 0;
}
*/

/*
  2008-09-19 (0.2.3):

	* Corrected the example
	* Improved interfaces

  2008-09-11 (0.2.2):

	* Improved speed a little in kh_put()

  2008-09-10 (0.2.1):

	* Added kh_clear()
	* Fixed a compiling error

  2008-09-02 (0.2.0):

	* Changed to token concatenation which increases flexibility.

  2008-08-31 (0.1.2):

	* Fixed a bug in kh_get(), which has not been tested previously.

  2008-08-31 (0.1.1):

	* Added destructor
*/


80 #ifndef __AC_KHASH_H
|
|
81 #define __AC_KHASH_H
|
|
82
|
|
/*!
  @header

  Generic hash table library.

  @copyright Heng Li
 */

/* Library version string; matches the newest entry of the changelog above (0.2.3). */
#define AC_VERSION_KHASH_H "0.2.3"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Unsigned 32-bit type used for hash values, bucket indices and counters. */
typedef uint32_t khint_t;
/* Iterator type: a bucket index; kh_end(h) (== n_buckets) is the end iterator. */
typedef khint_t khiter_t;

/*
 * Table of bucket counts, in ascending order.  kh_resize_<name>() picks the
 * entry that follows the largest one not exceeding the requested size; the
 * leading 0 guarantees that this downward search always terminates.
 */
#define __ac_HASH_PRIME_SIZE 32
static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
{
  0ul,          3ul,          11ul,         23ul,         53ul,
  97ul,         193ul,        389ul,        769ul,        1543ul,
  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
  3221225473ul, 4294967291ul
};

/*
 * Per-bucket state bits.  Each bucket owns two bits inside a uint32_t word
 * (16 buckets per word): bit 1 (value 2) = "empty", bit 0 (value 1) =
 * "deleted".  Both bits clear means the bucket holds live data.
 *
 * Both macro arguments are fully parenthesised so that compound expressions
 * (e.g. `base | off`) index the intended word and bit pair.  Note that `i`
 * is still evaluated twice, so it must not have side effects.
 */
#define __ac_isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define __ac_isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define __ac_iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))

/*
 * Load-factor limit: upper_bound is computed as n_buckets * __ac_HASH_UPPER,
 * and kh_put_<name>() triggers a resize once n_occupied reaches it.
 */
static const double __ac_HASH_UPPER = 0.77;

/*
 * KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
 *
 * Instantiates the table type kh_<name>_t and the static inline functions
 * kh_init_<name>, kh_destroy_<name>, kh_clear_<name>, kh_get_<name>,
 * kh_resize_<name>, kh_put_<name> and kh_del_<name>.
 *
 *   name          suffix pasted into every generated identifier
 *   khkey_t       key type
 *   khval_t       value type (vals is only allocated when kh_is_map != 0)
 *   kh_is_map     non-zero for a map, zero for a set
 *   __hash_func   maps a key to a khint_t hash value
 *   __hash_equal  returns non-zero when two keys are equal
 *
 * Table fields: n_buckets (capacity), size (live elements), n_occupied (live
 * plus deleted buckets), upper_bound (resize threshold), flags (two state
 * bits per bucket; the 0xaa fill marks every bucket "empty").
 *
 * Collisions are resolved by double hashing: the probe starts at
 * hash % n_buckets and advances by 1 + hash % (n_buckets - 1).
 *
 * Out-of-memory behaviour: kh_resize_<name> leaves the table unchanged and
 * usable when an allocation fails.  kh_put_<name> then stores -1 in *ret and
 * returns the end iterator (n_buckets) instead of inserting.
 */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	typedef struct { \
		khint_t n_buckets, size, n_occupied, upper_bound; \
		uint32_t *flags; \
		khkey_t *keys; \
		khval_t *vals; \
	} kh_##name##_t; \
	static inline kh_##name##_t *kh_init_##name(void) { \
		return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
	} \
	static inline void kh_destroy_##name(kh_##name##_t *h) \
	{ \
		if (h) { \
			free(h->keys); free(h->flags); \
			free(h->vals); \
			free(h); \
		} \
	} \
	static inline void kh_clear_##name(kh_##name##_t *h) \
	{ \
		if (h && h->flags) { \
			memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); \
			h->size = h->n_occupied = 0; \
		} \
	} \
	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
	{ \
		if (h->n_buckets) { \
			khint_t inc, k, i, last; \
			k = __hash_func(key); i = k % h->n_buckets; \
			inc = 1 + k % (h->n_buckets - 1); last = i; \
			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
				if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
				else i += inc; \
				if (i == last) return h->n_buckets; \
			} \
			return __ac_iseither(h->flags, i)? h->n_buckets : i; \
		} else return 0; \
	} \
	static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
	{ \
		uint32_t *new_flags = 0; \
		khint_t j = 1; \
		{ \
			khint_t t = __ac_HASH_PRIME_SIZE - 1; \
			while (__ac_prime_list[t] > new_n_buckets) --t; \
			/* step to the next table entry, but never past the last one */ \
			if (t + 1 < __ac_HASH_PRIME_SIZE) ++t; \
			new_n_buckets = __ac_prime_list[t]; \
			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
			else { \
				size_t flag_bytes = ((size_t)(new_n_buckets>>4) + 1) * sizeof(uint32_t); \
				new_flags = (uint32_t*)malloc(flag_bytes); \
				if (!new_flags) return; /* table left untouched */ \
				memset(new_flags, 0xaa, flag_bytes); \
				if (h->n_buckets < new_n_buckets) { \
					khkey_t *new_keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
					if (!new_keys) { free(new_flags); return; } \
					h->keys = new_keys; \
					if (kh_is_map) { \
						khval_t *new_vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
						if (!new_vals) { free(new_flags); return; } \
						h->vals = new_vals; \
					} \
				} \
			} \
		} \
		if (j) { \
			/* rehash in place: lift each live element out and re-insert it, */ \
			/* swapping with any not-yet-moved element found at the target   */ \
			for (j = 0; j != h->n_buckets; ++j) { \
				if (__ac_iseither(h->flags, j) == 0) { \
					khkey_t key = h->keys[j]; \
					khval_t val; \
					if (kh_is_map) val = h->vals[j]; \
					__ac_set_isdel_true(h->flags, j); \
					while (1) { \
						khint_t inc, k, i; \
						k = __hash_func(key); \
						i = k % new_n_buckets; \
						inc = 1 + k % (new_n_buckets - 1); \
						while (!__ac_isempty(new_flags, i)) { \
							if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
							else i += inc; \
						} \
						__ac_set_isempty_false(new_flags, i); \
						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
							__ac_set_isdel_true(h->flags, i); \
						} else { \
							h->keys[i] = key; \
							if (kh_is_map) h->vals[i] = val; \
							break; \
						} \
					} \
				} \
			} \
			if (h->n_buckets > new_n_buckets) { \
				/* shrinking: a failed realloc keeps the old, larger buffer */ \
				khkey_t *shrunk_keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
				if (shrunk_keys) h->keys = shrunk_keys; \
				if (kh_is_map) { \
					khval_t *shrunk_vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
					if (shrunk_vals) h->vals = shrunk_vals; \
				} \
			} \
			free(h->flags); \
			h->flags = new_flags; \
			h->n_buckets = new_n_buckets; \
			h->n_occupied = h->size; \
			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
		} \
	} \
	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
	{ \
		khint_t x; \
		if (h->n_occupied >= h->upper_bound) { \
			if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
			else kh_resize_##name(h, h->n_buckets + 1); \
			/* still over the limit: the resize could not be performed */ \
			if (h->n_occupied >= h->upper_bound) { \
				*ret = -1; \
				return h->n_buckets; \
			} \
		} \
		{ \
			khint_t inc, k, i, site, last; \
			x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
			if (__ac_isempty(h->flags, i)) x = i; \
			else { \
				inc = 1 + k % (h->n_buckets - 1); last = i; \
				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
					if (__ac_isdel(h->flags, i)) site = i; \
					if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
					else i += inc; \
					if (i == last) { x = site; break; } \
				} \
				if (x == h->n_buckets) { \
					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
					else x = i; \
				} \
			} \
		} \
		if (__ac_isempty(h->flags, x)) { \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; ++h->n_occupied; \
			*ret = 1; \
		} else if (__ac_isdel(h->flags, x)) { \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; \
			*ret = 2; \
		} else *ret = 0; \
		return x; \
	} \
	static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
	{ \
		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
			__ac_set_isdel_true(h->flags, x); \
			--h->size; \
		} \
	}

/* --- BEGIN OF HASH FUNCTIONS --- */

/*! @function
  @abstract     Integer hash function
  @param  key   The integer [uint32_t]
  @return       The hash value [khint_t]
 */
#define kh_int_hash_func(key) ((uint32_t)(key))
/*! @function
  @abstract     Integer comparison function
 */
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     64-bit integer hash function
  @param  key   The integer [uint64_t]
  @return       The hash value [khint_t]
  @discussion   XORs the key with itself shifted right by 33 and left by 11,
                then keeps the low 32 bits.
 */
#define kh_int64_hash_func(key) ((uint32_t)(((key) >> 33) ^ (key) ^ ((key) << 11)))
/*! @function
  @abstract     64-bit integer comparison function
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))
289 /*! @function
|
|
290 @abstract const char* hash function
|
|
291 @param s Pointer to a null terminated string
|
|
292 @return The hash value
|
|
293 */
|
|
294 static inline khint_t __ac_X31_hash_string(const char *s)
|
|
295 {
|
|
296 khint_t h = *s;
|
|
297 if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
|
|
298 return h;
|
|
299 }
|
|
/*! @function
  @abstract     Another interface to const char* hash function
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khint_t]
 */
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
  @abstract     Const char* comparison function
  @discussion   Compares string contents with strcmp(), not pointer values.
 */
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)

/* --- END OF HASH FUNCTIONS --- */

/* Other necessary macros... */

/*!
  @abstract Type of the hash table.
  @param  name  Name of the hash table [symbol]
 */
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Initiate a hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
 */
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Destroy a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khint_t]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: 0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
 */
#define kh_del(name, h, k) kh_del_##name(h, k)


/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
 */
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
 */
#define kh_begin(h) ((khint_t)(0))

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name)										\
	KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t)								\
	KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name)										\
	KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t)								\
	KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

/* Key type used by the string-keyed tables: the table stores the pointer only. */
typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name)										\
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing const char* keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t)								\
	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)

486 #endif /* __AC_KHASH_H */
|