Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-0.2.0/src/squid/gki.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ff1768533a07 |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 /* gki.c | |
11 * SRE, Sat May 1 14:49:08 1999 | |
12 * | |
13 * "generic key index" module: emulation of Perl hashes. | |
14 * Maps keys (ASCII char strings) to array index. Dynamically | |
15 * resizes the hash table. | |
16 * | |
17 * Limitations: | |
18 * - hash table can only grow; no provision for deleting keys | |
19 * or downsizing the hash table. | |
20 * - Maximum hash table size set at 100003. Performance | |
21 * will degrade for key sets much larger than this. | |
22 * - Assumes that integers are 32 bits (or greater). | |
23 * | |
24 * Defines a typedef'd structure: | |
25 * gki - a key index hash table. | |
26 * Provides functions: | |
27 * GKIInit() - start a hash table. | |
28 * GKIStoreKey() - store a new key, get a unique index. | |
29 * GKIKeyIndex() - retrieve an existing key's index. | |
30 * GKIFree() - free a hash table. | |
31 * GKIStatus() - Debugging: prints internal status of a hash struct | |
32 * | |
33 * | |
34 * Note that there are no dependencies on squid; the gki.c/gki.h | |
35 * pair are base ANSI C and can be reused anywhere. | |
36 ***************************************************************** | |
37 * | |
38 * API for storing/reading stuff: | |
39 * moral equivalent of Perl's $foo{$key} = whatever, $bar{$key} = whatever: | |
40 * #include "gki.h" | |
41 * | |
42 * gki *hash; | |
43 * int idx; | |
44 * char *key; | |
45 * | |
46 * hash = GKIInit(); | |
47 * (Storing:) | |
48 * (foreach key) { | |
49 * idx = GKIStoreKey(hash, key); | |
50 * (reallocate foo, bar as needed) | |
51 * foo[idx] = whatever; | |
52 * bar[idx] = whatever; | |
53 * } | |
54 * (Reading:) | |
55 * (foreach key) { | |
56 * idx = GKIKeyIndex(hash, key); | |
57 * if (idx == -1) {no_such_key; } | |
58 * (do something with) foo[idx]; | |
59 * (do something with) bar[idx]; | |
60 * } | |
61 * GKIFree(); | |
62 * | |
63 ***************************************************************** | |
64 * | |
65 * Timings on wrasse for 45402 keys in /usr/dict/words using | |
66 * Tests/test_gki: | |
67 * 250 msec store (6 usec/store) | |
68 * 140 msec retrieve (3 usec/retrieve) | |
69 * and using the 13408 names of Pfam's GP120.full alignment: | |
70 * 70 msec store (5 usec/store) | |
71 * 50 msec retrieve (4 usec/retrieve) | |
72 * | |
73 * RCS $Id: gki.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: gki.c,v 1.3 2000/12/21 23:42:59 eddy Exp) | |
74 */ | |
75 | |
76 | |
77 | |
78 #include <stdio.h> | |
79 #include <stdlib.h> | |
80 #include <string.h> | |
81 #include <limits.h> | |
82 #include "squid.h" | |
83 #include "gki.h" | |
84 | |
85 /* | |
86 * Best hash table sizes are prime numbers (see Knuth vol 3, Sorting | |
87 * and Searching). | |
88 * gki_primes[] defines the ascending order of hash table sizes | |
89 * that we use in upsizing the hash table dynamically. | |
90 * useful site for testing primes: | |
91 * http://www.idbsu.edu/people/jbrennan/algebra/numbers/sieve.html | |
92 * Because of the way gki_hashvalue works, the largest number | |
93 * must be < INT_MAX / 128 / 128 : 131072 on a 32 bit machine. | |
94 */ | |
95 static int gki_primes[] = { 101, 1009, 10007, 100003 }; | |
96 #define GKI_NPRIMES 4 | |
97 #define GKI_ALPHABETSIZE 128 | |
98 | |
99 static GKI *gki_alloc(int primelevel); | |
100 static int gki_hashvalue(GKI *hash, char *key); | |
101 static int gki_upsize(GKI *old); | |
102 | |
103 | |
104 /* Function: GKIInit() | |
105 * Date: SRE, Sat May 1 11:12:24 1999 [May Day geek-out] | |
106 * | |
107 * Purpose: Initialize a hash table for key indexing. | |
108 * Simply a wrapper around a level 0 gki_alloc(). | |
109 * | |
110 * Args: (void) | |
111 * | |
112 * Returns: An allocated hash table structure. | |
113 * Caller frees with GKIFree(). | |
114 */ | |
115 GKI * | |
116 GKIInit(void) | |
117 { | |
118 GKI *hash; | |
119 hash = gki_alloc(0); | |
120 return hash; | |
121 } | |
122 | |
123 /* Function: GKIFree() | |
124 * Date: SRE, Sat May 1 11:13:26 1999 [May Day geek-out] | |
125 * | |
126 * Purpose: Free a key index hash table. | |
127 * | |
128 * Args: hash - the gki structure | |
129 * | |
130 * Returns: (void). | |
131 * hash table is destroyed. | |
132 */ | |
133 void | |
134 GKIFree(GKI *hash) | |
135 { | |
136 struct gki_elem *ptr; | |
137 int i; | |
138 | |
139 if (hash == NULL) return; /* tolerate a NULL */ | |
140 | |
141 for (i = 0; i < hash->nhash; i++) | |
142 while (hash->table[i] != NULL) | |
143 { | |
144 ptr = hash->table[i]->nxt; | |
145 /* NULL keys can occur after we've gki_upsize'd */ | |
146 if (hash->table[i]->key != NULL) free(hash->table[i]->key); | |
147 free(hash->table[i]); | |
148 hash->table[i] = ptr; | |
149 } | |
150 free(hash->table); | |
151 free(hash); | |
152 } | |
153 | |
154 /* Function: GKIStoreKey() | |
155 * Date: SRE, Sat May 1 11:16:48 1999 [May Day geek-out] | |
156 * | |
157 * Purpose: Store a key in the key index hash table. | |
158 * Associate it with a unique "key index", counting | |
159 * from 0. (It's this index that lets us map | |
160 * the hashed keys to indexed C arrays, (clumsily) | |
161 * emulating Perl's hashes.) | |
162 * | |
163 * Does *not* check to see if the key's already | |
164 * in the table, so it's possible to store multiple | |
165 * copies of a key with different indices; probably | |
166 * not what you want, so if you're not sure the | |
167 * key is unique, check the table first with | |
168 * GKIKeyIndex(). | |
169 * | |
170 * Args: hash - GKI structure to store the key in | |
171 * key - string to store | |
172 * | |
173 * Returns: the new key's index. Since it's always the | |
174 * last one in the current array, this index is | |
175 * just hash->nkeys-1. | |
176 * On a malloc failure, returns -1. | |
177 * hash table is modified. | |
178 */ | |
179 int | |
180 GKIStoreKey(GKI *hash, char *key) | |
181 { | |
182 int val; | |
183 struct gki_elem *ptr; | |
184 | |
185 val = gki_hashvalue(hash, key); | |
186 | |
187 ptr = hash->table[val]; | |
188 hash->table[val] = MallocOrDie(sizeof(struct gki_elem)); | |
189 hash->table[val]->key = MallocOrDie(sizeof(char) * (strlen(key)+1)); | |
190 strcpy(hash->table[val]->key, key); | |
191 | |
192 hash->table[val]->idx = hash->nkeys; | |
193 hash->table[val]->nxt = ptr; | |
194 | |
195 hash->nkeys++; | |
196 /* time to upsize? */ | |
197 if (hash->nkeys > 3*hash->nhash && hash->primelevel < GKI_NPRIMES-1) | |
198 gki_upsize(hash); | |
199 | |
200 return hash->nkeys-1; | |
201 } | |
202 | |
203 /* Function: GKIKeyIndex() | |
204 * Date: SRE, Sat May 1 11:20:42 1999 [May Day geek-out] | |
205 * | |
206 * Purpose: Look up a key in the hash table. Return | |
207 * its index (0..nkeys-1), else -1 if the key | |
208 * isn't in the hash (yet). | |
209 * | |
210 * Args: hash - the GKI hash table to search in | |
211 * key - the key to look up | |
212 * | |
213 * Returns: -1 if key is not found; | |
214 * index of key if it is found (range 0..nkeys-1). | |
215 * hash table is unchanged. | |
216 */ | |
217 int | |
218 GKIKeyIndex(GKI *hash, char *key) | |
219 { | |
220 struct gki_elem *ptr; | |
221 int val; | |
222 | |
223 val = gki_hashvalue(hash, key); | |
224 for (ptr = hash->table[val]; ptr != NULL; ptr = ptr->nxt) | |
225 if (strcmp(key, ptr->key) == 0) return ptr->idx; | |
226 return -1; | |
227 } | |
228 | |
229 /* Function: GKIStatus() | |
230 * Date: SRE, Sat May 1 11:11:13 1999 [St. Louis] | |
231 * | |
232 * Purpose: (DEBUGGING) How are we doing? Calculate some | |
233 * simple statistics for the hash table. | |
234 * | |
235 * Args: hash - the GKI hash table to look at | |
236 * | |
237 * Returns: (void) | |
238 * Prints diagnostics on stdout. | |
239 * hash table is unchanged. | |
240 */ | |
241 void | |
242 GKIStatus(GKI *hash) | |
243 { | |
244 struct gki_elem *ptr; | |
245 int i; | |
246 int nkeys; | |
247 int nempty = 0; | |
248 int maxkeys = -1; | |
249 int minkeys = INT_MAX; | |
250 | |
251 for (i = 0; i < hash->nhash; i++) | |
252 { | |
253 nkeys = 0; | |
254 for (ptr = hash->table[i]; ptr != NULL; ptr = ptr->nxt) | |
255 nkeys++; | |
256 | |
257 if (nkeys == 0) nempty++; | |
258 if (nkeys > maxkeys) maxkeys = nkeys; | |
259 if (nkeys < minkeys) minkeys = nkeys; | |
260 } | |
261 | |
262 printf("Total keys: %d\n", hash->nkeys); | |
263 printf("Hash table size: %d\n", hash->nhash); | |
264 printf("Average occupancy: %.1f\n", (float) hash->nkeys / (float) hash->nhash); | |
265 printf("Unoccupied slots: %d\n", nempty); | |
266 printf("Most in one slot: %d\n", maxkeys); | |
267 printf("Least in one slot: %d\n", minkeys); | |
268 | |
269 } | |
270 | |
271 | |
272 /* Function: gki_alloc() | |
273 * Date: SRE, Sat May 1 11:55:47 1999 [May Day geek-out] | |
274 * | |
275 * Purpose: Allocate a hash table structure with the | |
276 * size given by primelevel. | |
277 * | |
278 * Args: primelevel - level 0..GKI_NPRIMES-1, specifying | |
279 * the size of the table; see gki_primes[] | |
280 * array. | |
281 * | |
282 * Returns: An allocated hash table structure. | |
283 * Caller frees with GKIFree(). | |
284 */ | |
285 static GKI * | |
286 gki_alloc(int primelevel) | |
287 { | |
288 GKI *hash; | |
289 int i; | |
290 | |
291 if (primelevel < 0 || primelevel >= GKI_NPRIMES) | |
292 Die("bad primelevel in gki_alloc()"); | |
293 hash = MallocOrDie(sizeof(GKI)); | |
294 | |
295 hash->primelevel = primelevel; | |
296 hash->nhash = gki_primes[hash->primelevel]; | |
297 hash->table = MallocOrDie(sizeof(struct gki_elem) * hash->nhash); | |
298 for (i = 0; i < hash->nhash; i++) | |
299 hash->table[i] = NULL; | |
300 hash->nkeys = 0; | |
301 return hash; | |
302 } | |
303 | |
304 | |
305 /* Function: gki_hashvalue() | |
306 * Date: SRE, Sat May 1 11:14:10 1999 [May Day geek-out] | |
307 * | |
308 * Purpose: Calculate the hash value for a key. Usually | |
309 * we expect a one-word key, but the function will | |
310 * hash any ASCII string effectively. The hash function | |
311 * is a simple one (see p. 233 of Sedgewick, | |
312 * Algorithms in C). | |
313 * Slightly optimized: does two characters at a time | |
314 * before doing the modulo; this gives us a significant | |
315 * speedup. | |
316 * | |
317 * Args: hash - the gki structure (we need to know the hash table size) | |
318 * key - a string to calculate the hash value for | |
319 * | |
320 * Returns: a hash value, in the range 0..hash->nhash-1. | |
321 * hash table is unmodified. | |
322 */ | |
323 static int | |
324 gki_hashvalue(GKI *hash, char *key) | |
325 { | |
326 int val = 0; | |
327 | |
328 for (; *key != '\0'; key++) | |
329 { | |
330 val = GKI_ALPHABETSIZE*val + *key; | |
331 if (*(++key) == '\0') { val = val % hash->nhash; break; } | |
332 val = (GKI_ALPHABETSIZE*val + *key) % hash->nhash; | |
333 } | |
334 return val; | |
335 } | |
336 | |
337 /* Function: gki_upsize() | |
338 * Date: SRE, Sat May 1 11:46:07 1999 [May Day geek-out] | |
339 * | |
340 * Purpose: Grow the hash table to the next available size. | |
341 * | |
342 * Args: old - the GKI hash table to reallocate. | |
343 * | |
344 * Returns: 1 on success (the hash table is changed); | |
345 * 0 on failure; the table is already at its maximum size, | |
346 * and the hash table is returned unchanged. | |
347 */ | |
348 static int | |
349 gki_upsize(GKI *old) | |
350 { | |
351 GKI *new; | |
352 int i; | |
353 struct gki_elem *optr; | |
354 struct gki_elem *nptr; | |
355 int val; | |
356 | |
357 if (old->primelevel >= GKI_NPRIMES-1) return 0; | |
358 new = gki_alloc(old->primelevel+1); | |
359 | |
360 /* Read the old, store in the new, while *not changing* | |
361 * any key indices. Because of the way the lists are | |
362 * treated as LIFO stacks, all the lists are reversed | |
363 * in the new structure. | |
364 */ | |
365 for (i = 0; i < old->nhash; i++) | |
366 { | |
367 optr = old->table[i]; | |
368 while (optr != NULL) | |
369 { | |
370 val = gki_hashvalue(new, optr->key); | |
371 | |
372 nptr = new->table[val]; | |
373 new->table[val] = optr; | |
374 optr = optr->nxt; | |
375 new->table[val]->nxt = nptr; | |
376 } | |
377 } | |
378 free(old->table); | |
379 | |
380 /* Now swap within the interior of the structures, so the old | |
381 * structure is updated to the new structure. | |
382 * (nkeys is identical, so we don't need to swap that element.) | |
383 */ | |
384 old->primelevel = new->primelevel; | |
385 old->nhash = new->nhash; | |
386 old->table = new->table; | |
387 free(new); | |
388 return 1; | |
389 } |