-
Notifications
You must be signed in to change notification settings - Fork 0
/
hashtable.c
349 lines (306 loc) · 8.81 KB
/
hashtable.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/* $Id: hashtable.c,v 1.7 2001/12/19 22:36:06 acs Exp $
Written by Adam Siepel, Spring 2001
Copyright 2001, Adam Siepel */
/* Custom hashtable. Expects keys to be signed permutations, and only
keeps track of membership -- i.e., does not allow association of
data with keys */
/* Compile with -DUSEDB to use "db" behind the interface presented by
these functions */
/* Compile with -DTHREADSAFE to allow multiple threads to have
concurrent access to a single hashtable. Note: we assume that "db"
is not threadsafe, and enforce locking ourselves when USEDB is
defined (db man page says "None of the access methods provide any
form of concurrent access, locking, or transactions"). When USEDB
is not defined (i.e., when the custom hashtable is in use), we
allow parallel computation of the hashing function, and enforce
locking separately within each bucket. In all cases, locking is
accomplished according to the rules of the readers-writers problem */
#include "hashtable.h"
#include "errno.h"
#include <sys/stat.h>
#include <fcntl.h>
#include "stdlib.h"
#include "math.h"
#include "med_util.h"
/* Allocate and initialize a hashtable for permutations of "ngenes"
   elements.

   With USEDB defined, the table is backed by an in-memory "db" hash
   opened via dbopen(); expected_size and loading_factor are not used.

   Otherwise the custom table is built: the number of buckets is fixed
   at PAGESIZE / sizeof(int *), and the initial capacity of every
   bucket is ceil(loading_factor * expected_size / nbuckets), clamped
   to at least one slot.  Bucket storage itself is allocated lazily on
   first insertion.

   Any allocation failure prints a message on stderr and terminates
   the program with exit(errno).  The returned table is released with
   ht_free(). */
Hashtable *
new_hashtable ( int ngenes, int expected_size, float loading_factor )
{
    Hashtable *h;
#ifndef USEDB
    int i;
#endif

    h = ( Hashtable * ) malloc ( sizeof ( Hashtable ) );
    if ( h == NULL )
    {
        /* Previously the result was dereferenced unchecked */
        fprintf ( stderr, "Error allocating space for hashtable.\n" );
        exit ( errno );
    }

#ifdef USEDB
    h->db = dbopen ( NULL, O_RDWR, S_IRWXU, DB_HASH, NULL );
    if ( h->db == NULL )
    {
        fprintf ( stderr, "Error creating hashtable.\n" );
        exit ( errno );
    }
    h->ngenes = ngenes;
#ifdef THREADSAFE
    /* A single lock guards the whole db */
    mythread_rwlock_init ( &h->rwlock );
#endif
#else
    h->ngenes = ngenes;

    /* Choose the number of slots so as to fill pages evenly;
       experiments indicate that there is little value in having this
       size be larger than one page */
    h->nbuckets = PAGESIZE / sizeof ( int * );

    /* The size of each bucket depends on the loading factor, the
       expected size of the table, and the number of buckets */
    h->bucketsize =
        ceil ( loading_factor * expected_size / ( float ) h->nbuckets );

    /* A capacity of zero can never grow by doubling, so insist on at
       least one slot per bucket */
    if ( h->bucketsize < 1 )
        h->bucketsize = 1;

    /* Choose the number of digits to sample when calculating hashkeys,
       keeping it within [MIN_DIGITS, ngenes] (the upper bound wins) */
    h->idxdigits = SAMPLE_PERCENTAGE * h->ngenes;
    if ( h->idxdigits < MIN_DIGITS )
        h->idxdigits = MIN_DIGITS;
    if ( h->idxdigits > h->ngenes )
        h->idxdigits = h->ngenes;

    h->table = ( int ** ) calloc ( h->nbuckets, sizeof ( int * ) );
    h->sizes = ( int * ) calloc ( h->nbuckets, sizeof ( int ) );
    if ( h->table == NULL || h->sizes == NULL )
    {
        fprintf ( stderr, "Error allocating space for hashtable.\n" );
        exit ( errno );
    }
#ifdef THREADSAFE
    /* One readers-writers lock per bucket */
    h->rwlock =
        ( mythread_rwlock_t * ) calloc ( h->nbuckets,
                                         sizeof ( mythread_rwlock_t ) );
    if ( h->rwlock == NULL )
    {
        fprintf ( stderr, "Error allocating space for hashtable.\n" );
        exit ( errno );
    }
#endif
    for ( i = 0; i < h->nbuckets; i++ )
    {
        h->table[i] = NULL;     /* we will allocate these as needed */
        h->sizes[i] = h->bucketsize;
#ifdef THREADSAFE
        mythread_rwlock_init ( &h->rwlock[i] );
#endif
    }
#endif
    return h;
}
/* Insert the specified permutation ("perm" holds h->ngenes ints).

   With USEDB defined, the permutation is stored as a key in the db
   with a dummy integer payload; a failed put prints a message and
   terminates the program with exit(errno).  Otherwise the permutation
   is hashed to a bucket and handed to ht_insert_key().

   With THREADSAFE defined, the write lock covering the target (the
   whole db, or the single bucket) is held around the insertion. */
void
ht_insert ( Hashtable * h, int *perm )
{
#ifdef USEDB
    DBT key, data;
    int marker = 1;             /* dummy payload; only membership
                                   matters */

#ifdef THREADSAFE
    mythread_rwlock_wrlock ( &h->rwlock );
#endif
    key.data = perm;
    key.size = h->ngenes * sizeof ( int );

    /* The payload used to be a malloc'd int that was never freed and
       never checked for NULL, and its size was given as
       sizeof(int *), which over-reads the buffer wherever pointers
       are wider than int.  A local int with its true size avoids all
       three problems (assumes put() copies the bytes it is given, as
       db's put does -- see dbopen(3)) */
    data.data = &marker;
    data.size = sizeof ( marker );

    if ( h->db->put ( h->db, &key, &data, 0 ) != 0 )
    {
        fprintf ( stderr, "Error inserting in hashtable.\n" );
        exit ( errno );
    }
#ifdef THREADSAFE
    mythread_rwlock_wrunlock ( &h->rwlock );
#endif
#else
    int k;

    /* The hash is computed outside the lock; only the bucket update is
       serialized */
    k = hash ( h, perm );
#ifdef THREADSAFE
    mythread_rwlock_wrlock ( &h->rwlock[k] );
#endif
    ht_insert_key ( h, perm, k );
#ifdef THREADSAFE
    mythread_rwlock_wrunlock ( &h->rwlock[k] );
#endif
#endif
}
#ifndef USEDB
/* Store "perm" in bucket "k" of the custom table, in the first free
   slot.  A slot is h->ngenes consecutive ints, and is considered free
   when its first int is 0.  The bucket's storage is created on first
   use and enlarged via ht_realloc_bucket() when every slot is taken.
   No locking is done here.  Allocation failure prints a message and
   terminates the program with exit(errno). */
void
ht_insert_key ( Hashtable * h, int *perm, int k )
{
    int slot;
    int *entry;

    /* First insertion into this bucket: create its storage */
    if ( h->table[k] == NULL )
    {
        h->table[k] =
            ( int * ) calloc ( h->sizes[k] * h->ngenes, sizeof ( int ) );
        if ( h->table[k] == NULL )
        {
            fprintf ( stderr, "Error allocating memory for hashtable.\n" );
            exit ( errno );
        }

        /* Mark every slot as free */
        slot = 0;
        while ( slot < h->sizes[k] )
        {
            h->table[k][slot * h->ngenes] = 0;
            slot++;
        }
    }

    /* Walk the slots in order until a free one turns up */
    slot = 0;
    for ( ;; )
    {
        /* Ran off the end: grow the bucket, which appends free slots */
        if ( slot == h->sizes[k] )
            ht_realloc_bucket ( h, k );

        /* Recomputed every pass because growing may move the bucket */
        entry = h->table[k] + slot * h->ngenes;
        if ( *entry == 0 )
        {
            permcopy ( entry, perm, h->ngenes );
            return;
        }
        slot++;
    }
}
/* Empty the custom table without releasing any memory: in every
   allocated bucket, the leading run of occupied slots is marked free
   by zeroing the first int of each slot.  Scanning of a bucket stops
   at its first free slot.  No locking is done here. */
void
ht_clear ( Hashtable * h )
{
    int b;

    for ( b = 0; b < h->nbuckets; b++ )
    {
        int *bucket = h->table[b];
        int slot;

        /* Buckets that were never allocated hold nothing */
        if ( bucket == NULL )
            continue;

        slot = 0;
        while ( slot < h->sizes[b] && bucket[slot * h->ngenes] != 0 )
        {
            bucket[slot * h->ngenes] = 0;
            slot++;
        }
    }
}
#endif
/* Find the specified permutation ("perm" holds h->ngenes ints).
   Returns 1 if it is present, 0 otherwise.

   Passing create == 1 requires that the entry be created if it cannot
   be found; even then, the return value still indicates whether the
   entry existed previously.  Any other value of create performs a
   pure lookup.

   With THREADSAFE defined, a write lock is taken when create == 1 and
   a read lock otherwise (on the whole db under USEDB, on the single
   target bucket for the custom table).  Failure to insert or to grow
   a bucket prints a message and terminates the program. */
int
ht_find ( Hashtable * h, int *perm, int create )
{
#ifdef USEDB
    DBT key, data;
    int retval;
    int marker = 1;             /* dummy payload; only membership
                                   matters */

    key.data = perm;
    key.size = h->ngenes * sizeof ( int );
#ifdef THREADSAFE
    if ( create == 1 )
        mythread_rwlock_wrlock ( &h->rwlock );
    else
        mythread_rwlock_rdlock ( &h->rwlock );
#endif
    retval = h->db->get ( h->db, &key, &data, 0 );

    /* A get() result of 1 is treated as "key absent", 0 as "found" */
    if ( retval == 1 && create == 1 )
    {
        /* The payload used to be a malloc'd int that was never freed
           and never checked for NULL, with its size given as
           sizeof(int *), which over-reads the buffer wherever
           pointers are wider than int.  Use a local int with its true
           size instead (assumes put() copies the bytes it is given,
           as db's put does -- see dbopen(3)) */
        data.data = &marker;
        data.size = sizeof ( marker );
        if ( h->db->put ( h->db, &key, &data, 0 ) != 0 )
        {
            fprintf ( stderr, "Error inserting in hashtable.\n" );
            exit ( errno );
        }
    }
#ifdef THREADSAFE
    if ( create == 1 )
        mythread_rwlock_wrunlock ( &h->rwlock );
    else
        mythread_rwlock_rdunlock ( &h->rwlock );
#endif
    return ( retval == 0 ? 1 : 0 );
#else
    int i, k;
    int retval = 0;

    /* The hash is computed before any lock is taken */
    k = hash ( h, perm );
#ifdef THREADSAFE
    if ( create == 1 )
        mythread_rwlock_wrlock ( &h->rwlock[k] );
    else
        mythread_rwlock_rdlock ( &h->rwlock[k] );
#endif
    if ( h->table[k] == NULL )
    {
        /* Bucket never allocated, so the key cannot be present */
        if ( create == 1 )
        {
            ht_insert_key ( h, perm, k );
        }
    }
    else
    {
        /* Scan the slots in order; a slot whose first int is 0 is
           free and ends the occupied run */
        for ( i = 0;; i++ )
        {
            if ( i == h->sizes[k] )
            {
                /* Bucket is full and the key was not seen */
                if ( create == 1 )
                {
                    /* Growing appends free slots, so the check below
                       stores the key at index i */
                    ht_realloc_bucket ( h, k );
                }
                else
                    break;
            }
            if ( h->table[k][i * h->ngenes] == 0 )
            {
                if ( create == 1 )
                    permcopy ( &h->table[k][i * h->ngenes], perm,
                               h->ngenes );
                break;
            }
            else if ( permcmp ( &h->table[k][i * h->ngenes],
                                perm, h->ngenes ) == 0 )
            {
                retval = 1;
                break;
            }
        }
    }
#ifdef THREADSAFE
    if ( create == 1 )
        mythread_rwlock_wrunlock ( &h->rwlock[k] );
    else
        mythread_rwlock_rdunlock ( &h->rwlock[k] );
#endif
    return retval;
#endif
}
/* Release a hashtable created by new_hashtable().  Passing NULL is a
   no-op.

   With USEDB defined, the db is closed (a failed close prints a
   message and terminates the program with exit(errno)) and the handle
   is freed.  Otherwise every bucket, the bucket array, the per-bucket
   size array and -- with THREADSAFE -- the per-bucket lock array are
   freed before the handle itself. */
void
ht_free ( Hashtable * h )
{
#ifndef USEDB
    int i;
#endif

    if ( h == NULL )
        return;

#ifdef USEDB
    /* Note: h->sizes is never set up in this configuration, so it must
       not be freed here (it previously was, unconditionally) */
    if ( h->db->close ( h->db ) != 0 )
    {
        fprintf ( stderr, "Error closing hashtable.\n" );
        exit ( errno );
    }
#else
    /* Never-used buckets are NULL, which free() accepts */
    for ( i = 0; i < h->nbuckets; i++ )
        free ( h->table[i] );
    free ( h->table );
    free ( h->sizes );
#ifdef THREADSAFE
    /* The lock array is heap-allocated by new_hashtable() and was
       previously leaked.  NOTE(review): if mythread_rwlock_t needs an
       explicit destroy call, it belongs here before the free --
       confirm against the mythread API */
    free ( h->rwlock );
#endif
#endif
    free ( h );
}
#ifndef USEDB
/* Enlarge bucket "k" of the custom table: its capacity is doubled (or
   set to one slot if it was zero) and every newly added slot is marked
   free by zeroing its first int.  Existing entries are preserved,
   though the bucket may move in memory.  Failure to obtain the memory
   prints a message and terminates the program with exit(errno). */
void
ht_realloc_bucket ( Hashtable * h, int k )
{
    int old_size, i;
    int *grown;

    old_size = h->sizes[k];

    /* Doubling zero would leave the bucket with no capacity at all,
       and callers index the slot right after growing */
    h->sizes[k] = ( old_size > 0 ) ? old_size * 2 : 1;

    /* Widen before multiplying so the byte count is computed in
       size_t rather than int, and keep the old pointer intact until
       the new one is known to be good */
    grown = ( int * ) realloc ( h->table[k],
                                ( size_t ) h->sizes[k] *
                                ( size_t ) h->ngenes * sizeof ( int ) );
    if ( grown == NULL )
    {
        fprintf ( stderr, "Error allocating extra space for hashtable.\n" );
        exit ( errno );
    }
    h->table[k] = grown;

    /* realloc leaves the added region uninitialized; flag the new
       slots as free */
    for ( i = old_size; i < h->sizes[k]; i++ )
        h->table[k][i * h->ngenes] = 0;
}
/* Hashing function: operates on a modest percentage of the digits.
   h->idxdigits entries of "perm" are sampled at a fixed stride of
   h->ngenes / h->idxdigits, starting at index 0; their absolute values
   are folded into an unsigned accumulator using MULTIPLIER, and the
   result is reduced modulo h->nbuckets to give a bucket index. */
int
hash ( Hashtable * h, int *perm )
{
    unsigned long k = 0;
    int stride = h->ngenes / h->idxdigits;  /* gap between samples */
    int pos = 0;                /* index of the next sampled digit */
    int taken = 0;              /* samples folded in so far */

    while ( taken < h->idxdigits )
    {
        k = MULTIPLIER * k + abs ( perm[pos] );
        pos += stride;
        taken++;
    }

    return k % h->nbuckets;
}
#endif