PostgreSQL Source Code git master
dshash.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dshash.c
4 * Concurrent hash tables backed by dynamic shared memory areas.
5 *
6 * This is an open hashing hash table, with a linked list at each table
7 * entry. It supports dynamic resizing, as required to prevent the linked
8 * lists from growing too long on average. Currently, only growing is
9 * supported: the hash table never becomes smaller.
10 *
11 * To deal with concurrency, it has a fixed size set of partitions, each of
12 * which is independently locked. Each bucket maps to a partition; so insert,
13 * find and iterate operations normally only acquire one lock. Therefore,
14 * good concurrency is achieved whenever such operations don't collide at the
15 * lock partition level. However, when a resize operation begins, all
16 * partition locks must be acquired simultaneously for a brief period. This
17 * is only expected to happen a small number of times until a stable size is
18 * found, since growth is geometric.
19 *
20 * Future versions may support iterators and incremental resizing; for now
21 * the implementation is minimalist.
22 *
23 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
25 *
26 * IDENTIFICATION
27 * src/backend/lib/dshash.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32#include "postgres.h"
33
34#include <limits.h>
35
36#include "common/hashfn.h"
37#include "lib/dshash.h"
38#include "storage/lwlock.h"
39#include "utils/dsa.h"
40
41/*
42 * An item in the hash table. This wraps the user's entry object in an
43 * envelop that holds a pointer back to the bucket and a pointer to the next
44 * item in the bucket.
45 */
47{
48 /* The next item in the same bucket. */
50 /* The hashed key, to avoid having to recompute it. */
52 /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */
53};
54
55/*
56 * The number of partitions for locking purposes. This is set to match
57 * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
58 * the buffer pool must be good enough for any other purpose. This could
59 * become a runtime parameter in future.
60 */
61#define DSHASH_NUM_PARTITIONS_LOG2 7
62#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
63
64/* A magic value used to identify our hash tables. */
65#define DSHASH_MAGIC 0x75ff6a20
66
67/*
68 * Tracking information for each lock partition. Initially, each partition
69 * corresponds to one bucket, but each time the hash table grows, the buckets
70 * covered by each partition split so the number of buckets covered doubles.
71 *
72 * We might want to add padding here so that each partition is on a different
73 * cache line, but doing so would bloat this structure considerably.
74 */
75typedef struct dshash_partition
76{
77 LWLock lock; /* Protects all buckets in this partition. */
78 size_t count; /* # of items in this partition's buckets */
80
81/*
82 * The head object for a hash table. This will be stored in dynamic shared
83 * memory.
84 */
86{
91
92 /*
93 * The following members are written to only when ALL partitions locks are
94 * held. They can be read when any one partition lock is held.
95 */
96
97 /* Number of buckets expressed as power of 2 (8 = 256 buckets). */
98 size_t size_log2; /* log2(number of buckets) */
99 dsa_pointer buckets; /* current bucket array */
101
102/*
103 * Per-backend state for a dynamic hash table.
104 */
106{
107 dsa_area *area; /* Backing dynamic shared memory area. */
108 dshash_parameters params; /* Parameters. */
109 void *arg; /* User-supplied data pointer. */
110 dshash_table_control *control; /* Control object in DSM. */
111 dsa_pointer *buckets; /* Current bucket pointers in DSM. */
112 size_t size_log2; /* log2(number of buckets) */
113};
114
115/* Given a pointer to an item, find the entry (user data) it holds. */
116#define ENTRY_FROM_ITEM(item) \
117 ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
118
119/* Given a pointer to an entry, find the item that holds it. */
120#define ITEM_FROM_ENTRY(entry) \
121 ((dshash_table_item *)((char *)(entry) - \
122 MAXALIGN(sizeof(dshash_table_item))))
123
124/* How many resize operations (bucket splits) have there been? */
125#define NUM_SPLITS(size_log2) \
126 (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
127
128/* How many buckets are there in a given size? */
129#define NUM_BUCKETS(size_log2) \
130 (((size_t) 1) << (size_log2))
131
132/* How many buckets are there in each partition at a given size? */
133#define BUCKETS_PER_PARTITION(size_log2) \
134 (((size_t) 1) << NUM_SPLITS(size_log2))
135
136/* Max entries before we need to grow. Half + quarter = 75% load factor. */
137#define MAX_COUNT_PER_PARTITION(hash_table) \
138 (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
139 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
140
141/* Choose partition based on the highest order bits of the hash. */
142#define PARTITION_FOR_HASH(hash) \
143 (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
144
145/*
146 * Find the bucket index for a given hash and table size. Each time the table
147 * doubles in size, the appropriate bucket for a given hash value doubles and
148 * possibly adds one, depending on the newly revealed bit, so that all buckets
149 * are split.
150 */
151#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
152 (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
153
154/* The index of the first bucket in a given partition. */
155#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
156 ((partition) << NUM_SPLITS(size_log2))
157
158/* Choose partition based on bucket index. */
159#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
160 ((bucket_idx) >> NUM_SPLITS(size_log2))
161
162/* The head of the active bucket for a given hash value (lvalue). */
163#define BUCKET_FOR_HASH(hash_table, hash) \
164 (hash_table->buckets[ \
165 BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
166 hash_table->size_log2)])
167
168static void delete_item(dshash_table *hash_table,
169 dshash_table_item *item);
170static void resize(dshash_table *hash_table, size_t new_size_log2);
171static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
172static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
173 const void *key,
174 dsa_pointer item_pointer);
175static void insert_item_into_bucket(dshash_table *hash_table,
176 dsa_pointer item_pointer,
177 dshash_table_item *item,
178 dsa_pointer *bucket);
180 const void *key,
181 dsa_pointer *bucket);
182static bool delete_key_from_bucket(dshash_table *hash_table,
183 const void *key,
184 dsa_pointer *bucket_head);
185static bool delete_item_from_bucket(dshash_table *hash_table,
186 dshash_table_item *item,
187 dsa_pointer *bucket_head);
188static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
189static inline bool equal_keys(dshash_table *hash_table,
190 const void *a, const void *b);
191static inline void copy_key(dshash_table *hash_table, void *dest,
192 const void *src);
193
194#define PARTITION_LOCK(hash_table, i) \
195 (&(hash_table)->control->partitions[(i)].lock)
196
197#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
198 Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
199 DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
200
201/*
202 * Create a new hash table backed by the given dynamic shared area, with the
203 * given parameters. The returned object is allocated in backend-local memory
204 * using the current MemoryContext. 'arg' will be passed through to the
205 * compare, hash, and copy functions.
206 */
208dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
209{
210 dshash_table *hash_table;
211 dsa_pointer control;
212
213 /* Allocate the backend-local object representing the hash table. */
214 hash_table = palloc(sizeof(dshash_table));
215
216 /* Allocate the control object in shared memory. */
217 control = dsa_allocate(area, sizeof(dshash_table_control));
218
219 /* Set up the local and shared hash table structs. */
220 hash_table->area = area;
221 hash_table->params = *params;
222 hash_table->arg = arg;
223 hash_table->control = dsa_get_address(area, control);
224 hash_table->control->handle = control;
225 hash_table->control->magic = DSHASH_MAGIC;
226 hash_table->control->lwlock_tranche_id = params->tranche_id;
227
228 /* Set up the array of lock partitions. */
229 {
231 int tranche_id = hash_table->control->lwlock_tranche_id;
232 int i;
233
234 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
235 {
236 LWLockInitialize(&partitions[i].lock, tranche_id);
237 partitions[i].count = 0;
238 }
239 }
240
241 /*
242 * Set up the initial array of buckets. Our initial size is the same as
243 * the number of partitions.
244 */
246 hash_table->control->buckets =
250 if (!DsaPointerIsValid(hash_table->control->buckets))
251 {
252 dsa_free(area, control);
254 (errcode(ERRCODE_OUT_OF_MEMORY),
255 errmsg("out of memory"),
256 errdetail("Failed on DSA request of size %zu.",
258 }
259 hash_table->buckets = dsa_get_address(area,
260 hash_table->control->buckets);
261 hash_table->size_log2 = hash_table->control->size_log2;
262
263 return hash_table;
264}
265
266/*
267 * Attach to an existing hash table using a handle. The returned object is
268 * allocated in backend-local memory using the current MemoryContext. 'arg'
269 * will be passed through to the compare and hash functions.
270 */
273 dshash_table_handle handle, void *arg)
274{
275 dshash_table *hash_table;
276 dsa_pointer control;
277
278 /* Allocate the backend-local object representing the hash table. */
279 hash_table = palloc(sizeof(dshash_table));
280
281 /* Find the control object in shared memory. */
282 control = handle;
283
284 /* Set up the local hash table struct. */
285 hash_table->area = area;
286 hash_table->params = *params;
287 hash_table->arg = arg;
288 hash_table->control = dsa_get_address(area, control);
289 Assert(hash_table->control->magic == DSHASH_MAGIC);
290
291 /*
292 * These will later be set to the correct values by
293 * ensure_valid_bucket_pointers(), at which time we'll be holding a
294 * partition lock for interlocking against concurrent resizing.
295 */
296 hash_table->buckets = NULL;
297 hash_table->size_log2 = 0;
298
299 return hash_table;
300}
301
302/*
303 * Detach from a hash table. This frees backend-local resources associated
304 * with the hash table, but the hash table will continue to exist until it is
305 * either explicitly destroyed (by a backend that is still attached to it), or
306 * the area that backs it is returned to the operating system.
307 */
308void
310{
312
313 /* The hash table may have been destroyed. Just free local memory. */
314 pfree(hash_table);
315}
316
317/*
318 * Destroy a hash table, returning all memory to the area. The caller must be
319 * certain that no other backend will attempt to access the hash table before
320 * calling this function. Other backend must explicitly call dshash_detach to
321 * free up backend-local memory associated with the hash table. The backend
322 * that calls dshash_destroy must not call dshash_detach.
323 */
324void
326{
327 size_t size;
328 size_t i;
329
330 Assert(hash_table->control->magic == DSHASH_MAGIC);
332
333 /* Free all the entries. */
334 size = NUM_BUCKETS(hash_table->size_log2);
335 for (i = 0; i < size; ++i)
336 {
337 dsa_pointer item_pointer = hash_table->buckets[i];
338
339 while (DsaPointerIsValid(item_pointer))
340 {
341 dshash_table_item *item;
342 dsa_pointer next_item_pointer;
343
344 item = dsa_get_address(hash_table->area, item_pointer);
345 next_item_pointer = item->next;
346 dsa_free(hash_table->area, item_pointer);
347 item_pointer = next_item_pointer;
348 }
349 }
350
351 /*
352 * Vandalize the control block to help catch programming errors where
353 * other backends access the memory formerly occupied by this hash table.
354 */
355 hash_table->control->magic = 0;
356
357 /* Free the active table and control object. */
358 dsa_free(hash_table->area, hash_table->control->buckets);
359 dsa_free(hash_table->area, hash_table->control->handle);
360
361 pfree(hash_table);
362}
363
364/*
365 * Get a handle that can be used by other processes to attach to this hash
366 * table.
367 */
370{
371 Assert(hash_table->control->magic == DSHASH_MAGIC);
372
373 return hash_table->control->handle;
374}
375
376/*
377 * Look up an entry, given a key. Returns a pointer to an entry if one can be
378 * found with the given key. Returns NULL if the key is not found. If a
379 * non-NULL value is returned, the entry is locked and must be released by
380 * calling dshash_release_lock. If an error is raised before
381 * dshash_release_lock is called, the lock will be released automatically, but
382 * the caller must take care to ensure that the entry is not left corrupted.
383 * The lock mode is either shared or exclusive depending on 'exclusive'.
384 *
385 * The caller must not hold a lock already.
386 *
387 * Note that the lock held is in fact an LWLock, so interrupts will be held on
388 * return from this function, and not resumed until dshash_release_lock is
389 * called. It is a very good idea for the caller to release the lock quickly.
390 */
391void *
392dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
393{
395 size_t partition;
396 dshash_table_item *item;
397
398 hash = hash_key(hash_table, key);
399 partition = PARTITION_FOR_HASH(hash);
400
401 Assert(hash_table->control->magic == DSHASH_MAGIC);
403
404 LWLockAcquire(PARTITION_LOCK(hash_table, partition),
405 exclusive ? LW_EXCLUSIVE : LW_SHARED);
407
408 /* Search the active bucket. */
409 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
410
411 if (!item)
412 {
413 /* Not found. */
414 LWLockRelease(PARTITION_LOCK(hash_table, partition));
415 return NULL;
416 }
417 else
418 {
419 /* The caller will free the lock by calling dshash_release_lock. */
420 return ENTRY_FROM_ITEM(item);
421 }
422}
423
424/*
425 * Returns a pointer to an exclusively locked item which must be released with
426 * dshash_release_lock. If the key is found in the hash table, 'found' is set
427 * to true and a pointer to the existing entry is returned. If the key is not
428 * found, 'found' is set to false, and a pointer to a newly created entry is
429 * returned.
430 *
431 * Notes above dshash_find() regarding locking and error handling equally
432 * apply here.
433 */
434void *
436 const void *key,
437 bool *found)
438{
440 size_t partition_index;
441 dshash_partition *partition;
442 dshash_table_item *item;
443
444 hash = hash_key(hash_table, key);
445 partition_index = PARTITION_FOR_HASH(hash);
446 partition = &hash_table->control->partitions[partition_index];
447
448 Assert(hash_table->control->magic == DSHASH_MAGIC);
450
451restart:
452 LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
455
456 /* Search the active bucket. */
457 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
458
459 if (item)
460 *found = true;
461 else
462 {
463 *found = false;
464
465 /* Check if we are getting too full. */
466 if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
467 {
468 /*
469 * The load factor (= keys / buckets) for all buckets protected by
470 * this partition is > 0.75. Presumably the same applies
471 * generally across the whole hash table (though we don't attempt
472 * to track that directly to avoid contention on some kind of
473 * central counter; we just assume that this partition is
474 * representative). This is a good time to resize.
475 *
476 * Give up our existing lock first, because resizing needs to
477 * reacquire all the locks in the right order to avoid deadlocks.
478 */
479 LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
480 resize(hash_table, hash_table->size_log2 + 1);
481
482 goto restart;
483 }
484
485 /* Finally we can try to insert the new item. */
486 item = insert_into_bucket(hash_table, key,
487 &BUCKET_FOR_HASH(hash_table, hash));
488 item->hash = hash;
489 /* Adjust per-lock-partition counter for load factor knowledge. */
490 ++partition->count;
491 }
492
493 /* The caller must release the lock with dshash_release_lock. */
494 return ENTRY_FROM_ITEM(item);
495}
496
497/*
498 * Remove an entry by key. Returns true if the key was found and the
499 * corresponding entry was removed.
500 *
501 * To delete an entry that you already have a pointer to, see
502 * dshash_delete_entry.
503 */
504bool
505dshash_delete_key(dshash_table *hash_table, const void *key)
506{
508 size_t partition;
509 bool found;
510
511 Assert(hash_table->control->magic == DSHASH_MAGIC);
513
514 hash = hash_key(hash_table, key);
515 partition = PARTITION_FOR_HASH(hash);
516
517 LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
519
520 if (delete_key_from_bucket(hash_table, key,
521 &BUCKET_FOR_HASH(hash_table, hash)))
522 {
523 Assert(hash_table->control->partitions[partition].count > 0);
524 found = true;
525 --hash_table->control->partitions[partition].count;
526 }
527 else
528 found = false;
529
530 LWLockRelease(PARTITION_LOCK(hash_table, partition));
531
532 return found;
533}
534
535/*
536 * Remove an entry. The entry must already be exclusively locked, and must
537 * have been obtained by dshash_find or dshash_find_or_insert. Note that this
538 * function releases the lock just like dshash_release_lock.
539 *
540 * To delete an entry by key, see dshash_delete_key.
541 */
542void
543dshash_delete_entry(dshash_table *hash_table, void *entry)
544{
545 dshash_table_item *item = ITEM_FROM_ENTRY(entry);
546 size_t partition = PARTITION_FOR_HASH(item->hash);
547
548 Assert(hash_table->control->magic == DSHASH_MAGIC);
549 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
550 LW_EXCLUSIVE));
551
552 delete_item(hash_table, item);
553 LWLockRelease(PARTITION_LOCK(hash_table, partition));
554}
555
556/*
557 * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
558 */
559void
560dshash_release_lock(dshash_table *hash_table, void *entry)
561{
562 dshash_table_item *item = ITEM_FROM_ENTRY(entry);
563 size_t partition_index = PARTITION_FOR_HASH(item->hash);
564
565 Assert(hash_table->control->magic == DSHASH_MAGIC);
566
567 LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
568}
569
570/*
571 * A compare function that forwards to memcmp.
572 */
573int
574dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
575{
576 return memcmp(a, b, size);
577}
578
579/*
580 * A hash function that forwards to tag_hash.
581 */
583dshash_memhash(const void *v, size_t size, void *arg)
584{
585 return tag_hash(v, size);
586}
587
588/*
589 * A copy function that forwards to memcpy.
590 */
591void
592dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
593{
594 (void) memcpy(dest, src, size);
595}
596
597/*
598 * A compare function that forwards to strcmp.
599 */
600int
601dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
602{
603 Assert(strlen((const char *) a) < size);
604 Assert(strlen((const char *) b) < size);
605
606 return strcmp((const char *) a, (const char *) b);
607}
608
609/*
610 * A hash function that forwards to string_hash.
611 */
613dshash_strhash(const void *v, size_t size, void *arg)
614{
615 Assert(strlen((const char *) v) < size);
616
617 return string_hash((const char *) v, size);
618}
619
620/*
621 * A copy function that forwards to strcpy.
622 */
623void
624dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
625{
626 Assert(strlen((const char *) src) < size);
627
628 (void) strcpy((char *) dest, (const char *) src);
629}
630
631/*
632 * Sequentially scan through dshash table and return all the elements one by
633 * one, return NULL when all elements have been returned.
634 *
635 * dshash_seq_term needs to be called when a scan finished. The caller may
636 * delete returned elements midst of a scan by using dshash_delete_current()
637 * if exclusive = true.
638 */
639void
641 bool exclusive)
642{
643 status->hash_table = hash_table;
644 status->curbucket = 0;
645 status->nbuckets = 0;
646 status->curitem = NULL;
648 status->curpartition = -1;
649 status->exclusive = exclusive;
650}
651
652/*
653 * Returns the next element.
654 *
655 * Returned elements are locked and the caller may not release the lock. It is
656 * released by future calls to dshash_seq_next() or dshash_seq_term().
657 */
658void *
660{
661 dsa_pointer next_item_pointer;
662
663 /*
664 * Not yet holding any partition locks. Need to determine the size of the
665 * hash table, it could have been resized since we were looking last.
666 * Since we iterate in partition order, we can start by unconditionally
667 * lock partition 0.
668 *
669 * Once we hold the lock, no resizing can happen until the scan ends. So
670 * we don't need to repeatedly call ensure_valid_bucket_pointers().
671 */
672 if (status->curpartition == -1)
673 {
674 Assert(status->curbucket == 0);
676
677 status->curpartition = 0;
678
680 status->curpartition),
681 status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
682
684
685 status->nbuckets =
687 next_item_pointer = status->hash_table->buckets[status->curbucket];
688 }
689 else
690 next_item_pointer = status->pnextitem;
691
693 status->curpartition),
694 status->exclusive ? LW_EXCLUSIVE : LW_SHARED));
695
696 /* Move to the next bucket if we finished the current bucket */
697 while (!DsaPointerIsValid(next_item_pointer))
698 {
699 int next_partition;
700
701 if (++status->curbucket >= status->nbuckets)
702 {
703 /* all buckets have been scanned. finish. */
704 return NULL;
705 }
706
707 /* Check if move to the next partition */
708 next_partition =
710 status->hash_table->size_log2);
711
712 if (status->curpartition != next_partition)
713 {
714 /*
715 * Move to the next partition. Lock the next partition then
716 * release the current, not in the reverse order to avoid
717 * concurrent resizing. Avoid dead lock by taking lock in the
718 * same order with resize().
719 */
721 next_partition),
722 status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
724 status->curpartition));
725 status->curpartition = next_partition;
726 }
727
728 next_item_pointer = status->hash_table->buckets[status->curbucket];
729 }
730
731 status->curitem =
732 dsa_get_address(status->hash_table->area, next_item_pointer);
733
734 /*
735 * The caller may delete the item. Store the next item in case of
736 * deletion.
737 */
738 status->pnextitem = status->curitem->next;
739
740 return ENTRY_FROM_ITEM(status->curitem);
741}
742
743/*
744 * Terminates the seqscan and release all locks.
745 *
746 * Needs to be called after finishing or when exiting a seqscan.
747 */
748void
750{
751 if (status->curpartition >= 0)
753}
754
755/*
756 * Remove the current entry of the seq scan.
757 */
758void
760{
761 dshash_table *hash_table = status->hash_table;
762 dshash_table_item *item = status->curitem;
763 size_t partition PG_USED_FOR_ASSERTS_ONLY;
764
765 partition = PARTITION_FOR_HASH(item->hash);
766
767 Assert(status->exclusive);
768 Assert(hash_table->control->magic == DSHASH_MAGIC);
769 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
770 LW_EXCLUSIVE));
771
772 delete_item(hash_table, item);
773}
774
775/*
776 * Print debugging information about the internal state of the hash table to
777 * stderr. The caller must hold no partition locks.
778 */
779void
781{
782 size_t i;
783 size_t j;
784
785 Assert(hash_table->control->magic == DSHASH_MAGIC);
787
788 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
789 {
790 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
792 }
793
795
796 fprintf(stderr,
797 "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
798 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
799 {
800 dshash_partition *partition = &hash_table->control->partitions[i];
801 size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
802 size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
803
804 fprintf(stderr, " partition %zu\n", i);
805 fprintf(stderr,
806 " active buckets (key count = %zu)\n", partition->count);
807
808 for (j = begin; j < end; ++j)
809 {
810 size_t count = 0;
811 dsa_pointer bucket = hash_table->buckets[j];
812
813 while (DsaPointerIsValid(bucket))
814 {
815 dshash_table_item *item;
816
817 item = dsa_get_address(hash_table->area, bucket);
818
819 bucket = item->next;
820 ++count;
821 }
822 fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
823 }
824 }
825
826 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
827 LWLockRelease(PARTITION_LOCK(hash_table, i));
828}
829
830/*
831 * Delete a locked item to which we have a pointer.
832 */
833static void
835{
836 size_t hash = item->hash;
837 size_t partition = PARTITION_FOR_HASH(hash);
838
839 Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
840
841 if (delete_item_from_bucket(hash_table, item,
842 &BUCKET_FOR_HASH(hash_table, hash)))
843 {
844 Assert(hash_table->control->partitions[partition].count > 0);
845 --hash_table->control->partitions[partition].count;
846 }
847 else
848 {
849 Assert(false);
850 }
851}
852
853/*
854 * Grow the hash table if necessary to the requested number of buckets. The
855 * requested size must be double some previously observed size.
856 *
857 * Must be called without any partition lock held.
858 */
859static void
860resize(dshash_table *hash_table, size_t new_size_log2)
861{
862 dsa_pointer old_buckets;
863 dsa_pointer new_buckets_shared;
864 dsa_pointer *new_buckets;
865 size_t size;
866 size_t new_size = ((size_t) 1) << new_size_log2;
867 size_t i;
868
869 /*
870 * Acquire the locks for all lock partitions. This is expensive, but we
871 * shouldn't have to do it many times.
872 */
873 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
874 {
875 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
876
878 if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
879 {
880 /*
881 * Another backend has already increased the size; we can avoid
882 * obtaining all the locks and return early.
883 */
884 LWLockRelease(PARTITION_LOCK(hash_table, 0));
885 return;
886 }
887 }
888
889 Assert(new_size_log2 == hash_table->control->size_log2 + 1);
890
891 /* Allocate the space for the new table. */
892 new_buckets_shared =
893 dsa_allocate_extended(hash_table->area,
894 sizeof(dsa_pointer) * new_size,
896 new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);
897
898 /*
899 * We've allocated the new bucket array; all that remains to do now is to
900 * reinsert all items, which amounts to adjusting all the pointers.
901 */
902 size = ((size_t) 1) << hash_table->control->size_log2;
903 for (i = 0; i < size; ++i)
904 {
905 dsa_pointer item_pointer = hash_table->buckets[i];
906
907 while (DsaPointerIsValid(item_pointer))
908 {
909 dshash_table_item *item;
910 dsa_pointer next_item_pointer;
911
912 item = dsa_get_address(hash_table->area, item_pointer);
913 next_item_pointer = item->next;
914 insert_item_into_bucket(hash_table, item_pointer, item,
915 &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
916 new_size_log2)]);
917 item_pointer = next_item_pointer;
918 }
919 }
920
921 /* Swap the hash table into place and free the old one. */
922 old_buckets = hash_table->control->buckets;
923 hash_table->control->buckets = new_buckets_shared;
924 hash_table->control->size_log2 = new_size_log2;
925 hash_table->buckets = new_buckets;
926 dsa_free(hash_table->area, old_buckets);
927
928 /* Release all the locks. */
929 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
930 LWLockRelease(PARTITION_LOCK(hash_table, i));
931}
932
933/*
934 * Make sure that our backend-local bucket pointers are up to date. The
935 * caller must have locked one lock partition, which prevents resize() from
936 * running concurrently.
937 */
938static inline void
940{
941 if (hash_table->size_log2 != hash_table->control->size_log2)
942 {
943 hash_table->buckets = dsa_get_address(hash_table->area,
944 hash_table->control->buckets);
945 hash_table->size_log2 = hash_table->control->size_log2;
946 }
947}
948
949/*
950 * Scan a locked bucket for a match, using the provided compare function.
951 */
952static inline dshash_table_item *
953find_in_bucket(dshash_table *hash_table, const void *key,
954 dsa_pointer item_pointer)
955{
956 while (DsaPointerIsValid(item_pointer))
957 {
958 dshash_table_item *item;
959
960 item = dsa_get_address(hash_table->area, item_pointer);
961 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
962 return item;
963 item_pointer = item->next;
964 }
965 return NULL;
966}
967
968/*
969 * Insert an already-allocated item into a bucket.
970 */
971static void
973 dsa_pointer item_pointer,
974 dshash_table_item *item,
975 dsa_pointer *bucket)
976{
977 Assert(item == dsa_get_address(hash_table->area, item_pointer));
978
979 item->next = *bucket;
980 *bucket = item_pointer;
981}
982
983/*
984 * Allocate space for an entry with the given key and insert it into the
985 * provided bucket.
986 */
987static dshash_table_item *
989 const void *key,
990 dsa_pointer *bucket)
991{
992 dsa_pointer item_pointer;
993 dshash_table_item *item;
994
995 item_pointer = dsa_allocate(hash_table->area,
996 hash_table->params.entry_size +
997 MAXALIGN(sizeof(dshash_table_item)));
998 item = dsa_get_address(hash_table->area, item_pointer);
999 copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
1000 insert_item_into_bucket(hash_table, item_pointer, item, bucket);
1001 return item;
1002}
1003
1004/*
1005 * Search a bucket for a matching key and delete it.
1006 */
1007static bool
1009 const void *key,
1010 dsa_pointer *bucket_head)
1011{
1012 while (DsaPointerIsValid(*bucket_head))
1013 {
1014 dshash_table_item *item;
1015
1016 item = dsa_get_address(hash_table->area, *bucket_head);
1017
1018 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
1019 {
1021
1022 next = item->next;
1023 dsa_free(hash_table->area, *bucket_head);
1024 *bucket_head = next;
1025
1026 return true;
1027 }
1028 bucket_head = &item->next;
1029 }
1030 return false;
1031}
1032
1033/*
1034 * Delete the specified item from the bucket.
1035 */
1036static bool
1038 dshash_table_item *item,
1039 dsa_pointer *bucket_head)
1040{
1041 while (DsaPointerIsValid(*bucket_head))
1042 {
1043 dshash_table_item *bucket_item;
1044
1045 bucket_item = dsa_get_address(hash_table->area, *bucket_head);
1046
1047 if (bucket_item == item)
1048 {
1050
1051 next = item->next;
1052 dsa_free(hash_table->area, *bucket_head);
1053 *bucket_head = next;
1054 return true;
1055 }
1056 bucket_head = &bucket_item->next;
1057 }
1058 return false;
1059}
1060
1061/*
1062 * Compute the hash value for a key.
1063 */
1064static inline dshash_hash
1065hash_key(dshash_table *hash_table, const void *key)
1066{
1067 return hash_table->params.hash_function(key,
1068 hash_table->params.key_size,
1069 hash_table->arg);
1070}
1071
1072/*
1073 * Check whether two keys compare equal.
1074 */
1075static inline bool
1076equal_keys(dshash_table *hash_table, const void *a, const void *b)
1077{
1078 return hash_table->params.compare_function(a, b,
1079 hash_table->params.key_size,
1080 hash_table->arg) == 0;
1081}
1082
1083/*
1084 * Copy a key.
1085 */
1086static inline void
1087copy_key(dshash_table *hash_table, void *dest, const void *src)
1088{
1089 hash_table->params.copy_function(dest, src,
1090 hash_table->params.key_size,
1091 hash_table->arg);
1092}
static int32 next
Definition: blutils.c:224
#define MAXALIGN(LEN)
Definition: c.h:815
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:228
uint32_t uint32
Definition: c.h:543
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
void * dsa_get_address(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:957
dsa_pointer dsa_allocate_extended(dsa_area *area, size_t size, int flags)
Definition: dsa.c:686
void dsa_free(dsa_area *area, dsa_pointer dp)
Definition: dsa.c:841
uint64 dsa_pointer
Definition: dsa.h:62
#define dsa_allocate(area, size)
Definition: dsa.h:109
#define InvalidDsaPointer
Definition: dsa.h:78
#define DSA_ALLOC_NO_OOM
Definition: dsa.h:74
#define DsaPointerIsValid(x)
Definition: dsa.h:106
#define DSA_ALLOC_HUGE
Definition: dsa.h:73
#define DSA_ALLOC_ZERO
Definition: dsa.h:75
bool dshash_delete_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:505
void dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:592
void dshash_delete_entry(dshash_table *hash_table, void *entry)
Definition: dshash.c:543
void dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
Definition: dshash.c:624
void dshash_destroy(dshash_table *hash_table)
Definition: dshash.c:325
void dshash_release_lock(dshash_table *hash_table, void *entry)
Definition: dshash.c:560
#define PARTITION_LOCK(hash_table, i)
Definition: dshash.c:194
void dshash_detach(dshash_table *hash_table)
Definition: dshash.c:309
void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, bool exclusive)
Definition: dshash.c:640
void * dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
Definition: dshash.c:392
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)
Definition: dshash.c:151
static bool delete_key_from_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket_head)
Definition: dshash.c:1008
dshash_hash dshash_strhash(const void *v, size_t size, void *arg)
Definition: dshash.c:613
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
Definition: dshash.c:1065
#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table)
Definition: dshash.c:197
#define ITEM_FROM_ENTRY(entry)
Definition: dshash.c:120
dshash_table_handle dshash_get_hash_table_handle(dshash_table *hash_table)
Definition: dshash.c:369
void dshash_dump(dshash_table *hash_table)
Definition: dshash.c:780
static bool equal_keys(dshash_table *hash_table, const void *a, const void *b)
Definition: dshash.c:1076
dshash_table * dshash_attach(dsa_area *area, const dshash_parameters *params, dshash_table_handle handle, void *arg)
Definition: dshash.c:272
void dshash_seq_term(dshash_seq_status *status)
Definition: dshash.c:749
#define DSHASH_MAGIC
Definition: dshash.c:65
static void insert_item_into_bucket(dshash_table *hash_table, dsa_pointer item_pointer, dshash_table_item *item, dsa_pointer *bucket)
Definition: dshash.c:972
static bool delete_item_from_bucket(dshash_table *hash_table, dshash_table_item *item, dsa_pointer *bucket_head)
Definition: dshash.c:1037
#define DSHASH_NUM_PARTITIONS
Definition: dshash.c:62
int dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:601
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2)
Definition: dshash.c:159
static dshash_table_item * insert_into_bucket(dshash_table *hash_table, const void *key, dsa_pointer *bucket)
Definition: dshash.c:988
void * dshash_find_or_insert(dshash_table *hash_table, const void *key, bool *found)
Definition: dshash.c:435
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)
Definition: dshash.c:155
#define MAX_COUNT_PER_PARTITION(hash_table)
Definition: dshash.c:137
#define ENTRY_FROM_ITEM(item)
Definition: dshash.c:116
#define DSHASH_NUM_PARTITIONS_LOG2
Definition: dshash.c:61
static void delete_item(dshash_table *hash_table, dshash_table_item *item)
Definition: dshash.c:834
dshash_hash dshash_memhash(const void *v, size_t size, void *arg)
Definition: dshash.c:583
#define NUM_BUCKETS(size_log2)
Definition: dshash.c:129
static void ensure_valid_bucket_pointers(dshash_table *hash_table)
Definition: dshash.c:939
static dshash_table_item * find_in_bucket(dshash_table *hash_table, const void *key, dsa_pointer item_pointer)
Definition: dshash.c:953
void * dshash_seq_next(dshash_seq_status *status)
Definition: dshash.c:659
dshash_table * dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
Definition: dshash.c:208
static void resize(dshash_table *hash_table, size_t new_size_log2)
Definition: dshash.c:860
static void copy_key(dshash_table *hash_table, void *dest, const void *src)
Definition: dshash.c:1087
int dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
Definition: dshash.c:574
struct dshash_table_control dshash_table_control
#define PARTITION_FOR_HASH(hash)
Definition: dshash.c:142
struct dshash_partition dshash_partition
#define BUCKET_FOR_HASH(hash_table, hash)
Definition: dshash.c:163
void dshash_delete_current(dshash_seq_status *status)
Definition: dshash.c:759
dsa_pointer dshash_table_handle
Definition: dshash.h:24
uint32 dshash_hash
Definition: dshash.h:30
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:150
uint32 tag_hash(const void *key, Size keysize)
Definition: hashfn.c:677
uint32 string_hash(const void *key, Size keysize)
Definition: hashfn.c:660
Assert(PointerIsAligned(start, uint64))
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int j
Definition: isn.c:78
int i
Definition: isn.c:77
bool LWLockHeldByMe(LWLock *lock)
Definition: lwlock.c:1977
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:2021
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:698
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
void * arg
static int partitions
Definition: pgbench.c:224
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
Definition: lwlock.h:42
Definition: dsa.c:348
dshash_hash_function hash_function
Definition: dshash.h:59
size_t key_size
Definition: dshash.h:56
dshash_compare_function compare_function
Definition: dshash.h:58
dshash_copy_function copy_function
Definition: dshash.h:60
size_t entry_size
Definition: dshash.h:57
LWLock lock
Definition: dshash.c:77
size_t count
Definition: dshash.c:78
bool exclusive
Definition: dshash.h:80
dshash_table_item * curitem
Definition: dshash.h:77
dsa_pointer pnextitem
Definition: dshash.h:78
int curpartition
Definition: dshash.h:79
dshash_table * hash_table
Definition: dshash.h:74
size_t size_log2
Definition: dshash.c:98
dshash_partition partitions[DSHASH_NUM_PARTITIONS]
Definition: dshash.c:89
dshash_table_handle handle
Definition: dshash.c:87
dsa_pointer buckets
Definition: dshash.c:99
dshash_hash hash
Definition: dshash.c:51
dsa_pointer next
Definition: dshash.c:49
dshash_parameters params
Definition: dshash.c:108
dsa_pointer * buckets
Definition: dshash.c:111
dsa_area * area
Definition: dshash.c:107
void * arg
Definition: dshash.c:109
size_t size_log2
Definition: dshash.c:112
dshash_table_control * control
Definition: dshash.c:110