@@ -47,6 +47,7 @@ static void ExecHashSkewTableInsert(HashJoinTable hashtable,
 					int bucketNumber);
 static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable);
 
+static void *dense_alloc(HashJoinTable hashtable, Size size);
 
 /* ----------------------------------------------------------------
  *		ExecHash
@@ -293,6 +294,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	hashtable->spaceUsedSkew = 0;
 	hashtable->spaceAllowedSkew =
 		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;
+	hashtable->chunks = NULL;
 
 	/*
 	 * Get info about the hash functions to be used for each hash key. Also
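
Note: this diff only touches nodeHash.c. The new chunks field and the chunk header it points to come from the companion change to src/include/executor/hashjoin.h, which is not shown here. For reference, a sketch of that declaration, with field names taken from their usage in the hunks below; treat the exact layout and the 32 kB constant as assumptions drawn from the rest of the patch:

typedef struct HashMemoryChunkData
{
	int			ntuples;		/* number of tuples stored in this chunk */
	size_t		maxlen;			/* size of the buffer holding the tuples */
	size_t		used;			/* number of buffer bytes already used */

	struct HashMemoryChunkData *next;	/* next chunk in the batch's list */

	char		data[FLEXIBLE_ARRAY_MEMBER];	/* tuple storage begins here */
} HashMemoryChunkData;

typedef struct HashMemoryChunkData *HashMemoryChunk;

#define HASH_CHUNK_SIZE			(32 * 1024L)
#define HASH_CHUNK_THRESHOLD	(HASH_CHUNK_SIZE / 4)

hashtable->chunks is the head of a per-batch linked list of these chunks, all allocated in batchCxt.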
@@ -556,10 +558,10 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	int			oldnbatch = hashtable->nbatch;
 	int			curbatch = hashtable->curbatch;
 	int			nbatch;
-	int			i;
 	MemoryContext oldcxt;
 	long		ninmemory;
 	long		nfreed;
+	HashMemoryChunk oldchunks;
 
 	/* do nothing if we've decided to shut off growth */
 	if (!hashtable->growEnabled)
@@ -612,51 +614,65 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	 */
 	ninmemory = nfreed = 0;
 
-	for (i = 0; i < hashtable->nbuckets; i++)
-	{
-		HashJoinTuple prevtuple;
-		HashJoinTuple tuple;
+	/*
+	 * We will scan through the chunks directly, so that we can reset the
+	 * buckets now and not have to keep track of which tuples in the buckets
+	 * have already been processed. We will free the old chunks as we go.
+	 */
+	memset(hashtable->buckets, 0, sizeof(HashJoinTuple *) * hashtable->nbuckets);
+	oldchunks = hashtable->chunks;
+	hashtable->chunks = NULL;
 
-		prevtuple = NULL;
-		tuple = hashtable->buckets[i];
+	/* so, let's scan through the old chunks, and all tuples in each chunk */
+	while (oldchunks != NULL)
+	{
+		HashMemoryChunk nextchunk = oldchunks->next;
+		/* position within the buffer (up to oldchunks->used) */
+		size_t		idx = 0;
 
-		while (tuple != NULL)
+		/* process all tuples stored in this chunk (and then free it) */
+		while (idx < oldchunks->used)
 		{
-			/* save link in case we delete */
-			HashJoinTuple nexttuple = tuple->next;
+			HashJoinTuple hashTuple = (HashJoinTuple) (oldchunks->data + idx);
+			MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple);
+			int			hashTupleSize = (HJTUPLE_OVERHEAD + tuple->t_len);
 			int			bucketno;
 			int			batchno;
 
 			ninmemory++;
-			ExecHashGetBucketAndBatch(hashtable, tuple->hashvalue,
+			ExecHashGetBucketAndBatch(hashtable, hashTuple->hashvalue,
 									  &bucketno, &batchno);
-			Assert(bucketno == i);
+
 			if (batchno == curbatch)
 			{
-				/* keep tuple */
-				prevtuple = tuple;
+				/* keep tuple in memory - copy it into the new chunk */
+				HashJoinTuple copyTuple =
+					(HashJoinTuple) dense_alloc(hashtable, hashTupleSize);
+				memcpy(copyTuple, hashTuple, hashTupleSize);
+
+				/* and add it back to the appropriate bucket */
+				copyTuple->next = hashtable->buckets[bucketno];
+				hashtable->buckets[bucketno] = copyTuple;
 			}
 			else
 			{
 				/* dump it out */
 				Assert(batchno > curbatch);
-				ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
-									  tuple->hashvalue,
+				ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(hashTuple),
+									  hashTuple->hashvalue,
 									  &hashtable->innerBatchFile[batchno]);
-				/* and remove from hash table */
-				if (prevtuple)
-					prevtuple->next = nexttuple;
-				else
-					hashtable->buckets[i] = nexttuple;
-				/* prevtuple doesn't change */
-				hashtable->spaceUsed -=
-					HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len;
-				pfree(tuple);
+
+				hashtable->spaceUsed -= hashTupleSize;
 				nfreed++;
 			}
 
-			tuple = nexttuple;
+			/* next tuple in this chunk */
+			idx += MAXALIGN(hashTupleSize);
 		}
+
+		/* we're done with this chunk - free it and proceed to the next one */
+		pfree(oldchunks);
+		oldchunks = nextchunk;
 	}
 
 #ifdef HJDEBUG
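
The chunk walk above works because every entry written by dense_alloc() is a HashJoinTupleData header immediately followed by its MinimalTuple, and because dense_alloc() MAXALIGNs each request, so idx += MAXALIGN(hashTupleSize) always lands on the start of the next entry. For context, the pre-existing layout macros from hashjoin.h look roughly like this (declaration approximate):

typedef struct HashJoinTupleData
{
	struct HashJoinTupleData *next;		/* link to next tuple in same bucket */
	uint32		hashvalue;				/* tuple's hash code */
	/* tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
} HashJoinTupleData;

#define HJTUPLE_OVERHEAD		MAXALIGN(sizeof(HashJoinTupleData))
#define HJTUPLE_MINTUPLE(hjtup) \
	((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))

Tuples that stay in the current batch must be copied into fresh chunks rather than kept in place, since the old chunks are pfree'd at the bottom of the loop.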
@@ -717,8 +733,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 
 	/* Create the HashJoinTuple */
 	hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
-	hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
-												   hashTupleSize);
+	hashTuple = (HashJoinTuple) dense_alloc(hashtable, hashTupleSize);
+
 	hashTuple->hashvalue = hashvalue;
 	memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
 
@@ -1068,6 +1084,9 @@ ExecHashTableReset(HashJoinTable hashtable)
 	hashtable->spaceUsed = 0;
 
 	MemoryContextSwitchTo(oldcxt);
+
+	/* Forget the chunks (the memory was freed by the context reset above). */
+	hashtable->chunks = NULL;
 }
 
 /*
@@ -1462,3 +1481,79 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
 		hashtable->spaceUsedSkew = 0;
 	}
 }
+
+/*
+ * Allocate 'size' bytes from the currently active HashMemoryChunk
+ */
+static void *
+dense_alloc(HashJoinTable hashtable, Size size)
+{
+	HashMemoryChunk newChunk;
+	char	   *ptr;
+
+	/* just in case the size is not already aligned properly */
+	size = MAXALIGN(size);
+
+	/*
+	 * If tuple size is larger than 1/4 of chunk size, allocate a separate
+	 * chunk.
+	 */
+	if (size > HASH_CHUNK_THRESHOLD)
+	{
+		/* allocate new chunk and put it at the beginning of the list */
+		newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt,
+								 offsetof(HashMemoryChunkData, data) + size);
+		newChunk->maxlen = size;
+		newChunk->used = 0;
+		newChunk->ntuples = 0;
+
+		/*
+		 * Add this chunk to the list after the first existing chunk, so that
+		 * we don't lose the remaining space in the "current" chunk.
+		 */
+		if (hashtable->chunks != NULL)
+		{
+			newChunk->next = hashtable->chunks->next;
+			hashtable->chunks->next = newChunk;
+		}
+		else
+		{
+			newChunk->next = hashtable->chunks;
+			hashtable->chunks = newChunk;
+		}
+
+		newChunk->used += size;
+		newChunk->ntuples += 1;
+
+		return newChunk->data;
+	}
+
+	/*
+	 * See if we have enough space for it in the current chunk (if any).
+	 * If not, allocate a fresh chunk.
+	 */
+	if ((hashtable->chunks == NULL) ||
+		(hashtable->chunks->maxlen - hashtable->chunks->used) < size)
+	{
+		/* allocate new chunk and put it at the beginning of the list */
+		newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt,
+					  offsetof(HashMemoryChunkData, data) + HASH_CHUNK_SIZE);
+
+		newChunk->maxlen = HASH_CHUNK_SIZE;
+		newChunk->used = size;
+		newChunk->ntuples = 1;
+
+		newChunk->next = hashtable->chunks;
+		hashtable->chunks = newChunk;
+
+		return newChunk->data;
+	}
+
+	/* There is enough space in the current chunk, let's add the tuple */
+	ptr = hashtable->chunks->data + hashtable->chunks->used;
+	hashtable->chunks->used += size;
+	hashtable->chunks->ntuples += 1;
+
+	/* return pointer to the start of the tuple memory */
+	return ptr;
+}
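
A rough sense of why dense packing pays off, using numbers that are assumptions about the standard allocator's behavior (roughly a 16-byte per-allocation header and power-of-two rounding for small requests), not something this diff states:

	per-tuple cost, palloc path:       round_to_power_of_2(72) + alloc header  ~ 128 + 16 = 144 bytes
	per-tuple cost, dense_alloc path:  MAXALIGN(72)                            =  72 bytes
	chunk header overhead:             ~32 bytes spread over the ~450 such tuples per 32 kB chunk

Oversized tuples (above HASH_CHUNK_THRESHOLD) get a dedicated chunk that is linked in behind the head of the list, so the partially filled "current" chunk keeps accepting small tuples.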