@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */
 
 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET 10
+#define NTUP_PER_BUCKET 1
 
 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
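The hunk above drops the target bucket load from 10 tuples per bucket to 1. A probe has to walk roughly NTUP_PER_BUCKET tuples per hash chain, so the change buys about 10x shorter chains at the price of about 10x more bucket-header pointers. A minimal back-of-envelope sketch of that trade, with a made-up tuple count (this is standalone illustration code, not backend code):

```c
#include <stdio.h>

int
main(void)
{
    double  ntuples = 1000000.0;    /* made-up inner relation size */
    int     old_load = 10;          /* previous NTUP_PER_BUCKET */
    int     new_load = 1;           /* new NTUP_PER_BUCKET */

    /* A probe walks ~load tuples per chain, so load 1 means ~10x less
     * chain traversal than load 10 for the same data. */
    printf("buckets at load 10: %.0f\n", ntuples / old_load);
    printf("buckets at load 1:  %.0f\n", ntuples / new_load);

    /* The cost: one extra pointer per extra bucket header. */
    printf("extra pointer bytes: %.0f\n",
           (ntuples / new_load - ntuples / old_load) * sizeof(void *));
    return 0;
}
```

At a million tuples the extra pointer array costs about 7 MB on a 64-bit build, which is why the sizing logic below now charges the bucket array against work_mem explicitly.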
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
     int         tupsize;
     double      inner_rel_bytes;
+    long        bucket_bytes;
     long        hash_table_bytes;
     long        skew_table_bytes;
     long        max_pointers;
-    int         nbatch;
+    int         nbatch = 1;
     int         nbuckets;
-    int         i;
+    double      dbuckets;
 
     /* Force a plausible relation size if no info */
     if (ntuples <= 0.0)
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 
     /*
      * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-     * memory is filled.  Set nbatch to the smallest power of 2 that appears
-     * sufficient.  The Min() steps limit the results so that the pointer
-     * arrays we'll try to allocate do not exceed work_mem.
+     * memory is filled, assuming a single batch.  The Min() step limits the
+     * results so that the pointer arrays we'll try to allocate do not exceed
+     * work_mem.
      */
     max_pointers = (work_mem * 1024L) / sizeof(void *);
     /* also ensure we avoid integer overflow in nbatch and nbuckets */
     max_pointers = Min(max_pointers, INT_MAX / 2);
+    dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
+    dbuckets = Min(dbuckets, max_pointers);
+    nbuckets = Max((int) dbuckets, 1024);
+    nbuckets = 1 << my_log2(nbuckets);
+    bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 
-    if (inner_rel_bytes > hash_table_bytes)
+    /*
+     * If there's not enough space to store the projected number of tuples
+     * and the required bucket headers, we will need multiple batches.
+     */
+    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
     {
         /* We'll need multiple batches */
         long        lbuckets;
         double      dbatch;
         int         minbatch;
+        long        bucket_size;
 
-        lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
+        /*
+         * Estimate the number of buckets we'll want to have when work_mem
+         * is entirely full.  Each bucket will contain a bucket pointer plus
+         * NTUP_PER_BUCKET tuples, whose projected size already includes
+         * overhead for the hash code, pointer to the next tuple, etc.
+         */
+        bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
+        lbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
         lbuckets = Min(lbuckets, max_pointers);
         nbuckets = (int) lbuckets;
+        bucket_bytes = nbuckets * sizeof(HashJoinTuple);
+
+        /*
+         * Buckets are simple pointers to hashjoin tuples, while tupsize
+         * includes the pointer, hash code, and MinimalTupleData.  So buckets
+         * should never really exceed 25% of work_mem (even for
+         * NTUP_PER_BUCKET=1); except maybe for work_mem values that are not
+         * 2^N bytes, where we might get more because of doubling.  So let's
+         * look for 50% here.
+         */
+        Assert(bucket_bytes <= hash_table_bytes / 2);
 
-        dbatch = ceil(inner_rel_bytes / hash_table_bytes);
+        /* Calculate required number of batches. */
+        dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
         dbatch = Min(dbatch, max_pointers);
         minbatch = (int) dbatch;
         nbatch = 2;
         while (nbatch < minbatch)
             nbatch <<= 1;
     }
-    else
-    {
-        /* We expect the hashtable to fit in memory */
-        double      dbuckets;
-
-        dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
-        dbuckets = Min(dbuckets, max_pointers);
-        nbuckets = (int) dbuckets;
-
-        nbatch = 1;
-    }
-
-    /*
-     * Both nbuckets and nbatch must be powers of 2 to make
-     * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
-     * nbuckets to the next larger power of 2.  We also force nbuckets to not
-     * be real small, by starting the search at 2^10.  (Note: above we made
-     * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
-     * overflow, nor can the final shift to recalculate nbuckets.)
-     */
-    i = 10;
-    while ((1 << i) < nbuckets)
-        i++;
-    nbuckets = (1 << i);
 
     *numbuckets = nbuckets;
     *numbatches = nbatch;
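The rewritten sizing path above replaces the old if/else with a single-batch guess that is corrected when tuples plus bucket headers exceed the budget. Below is a standalone sketch of that arithmetic under made-up inputs; `my_log2` here is a simplified stand-in for the backend helper, `sizeof(void *)` approximates `sizeof(HashJoinTuple)` (a pointer type), and the `max_pointers` clamp and `Min`/`Max` macros are omitted for brevity:

```c
#include <math.h>
#include <stdio.h>

/*
 * Simplified stand-in for the backend's my_log2(): the smallest p such
 * that 2^p >= n (n must be positive and fit in a long).
 */
static int
my_log2(long n)
{
    int     p = 0;
    long    v = 1;

    while (v < n)
    {
        v <<= 1;
        p++;
    }
    return p;
}

int
main(void)
{
    /* made-up inputs; the real code derives these from planner estimates */
    double  ntuples = 5000000.0;                    /* inner-side row estimate */
    int     tupsize = 64;                           /* bytes per stored tuple */
    long    hash_table_bytes = 4L * 1024 * 1024;    /* work_mem budget */
    int     ntup_per_bucket = 1;                    /* NTUP_PER_BUCKET */

    double  inner_rel_bytes = ntuples * tupsize;
    int     nbatch = 1;
    int     nbuckets;
    long    bucket_bytes;

    /* single-batch guess: one bucket per tuple, power of 2, floor of 1024 */
    double  dbuckets = ceil(ntuples / ntup_per_bucket);
    nbuckets = (dbuckets < 1024) ? 1024 : (int) dbuckets;
    nbuckets = 1 << my_log2(nbuckets);
    bucket_bytes = (long) sizeof(void *) * nbuckets;

    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
    {
        /* resize buckets for a full work_mem, then charge the bucket
         * array before dividing the rest of the budget into batches */
        long    bucket_size = tupsize * ntup_per_bucket + sizeof(void *);
        int     minbatch;

        nbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
        bucket_bytes = (long) sizeof(void *) * nbuckets;

        minbatch = (int) ceil(inner_rel_bytes /
                              (hash_table_bytes - bucket_bytes));
        for (nbatch = 2; nbatch < minbatch; nbatch <<= 1)
            ;
    }

    printf("nbuckets = %d, nbatch = %d\n", nbuckets, nbatch);
    return 0;
}
```

Note how the batch count is computed against `hash_table_bytes - bucket_bytes`: the bucket array is now treated as fixed overhead that shrinks the memory actually available for tuples, rather than being ignored as in the deleted `dbatch = ceil(inner_rel_bytes / hash_table_bytes)`.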
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
         hashtable->spaceUsed += hashTupleSize;
         if (hashtable->spaceUsed > hashtable->spacePeak)
             hashtable->spacePeak = hashtable->spaceUsed;
-        if (hashtable->spaceUsed > hashtable->spaceAllowed)
+        if (hashtable->spaceUsed + hashtable->nbuckets * sizeof(HashJoinTuple)
+            > hashtable->spaceAllowed)
             ExecHashIncreaseNumBatches(hashtable);
     }
     else
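The final hunk makes the runtime check in ExecHashTableInsert consistent with the planner-side accounting: the bucket-pointer array now counts against spaceAllowed, so batch doubling triggers before total memory use overshoots work_mem. A hedged sketch of the old versus new predicate, using a mocked-up struct rather than the real HashJoinTableData:

```c
#include <stdbool.h>
#include <stdio.h>

/* mocked-up subset of HashJoinTableData; field names follow the diff */
typedef struct
{
    long    spaceUsed;      /* bytes of tuple data stored so far */
    long    spaceAllowed;   /* work_mem budget in bytes */
    int     nbuckets;       /* current number of buckets */
} FakeHashTable;

/* old predicate: tuple space alone against the budget */
static bool
grow_batches_old(const FakeHashTable *ht)
{
    return ht->spaceUsed > ht->spaceAllowed;
}

/* new predicate: tuple space plus the bucket-pointer array */
static bool
grow_batches_new(const FakeHashTable *ht)
{
    return ht->spaceUsed + (long) sizeof(void *) * ht->nbuckets
        > ht->spaceAllowed;
}

int
main(void)
{
    /* 3.8 MB of tuples, 4 MB budget, 64K buckets = 512 KB of pointers */
    FakeHashTable ht = {3800L * 1024, 4096L * 1024, 65536};

    printf("old check fires: %d\n", grow_batches_old(&ht));    /* 0 */
    printf("new check fires: %d\n", grow_batches_new(&ht));    /* 1 */
    return 0;
}
```

With NTUP_PER_BUCKET = 1 the bucket array can amount to a quarter or more of work_mem (see the Assert added above), so the old tuples-only check could overshoot the budget by a large margin before batch growth kicked in.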