99 *
1010 *
1111 * IDENTIFICATION
12- * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.45 2002/10/31 21:59:32 tgl Exp $
12+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.45.2.1 2007/04/26 23:25:48 tgl Exp $
1313 *
1414 *-------------------------------------------------------------------------
1515 */
@@ -71,6 +71,9 @@ static bool expand_table(HTAB *hashp);
7171static bool hdefault (HTAB * hashp );
7272static bool init_htab (HTAB * hashp , long nelem );
7373static void hash_corrupted (HTAB * hashp );
74+ static void register_seq_scan (HTAB * hashp );
75+ static void deregister_seq_scan (HTAB * hashp );
76+ static bool has_seq_scans (HTAB * hashp );
7477
7578
7679/*
@@ -166,6 +169,8 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
166169 return NULL ;
167170 }
168171
172+ hashp -> frozen = false;
173+
169174 if (!hdefault (hashp ))
170175 return NULL ;
171176
@@ -623,6 +628,10 @@ hash_search(HTAB *hashp,
623628 if (currBucket != NULL )
624629 return (void * ) ELEMENTKEY (currBucket );
625630
631+ /* disallow inserts if frozen */
632+ if (hashp -> frozen )
633+ elog (ERROR , "cannot insert into a frozen hashtable" );
634+
626635 /* get the next free element */
627636 currBucket = hctl -> freeList ;
628637 if (currBucket == NULL )
@@ -645,8 +654,12 @@ hash_search(HTAB *hashp,
645654
646655 /* caller is expected to fill the data field on return */
647656
648- /* Check if it is time to split the segment */
649- if (++ hctl -> nentries / (long ) (hctl -> max_bucket + 1 ) > hctl -> ffactor )
657+ /*
658+ * Check if it is time to split a bucket. Can't split if table
659+ * is the subject of any active hash_seq_search scans.
660+ */
661+ if (++ hctl -> nentries / (long ) (hctl -> max_bucket + 1 ) >= hctl -> ffactor &&
662+ !has_seq_scans (hashp ))
650663 {
651664 /*
652665 * NOTE: failure to expand table is not a fatal error, it
@@ -665,22 +678,34 @@ hash_search(HTAB *hashp,
665678}
666679
667680/*
668- * hash_seq_init/_search
681+ * hash_seq_init/_search/_term
669682 * Sequentially search through hash table and return
670683 * all the elements one by one, return NULL when no more.
671684 *
685+ * hash_seq_term should be called if and only if the scan is abandoned before
686+ * completion; if hash_seq_search returns NULL then it has already done the
687+ * end-of-scan cleanup.
688+ *
672689 * NOTE: caller may delete the returned element before continuing the scan.
673690 * However, deleting any other element while the scan is in progress is
674691 * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
675692 * if elements are added to the table while the scan is in progress, it is
676693 * unspecified whether they will be visited by the scan or not.
694+ *
695+ * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
696+ * worry about hash_seq_term cleanup, if the hashtable is first locked against
697+ * further insertions by calling hash_freeze. This is used by nodeAgg.c,
698+ * wherein it is inconvenient to track whether a scan is still open, and
699+ * there's no possibility of further insertions after readout has begun.
677700 */
678701void
679702hash_seq_init (HASH_SEQ_STATUS * status , HTAB * hashp )
680703{
681704 status -> hashp = hashp ;
682705 status -> curBucket = 0 ;
683706 status -> curEntry = NULL ;
707+ if (!hashp -> frozen )
708+ register_seq_scan (hashp );
684709}
685710
686711void *
@@ -734,9 +759,40 @@ hash_seq_search(HASH_SEQ_STATUS *status)
734759 ++ status -> curBucket ;
735760 }
736761
762+ hash_seq_term (status );
737763 return NULL ; /* out of buckets */
738764}
739765
766+ void
767+ hash_seq_term (HASH_SEQ_STATUS * status )
768+ {
769+ if (!status -> hashp -> frozen )
770+ deregister_seq_scan (status -> hashp );
771+ }
772+
773+ /*
774+ * hash_freeze
775+ * Freeze a hashtable against future insertions (deletions are
776+ * still allowed)
777+ *
778+ * The reason for doing this is that by preventing any more bucket splits,
779+ * we no longer need to worry about registering hash_seq_search scans,
780+ * and thus caller need not be careful about ensuring hash_seq_term gets
781+ * called at the right times.
782+ *
783+ * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
784+ * with active scans (since hash_seq_term would then do the wrong thing).
785+ */
786+ void
787+ hash_freeze (HTAB * hashp )
788+ {
789+ if (hashp -> isshared )
790+ elog (ERROR , "cannot freeze shared hashtable" );
791+ if (!hashp -> frozen && has_seq_scans (hashp ))
792+ elog (ERROR , "cannot freeze hashtable with active scans" );
793+ hashp -> frozen = true;
794+ }
795+
740796
741797/********************************* UTILITIES ************************/
742798
@@ -948,3 +1004,108 @@ my_log2(long num)
9481004 ;
9491005 return i ;
9501006}
1007+
1008+
1009+ /************************* SEQ SCAN TRACKING ************************/
1010+
1011+ /*
1012+ * We track active hash_seq_search scans here. The need for this mechanism
1013+ * comes from the fact that a scan will get confused if a bucket split occurs
1014+ * while it's in progress: it might visit entries twice, or even miss some
1015+ * entirely (if it's partway through the same bucket that splits). Hence
1016+ * we want to inhibit bucket splits if there are any active scans on the
1017+ * table being inserted into. This is a fairly rare case in current usage,
1018+ * so just postponing the split until the next insertion seems sufficient.
1019+ *
1020+ * Given present usages of the function, only a few scans are likely to be
1021+ * open concurrently; so a finite-size stack of open scans seems sufficient,
1022+ * and we don't worry that linear search is too slow. Note that we do
1023+ * allow multiple scans of the same hashtable to be open concurrently.
1024+ *
1025+ * This mechanism can support concurrent scan and insertion in a shared
1026+ * hashtable if it's the same backend doing both. It would fail otherwise,
1027+ * but locking reasons seem to preclude any such scenario anyway, so we don't
1028+ * worry.
1029+ *
1030+ * This arrangement is reasonably robust if a transient hashtable is deleted
1031+ * without notifying us. The absolute worst case is we might inhibit splits
1032+ * in another table created later at exactly the same address. We will give
1033+ * a warning at transaction end for reference leaks, so any bugs leading to
1034+ * lack of notification should be easy to catch.
1035+ */
1036+
1037+ #define MAX_SEQ_SCANS 100
1038+
1039+ static HTAB * seq_scan_tables [MAX_SEQ_SCANS ]; /* tables being scanned */
1040+ static int num_seq_scans = 0 ;
1041+
1042+
1043+ /* Register a table as having an active hash_seq_search scan */
1044+ static void
1045+ register_seq_scan (HTAB * hashp )
1046+ {
1047+ if (num_seq_scans >= MAX_SEQ_SCANS )
1048+ elog (ERROR , "too many active hash_seq_search scans" );
1049+ seq_scan_tables [num_seq_scans ] = hashp ;
1050+ num_seq_scans ++ ;
1051+ }
1052+
1053+ /* Deregister an active scan */
1054+ static void
1055+ deregister_seq_scan (HTAB * hashp )
1056+ {
1057+ int i ;
1058+
1059+ /* Search backward since it's most likely at the stack top */
1060+ for (i = num_seq_scans - 1 ; i >= 0 ; i -- )
1061+ {
1062+ if (seq_scan_tables [i ] == hashp )
1063+ {
1064+ seq_scan_tables [i ] = seq_scan_tables [num_seq_scans - 1 ];
1065+ num_seq_scans -- ;
1066+ return ;
1067+ }
1068+ }
1069+ elog (ERROR , "no hash_seq_search scan for hash table \"%s\"" ,
1070+ hashp -> tabname );
1071+ }
1072+
1073+ /* Check if a table has any active scan */
1074+ static bool
1075+ has_seq_scans (HTAB * hashp )
1076+ {
1077+ int i ;
1078+
1079+ for (i = 0 ; i < num_seq_scans ; i ++ )
1080+ {
1081+ if (seq_scan_tables [i ] == hashp )
1082+ return true;
1083+ }
1084+ return false;
1085+ }
1086+
1087+ /* Clean up any open scans at end of transaction */
1088+ void
1089+ AtEOXact_HashTables (bool isCommit )
1090+ {
1091+ /*
1092+ * During abort cleanup, open scans are expected; just silently clean 'em
1093+ * out. An open scan at commit means someone forgot a hash_seq_term()
1094+ * call, so complain.
1095+ *
1096+ * Note: it's tempting to try to print the tabname here, but refrain for
1097+ * fear of touching deallocated memory. This isn't a user-facing message
1098+ * anyway, so it needn't be pretty.
1099+ */
1100+ if (isCommit )
1101+ {
1102+ int i ;
1103+
1104+ for (i = 0 ; i < num_seq_scans ; i ++ )
1105+ {
1106+ elog (WARNING , "leaked hash_seq_search scan for hash table %p" ,
1107+ seq_scan_tables [i ]);
1108+ }
1109+ }
1110+ num_seq_scans = 0 ;
1111+ }
0 commit comments