@@ -55,8 +55,6 @@ static void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
                     BTStack stack, bool is_root, bool is_only);
 static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup,
             OffsetNumber itup_off);
-static bool _bt_isequal(TupleDesc itupdesc, BTScanInsert itup_key,
-            Page page, OffsetNumber offnum);
 static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
 
 /*
@@ -91,9 +89,31 @@ _bt_doinsert(Relation rel, IndexTuple itup,
 
     /* we need an insertion scan key to do our search, so build one */
     itup_key = _bt_mkscankey(rel, itup);
-    /* No scantid until uniqueness established in checkingunique case */
-    if (checkingunique && itup_key->heapkeyspace)
-        itup_key->scantid = NULL;
+
+    if (checkingunique)
+    {
+        if (!itup_key->anynullkeys)
+        {
+            /* No (heapkeyspace) scantid until uniqueness established */
+            itup_key->scantid = NULL;
+        }
+        else
+        {
+            /*
+             * Scan key for new tuple contains NULL key values.  Bypass
+             * checkingunique steps.  They are unnecessary because core code
+             * considers NULL unequal to every value, including NULL.
+             *
+             * This optimization avoids O(N^2) behavior within the
+             * _bt_findinsertloc() heapkeyspace path when a unique index has a
+             * large number of "duplicates" with NULL key values.
+             */
+            checkingunique = false;
+            /* Tuple is unique in the sense that core code cares about */
+            Assert(checkUnique != UNIQUE_CHECK_EXISTING);
+            is_unique = true;
+        }
+    }
 
     /*
      * Fill in the BTInsertState working area, to track the current page and
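The bypass added in the hunk above leans on the SQL rule that NULL compares as unequal to everything, including another NULL, so a key containing a NULL value can never raise a unique-index conflict no matter how many such "duplicates" already exist. The toy program below illustrates only that distinction (it is not PostgreSQL code; every name in it is invented): an ordering comparison still has to group NULL keys together so a binary search can position them on a page, while the conflict test that matters for uniqueness never reports a match for a NULL key.

/* Illustration only -- not PostgreSQL code; all names are invented. */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    bool    isnull;
    int     value;              /* only meaningful when !isnull */
} ToyKey;

/* Ordering comparison: NULL keys sort after non-NULLs and group together. */
static int
key_ordering_cmp(ToyKey a, ToyKey b)
{
    if (a.isnull && b.isnull)
        return 0;
    if (a.isnull)
        return 1;
    if (b.isnull)
        return -1;
    return (a.value > b.value) - (a.value < b.value);
}

/* Uniqueness test: a NULL key value never conflicts with anything. */
static bool
key_conflicts_for_unique(ToyKey a, ToyKey b)
{
    if (a.isnull || b.isnull)
        return false;
    return a.value == b.value;
}

int
main(void)
{
    ToyKey  n1 = {true, 0};
    ToyKey  n2 = {true, 0};

    /* The two NULL keys sort as equals for positioning purposes... */
    printf("ordering cmp: %d\n", key_ordering_cmp(n1, n2));             /* 0 */
    /* ...but they never conflict in a unique index. */
    printf("conflict: %d\n", (int) key_conflicts_for_unique(n1, n2));   /* 0 */
    return 0;
}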
@@ -209,7 +229,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
     * NOTE: obviously, _bt_check_unique can only detect keys that are already
     * in the index; so it cannot defend against concurrent insertions of the
     * same key.  We protect against that by means of holding a write lock on
-    * the first page the value could be on, regardless of the value of its
+    * the first page the value could be on, with omitted/-inf value for the
     * implicit heap TID tiebreaker attribute.  Any other would-be inserter of
     * the same key must acquire a write lock on the same page, so only one
     * would-be inserter can be making the check at one time.  Furthermore,
@@ -266,10 +286,9 @@ _bt_doinsert(Relation rel, IndexTuple itup,
        /*
         * The only conflict predicate locking cares about for indexes is when
         * an index tuple insert conflicts with an existing lock.  We don't
-        * know the actual page we're going to insert to yet because scantid
-        * was not filled in initially, but it's okay to use the "first valid"
-        * page instead.  This reasoning also applies to INCLUDE indexes,
-        * whose extra attributes are not considered part of the key space.
+        * know the actual page we're going to insert on for sure just yet in
+        * checkingunique and !heapkeyspace cases, but it's okay to use the
+        * first page the value could be on (with scantid omitted) instead.
         */
        CheckForSerializableConflictIn(rel, NULL, insertstate.buf);
 
@@ -315,13 +334,16 @@ _bt_doinsert(Relation rel, IndexTuple itup,
  * As a side-effect, sets state in insertstate that can later be used by
  * _bt_findinsertloc() to reuse most of the binary search work we do
  * here.
+ *
+ * Do not call here when there are NULL values in scan key.  NULL should be
+ * considered unequal to NULL when checking for duplicates, but we are not
+ * prepared to handle that correctly.
  */
 static TransactionId
 _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
                  IndexUniqueCheck checkUnique, bool *is_unique,
                  uint32 *speculativeToken)
 {
-    TupleDesc   itupdesc = RelationGetDescr(rel);
     IndexTuple  itup = insertstate->itup;
     BTScanInsert itup_key = insertstate->itup_key;
     SnapshotData SnapshotDirty;
@@ -354,6 +376,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
     * Scan over all equal tuples, looking for live conflicts.
     */
    Assert(!insertstate->bounds_valid || insertstate->low == offset);
+   Assert(!itup_key->anynullkeys);
    Assert(itup_key->scantid == NULL);
    for (;;)
    {
@@ -375,16 +398,16 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
             * original page, which may indicate that we need to examine a
             * second or subsequent page.
             *
-            * Note that this optimization avoids calling _bt_isequal()
-            * entirely when there are no duplicates, as long as the offset
-            * where the key will go is not at the end of the page.
+            * Note that this optimization allows us to avoid calling
+            * _bt_compare() directly when there are no duplicates, as long as
+            * the offset where the key will go is not at the end of the page.
             */
            if (nbuf == InvalidBuffer && offset == insertstate->stricthigh)
            {
                Assert(insertstate->bounds_valid);
                Assert(insertstate->low >= P_FIRSTDATAKEY(opaque));
                Assert(insertstate->low <= insertstate->stricthigh);
-               Assert(!_bt_isequal(itupdesc, itup_key, page, offset));
+               Assert(_bt_compare(rel, itup_key, page, offset) < 0);
                break;
            }
 
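The stricthigh fastpath above reuses the bounds cached by the initial binary search: insertstate->low and insertstate->stricthigh delimit the offsets that could still hold duplicates, and by the time the scan reaches stricthigh on the original page the item there is already known to compare strictly greater than the scan key (the new assertion), so the loop can stop without another comparison. Below is a heavily simplified, standalone sketch of that bounds idea (illustration only -- a flat sorted array instead of index pages, and every name is invented): one search records both bounds, and a later duplicate scan only ever needs to look at slots in [low, stricthigh).

/* Illustration only -- not PostgreSQL code; all names are invented. */
#include <stdio.h>

/*
 * Record both bounds for "key" in a sorted array: "low" is the first slot
 * whose item is >= key (the insertion point), "stricthigh" is the first slot
 * whose item is strictly greater.  Possible duplicates of key can only live
 * in slots [low, stricthigh).
 */
static void
search_bounds(const int *items, int nitems, int key, int *low, int *stricthigh)
{
    int     lo = 0,
            hi = nitems;

    /* first slot whose item is >= key */
    while (lo < hi)
    {
        int     mid = lo + (hi - lo) / 2;

        if (items[mid] < key)
            lo = mid + 1;
        else
            hi = mid;
    }
    *low = lo;

    /* first slot whose item is > key, narrowing from "low" onward */
    hi = nitems;
    while (lo < hi)
    {
        int     mid = lo + (hi - lo) / 2;

        if (items[mid] <= key)
            lo = mid + 1;
        else
            hi = mid;
    }
    *stricthigh = lo;
}

int
main(void)
{
    int     page[] = {1, 3, 3, 3, 7, 9};
    int     low,
            stricthigh;

    search_bounds(page, 6, 3, &low, &stricthigh);
    printf("low=%d stricthigh=%d\n", low, stricthigh);  /* low=1 stricthigh=4 */
    return 0;
}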
@@ -394,9 +417,9 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
             * We can skip items that are marked killed.
             *
             * In the presence of heavy update activity an index may contain
-            * many killed items with the same key; running _bt_isequal() on
+            * many killed items with the same key; running _bt_compare() on
             * each killed item gets expensive.  Just advance over killed
-            * items as quickly as we can.  We only apply _bt_isequal() when
+            * items as quickly as we can.  We only apply _bt_compare() when
             * we get to a non-killed item.  Even those comparisons could be
             * avoided (in the common case where there is only one page to
             * visit) by reusing bounds, but just skipping dead items is fast
@@ -407,13 +430,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
                ItemPointerData htid;
                bool        all_dead;
 
-               /*
-                * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's
-                * how we handling NULLs - and so we must not use _bt_compare
-                * in real comparison, but only for ordering/finding items on
-                * pages. - vadim 03/24/97
-                */
-               if (!_bt_isequal(itupdesc, itup_key, page, offset))
+               if (_bt_compare(rel, itup_key, page, offset) != 0)
                    break;      /* we're past all the equal tuples */
 
                /* okay, we gotta fetch the heap tuple ... */
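Taken together with the previous hunk, the duplicate scan now has a simple shape: killed (dead) items are skipped without any comparison at all, and because callers now guarantee a scan key with no NULL values, a three-way comparison returning zero is a reliable "possible duplicate" signal, which is what lets the separate equality routine go away. The sketch below is a loose standalone model of that loop shape (illustration only -- flat arrays instead of index pages, invented names, and no visibility checks against the heap).

/* Illustration only -- not PostgreSQL code; all names are invented. */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    int     key;
    bool    killed;             /* item marked dead by an earlier scan */
} ToySlot;

/* Count live items equal to "newkey", scanning from the cached lower bound. */
static int
count_live_duplicates(const ToySlot *slots, int nslots, int low, int newkey)
{
    int     ndup = 0;

    for (int off = low; off < nslots; off++)
    {
        /* advance over killed items as quickly as we can: no comparison */
        if (slots[off].killed)
            continue;

        /* a live item that doesn't match means we're past all equal tuples */
        if (slots[off].key != newkey)
            break;

        /* live possible duplicate; the real code would check the heap here */
        ndup++;
    }
    return ndup;
}

int
main(void)
{
    ToySlot page[] = {
        {1, false}, {3, true}, {3, false}, {3, true}, {7, false}
    };

    printf("live duplicates of 3: %d\n", count_live_duplicates(page, 5, 1, 3));
    return 0;
}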
@@ -2184,58 +2201,6 @@ _bt_pgaddtup(Page page,
    return true;
 }
 
-/*
- * _bt_isequal - used in _bt_doinsert in check for duplicates.
- *
- * This is very similar to _bt_compare, except for NULL and negative infinity
- * handling.  Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too.
- */
-static bool
-_bt_isequal(TupleDesc itupdesc, BTScanInsert itup_key, Page page,
-            OffsetNumber offnum)
-{
-   IndexTuple  itup;
-   ScanKey     scankey;
-   int         i;
-
-   /* Better be comparing to a non-pivot item */
-   Assert(P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(page)));
-   Assert(offnum >= P_FIRSTDATAKEY((BTPageOpaque) PageGetSpecialPointer(page)));
-   Assert(itup_key->scantid == NULL);
-
-   scankey = itup_key->scankeys;
-   itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
-
-   for (i = 1; i <= itup_key->keysz; i++)
-   {
-       AttrNumber  attno;
-       Datum       datum;
-       bool        isNull;
-       int32       result;
-
-       attno = scankey->sk_attno;
-       Assert(attno == i);
-       datum = index_getattr(itup, attno, itupdesc, &isNull);
-
-       /* NULLs are never equal to anything */
-       if (isNull || (scankey->sk_flags & SK_ISNULL))
-           return false;
-
-       result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
-                                                scankey->sk_collation,
-                                                datum,
-                                                scankey->sk_argument));
-
-       if (result != 0)
-           return false;
-
-       scankey++;
-   }
-
-   /* if we get here, the keys are equal */
-   return true;
-}
-
 /*
  * _bt_vacuum_one_page - vacuum just one index page.
  *