@@ -132,6 +132,8 @@ static void bt_downlink_missing_check(BtreeCheckState *state);
132132static void bt_tuple_present_callback (Relation index , HeapTuple htup ,
133133 Datum * values , bool * isnull ,
134134 bool tupleIsAlive , void * checkstate );
135+ static IndexTuple bt_normalize_tuple (BtreeCheckState * state ,
136+ IndexTuple itup );
135137static inline bool offset_is_negative_infinity (BTPageOpaque opaque ,
136138 OffsetNumber offset );
137139static inline bool invariant_leq_offset (BtreeCheckState * state ,
@@ -907,7 +909,16 @@ bt_target_page_check(BtreeCheckState *state)
907909
908910 /* Fingerprint leaf page tuples (those that point to the heap) */
909911 if (state -> heapallindexed && P_ISLEAF (topaque ) && !ItemIdIsDead (itemid ))
910- bloom_add_element (state -> filter , (unsigned char * ) itup , tupsize );
912+ {
913+ IndexTuple norm ;
914+
915+ norm = bt_normalize_tuple (state , itup );
916+ bloom_add_element (state -> filter , (unsigned char * ) norm ,
917+ IndexTupleSize (norm ));
918+ /* Be tidy */
919+ if (norm != itup )
920+ pfree (norm );
921+ }
911922
912923 /*
913924 * * High key check *
@@ -1671,35 +1682,18 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
16711682 bool * isnull , bool tupleIsAlive , void * checkstate )
16721683{
16731684 BtreeCheckState * state = (BtreeCheckState * ) checkstate ;
1674- IndexTuple itup ;
1685+ IndexTuple itup , norm ;
16751686
16761687 Assert (state -> heapallindexed );
16771688
1678- /*
1679- * Generate an index tuple for fingerprinting.
1680- *
1681- * Index tuple formation is assumed to be deterministic, and IndexTuples
1682- * are assumed immutable. While the LP_DEAD bit is mutable in leaf pages,
1683- * that's ItemId metadata, which was not fingerprinted. (There will often
1684- * be some dead-to-everyone IndexTuples fingerprinted by the Bloom filter,
1685- * but we only try to detect the absence of needed tuples, so that's
1686- * okay.)
1687- *
1688- * Note that we rely on deterministic index_form_tuple() TOAST
1689- * compression. If index_form_tuple() was ever enhanced to compress datums
1690- * out-of-line, or otherwise varied when or how compression was applied,
1691- * our assumption would break, leading to false positive reports of
1692- * corruption. It's also possible that non-pivot tuples could in the
1693- * future have alternative equivalent representations (e.g. by using the
1694- * INDEX_ALT_TID_MASK bit). For now, we don't decompress/normalize toasted
1695- * values as part of fingerprinting.
1696- */
1689+ /* Generate a normalized index tuple for fingerprinting */
16971690 itup = index_form_tuple (RelationGetDescr (index ), values , isnull );
16981691 itup -> t_tid = htup -> t_self ;
1692+ norm = bt_normalize_tuple (state , itup );
16991693
17001694 /* Probe Bloom filter -- tuple should be present */
1701- if (bloom_lacks_element (state -> filter , (unsigned char * ) itup ,
1702- IndexTupleSize (itup )))
1695+ if (bloom_lacks_element (state -> filter , (unsigned char * ) norm ,
1696+ IndexTupleSize (norm )))
17031697 ereport (ERROR ,
17041698 (errcode (ERRCODE_DATA_CORRUPTED ),
17051699 errmsg ("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"" ,
@@ -1713,6 +1707,115 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
17131707
17141708 state -> heaptuplespresent ++ ;
17151709 pfree (itup );
1710+ /* Cannot leak memory here */
1711+ if (norm != itup )
1712+ pfree (norm );
1713+ }
1714+
1715+ /*
1716+ * Normalize an index tuple for fingerprinting.
1717+ *
1718+ * In general, index tuple formation is assumed to be deterministic by
1719+ * heapallindexed verification, and IndexTuples are assumed immutable. While
1720+ * the LP_DEAD bit is mutable in leaf pages, that's ItemId metadata, which is
1721+ * not fingerprinted. Normalization is required to compensate for corner
1722+ * cases where the determinism assumption doesn't quite work.
1723+ *
1724+ * There is currently one such case: index_form_tuple() does not try to hide
1725+ * the source TOAST state of input datums. The executor applies TOAST
1726+ * compression for heap tuples based on different criteria to the compression
1727+ * applied within btinsert()'s call to index_form_tuple(): it sometimes
1728+ * compresses more aggressively, resulting in compressed heap tuple datums but
1729+ * uncompressed corresponding index tuple datums. A subsequent heapallindexed
1730+ * verification will get a logically equivalent though bitwise unequal tuple
1731+ * from index_form_tuple(). False positive heapallindexed corruption reports
1732+ * could occur without normalizing away the inconsistency.
1733+ *
1734+ * Returned tuple is often caller's own original tuple. Otherwise, it is a
1735+ * new representation of caller's original index tuple, palloc()'d in caller's
1736+ * memory context.
1737+ *
1738+ * Note: This routine is not concerned with distinctions about the
1739+ * representation of tuples beyond those that might break heapallindexed
1740+ * verification. In particular, it won't try to normalize opclass-equal
1741+ * datums with potentially distinct representations (e.g., btree/numeric_ops
1742+ * index datums will not get their display scale normalized-away here).
1743+ * Normalization may need to be expanded to handle more cases in the future,
1744+ * though. For example, it's possible that non-pivot tuples could in the
1745+ * future have alternative logically equivalent representations due to using
1746+ * the INDEX_ALT_TID_MASK bit to implement intelligent deduplication.
1747+ */
1748+ static IndexTuple
1749+ bt_normalize_tuple (BtreeCheckState * state , IndexTuple itup )
1750+ {
1751+ TupleDesc tupleDescriptor = RelationGetDescr (state -> rel );
1752+ Datum normalized [INDEX_MAX_KEYS ];
1753+ bool isnull [INDEX_MAX_KEYS ];
1754+ bool toast_free [INDEX_MAX_KEYS ];
1755+ bool formnewtup = false;
1756+ IndexTuple reformed ;
1757+ int i ;
1758+
1759+ /* Easy case: It's immediately clear that tuple has no varlena datums */
1760+ if (!IndexTupleHasVarwidths (itup ))
1761+ return itup ;
1762+
1763+ for (i = 0 ; i < tupleDescriptor -> natts ; i ++ )
1764+ {
1765+ Form_pg_attribute att ;
1766+
1767+ att = TupleDescAttr (tupleDescriptor , i );
1768+
1769+ /* Assume untoasted/already normalized datum initially */
1770+ toast_free [i ] = false;
1771+ normalized [i ] = index_getattr (itup , att -> attnum ,
1772+ tupleDescriptor ,
1773+ & isnull [i ]);
1774+ if (att -> attbyval || att -> attlen != -1 || isnull [i ])
1775+ continue ;
1776+
1777+ /*
1778+ * Callers always pass a tuple that could safely be inserted into the
1779+ * index without further processing, so an external varlena header
1780+ * should never be encountered here
1781+ */
1782+ if (VARATT_IS_EXTERNAL (DatumGetPointer (normalized [i ])))
1783+ ereport (ERROR ,
1784+ (errcode (ERRCODE_INDEX_CORRUPTED ),
1785+ errmsg ("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"" ,
1786+ ItemPointerGetBlockNumber (& (itup -> t_tid )),
1787+ ItemPointerGetOffsetNumber (& (itup -> t_tid )),
1788+ RelationGetRelationName (state -> rel ))));
1789+ else if (VARATT_IS_COMPRESSED (DatumGetPointer (normalized [i ])))
1790+ {
1791+ formnewtup = true;
1792+ normalized [i ] = PointerGetDatum (PG_DETOAST_DATUM (normalized [i ]));
1793+ toast_free [i ] = true;
1794+ }
1795+ }
1796+
1797+ /* Easier case: Tuple has varlena datums, none of which are compressed */
1798+ if (!formnewtup )
1799+ return itup ;
1800+
1801+ /*
1802+ * Hard case: Tuple had compressed varlena datums that necessitate
1803+ * creating normalized version of the tuple from uncompressed input datums
1804+ * (normalized input datums). This is rather naive, but shouldn't be
1805+ * necessary too often.
1806+ *
1807+ * Note that we rely on deterministic index_form_tuple() TOAST compression
1808+ * of normalized input.
1809+ */
1810+ reformed = index_form_tuple (tupleDescriptor , normalized , isnull );
1811+ reformed -> t_tid = itup -> t_tid ;
1812+
1813+ /* Cannot leak memory here */
1814+ for (i = 0 ; i < tupleDescriptor -> natts ; i ++ )
1815+ if (toast_free [i ])
1816+ pfree (DatumGetPointer (normalized [i ]));
1817+
1818+ return reformed ;
17161819}
17171820
17181821/*
0 commit comments