@@ -32,6 +32,7 @@
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
+#include "utils/memdebug.h"
 #include "utils/snapmgr.h"
 
 static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
@@ -198,8 +199,8 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
     }
 
     /* trade in our read lock for a write lock */
-    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-    LockBuffer(metabuf, BT_WRITE);
+    _bt_unlockbuf(rel, metabuf);
+    _bt_lockbuf(rel, metabuf, BT_WRITE);
 
     START_CRIT_SECTION();
 
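This hunk and the next trade the raw LockBuffer() unlock/relock pair for the new wrappers. The idiom itself deserves a note: there is no atomic read-to-write upgrade for buffer locks, so the code drops its read lock, reacquires in write mode, and must then recheck any state it read earlier, since another backend may have changed the page while no lock was held (the "Race condition" comment in the next hunk makes the same point). Below is a standalone model of the idiom using POSIX rwlocks; it is illustrative code, not PostgreSQL code.

/*
 * Standalone model (not PostgreSQL code) of the "trade in our read lock
 * for a write lock" idiom.  Build with: cc -pthread -o upgrade upgrade.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int  initialized = 0;

static void
ensure_initialized(void)
{
    pthread_rwlock_rdlock(&lock);
    if (!initialized)
    {
        /* trade in our read lock for a write lock */
        pthread_rwlock_unlock(&lock);
        pthread_rwlock_wrlock(&lock);

        /*
         * Race condition: someone else may have initialized the state
         * while we held no lock at all, so recheck before acting.
         */
        if (!initialized)
        {
            initialized = 1;
            printf("initialized by this thread\n");
        }
    }
    pthread_rwlock_unlock(&lock);
}

int
main(void)
{
    ensure_initialized();
    ensure_initialized();       /* second call sees initialized == 1 */
    return 0;
}

An atomic upgrade primitive would deadlock as soon as two readers tried to upgrade at once; drop, reacquire, and recheck is the standard way out.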
@@ -340,8 +341,8 @@ _bt_getroot(Relation rel, int access)
         }
 
         /* trade in our read lock for a write lock */
-        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-        LockBuffer(metabuf, BT_WRITE);
+        _bt_unlockbuf(rel, metabuf);
+        _bt_lockbuf(rel, metabuf, BT_WRITE);
 
         /*
          * Race condition:  if someone else initialized the metadata between
@@ -434,8 +435,8 @@ _bt_getroot(Relation rel, int access)
          * else accessing the new root page while it's unlocked, since no one
          * else knows where it is yet.
          */
-        LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
-        LockBuffer(rootbuf, BT_READ);
+        _bt_unlockbuf(rel, rootbuf);
+        _bt_lockbuf(rel, rootbuf, BT_READ);
 
         /* okay, metadata is correct, release lock on it without caching */
         _bt_relbuf(rel, metabuf);
@@ -783,10 +784,20 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedX
  *        blkno == P_NEW means to get an unallocated index page.  The page
  *        will be initialized before returning it.
  *
+ *        The general rule in nbtree is that it's never okay to access a
+ *        page without holding both a buffer pin and a buffer lock on
+ *        the page's buffer.
+ *
  *        When this routine returns, the appropriate lock is set on the
  *        requested buffer and its reference count has been incremented
  *        (ie, the buffer is "locked and pinned").  Also, we apply
- *        _bt_checkpage to sanity-check the page (except in P_NEW case).
+ *        _bt_checkpage to sanity-check the page (except in P_NEW case),
+ *        and perform Valgrind client requests that help Valgrind detect
+ *        unsafe page accesses.
+ *
+ *        Note: raw LockBuffer() calls are disallowed in nbtree; all
+ *        buffer lock requests need to go through wrapper functions such
+ *        as _bt_lockbuf().
  */
 Buffer
 _bt_getbuf(Relation rel, BlockNumber blkno, int access)
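The expanded header comment states the nbtree invariant (never touch a page without both a pin and a lock) and how it is enforced: raw LockBuffer() calls are banned, so every lock request funnels through wrappers like _bt_lockbuf(). Funneling through one choke point is what lets the commit attach its Valgrind client requests in a single place. A minimal standalone sketch of that design choice, with hypothetical names rather than PostgreSQL's types:

/*
 * Standalone sketch (hypothetical names, not PostgreSQL code) of the
 * choke-point design: when every acquire/release goes through one
 * wrapper, cross-cutting bookkeeping can be added in a single place.
 */
#include <pthread.h>
#include <stdio.h>

typedef struct SharedPage
{
    pthread_mutex_t lock;
    int         nlocks;         /* bookkeeping added at the choke point */
    char        data[64];
} SharedPage;

static void
page_lock(SharedPage *page)
{
    pthread_mutex_lock(&page->lock);
    page->nlocks++;             /* every locker passes through here */
}

static void
page_unlock(SharedPage *page)
{
    pthread_mutex_unlock(&page->lock);
}

int
main(void)
{
    SharedPage  page = {PTHREAD_MUTEX_INITIALIZER, 0, ""};

    page_lock(&page);
    snprintf(page.data, sizeof(page.data), "hello");
    page_unlock(&page);

    printf("locked %d time(s), data: %s\n", page.nlocks, page.data);
    return 0;
}

In the commit, the bookkeeping at the choke point is a pair of Valgrind client requests rather than a counter, but the shape is the same.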
@@ -797,7 +808,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
     {
         /* Read an existing block of the relation */
         buf = ReadBuffer(rel, blkno);
-        LockBuffer(buf, access);
+        _bt_lockbuf(rel, buf, access);
         _bt_checkpage(rel, buf);
     }
     else
@@ -837,7 +848,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
             if (blkno == InvalidBlockNumber)
                 break;
             buf = ReadBuffer(rel, blkno);
-            if (ConditionalLockBuffer(buf))
+            if (_bt_conditionallockbuf(rel, buf))
             {
                 page = BufferGetPage(buf);
                 if (_bt_page_recyclable(page))
@@ -890,7 +901,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
         buf = ReadBuffer(rel, P_NEW);
 
         /* Acquire buffer lock on new page */
-        LockBuffer(buf, BT_WRITE);
+        _bt_lockbuf(rel, buf, BT_WRITE);
 
         /*
          * Release the file-extension lock; it's now OK for someone else to
@@ -931,9 +942,10 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
 
     Assert(blkno != P_NEW);
     if (BufferIsValid(obuf))
-        LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
+        _bt_unlockbuf(rel, obuf);
     buf = ReleaseAndReadBuffer(obuf, rel, blkno);
-    LockBuffer(buf, access);
+    _bt_lockbuf(rel, buf, access);
+
     _bt_checkpage(rel, buf);
     return buf;
 }
@@ -946,7 +958,102 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
 void
 _bt_relbuf(Relation rel, Buffer buf)
 {
-    UnlockReleaseBuffer(buf);
+    _bt_unlockbuf(rel, buf);
+    ReleaseBuffer(buf);
+}
+
+/*
+ *    _bt_lockbuf() -- lock a pinned buffer.
+ *
+ * Lock is acquired without acquiring another pin.  This is like a raw
+ * LockBuffer() call, but performs extra steps needed by Valgrind.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+void
+_bt_lockbuf(Relation rel, Buffer buf, int access)
+{
+    /* LockBuffer() asserts that pin is held by this backend */
+    LockBuffer(buf, access);
+
+    /*
+     * It doesn't matter that _bt_unlockbuf() won't get called in the event
+     * of an nbtree error (e.g. a unique violation error).  That won't cause
+     * Valgrind false positives.
+     *
+     * The nbtree client requests are superimposed on top of the bufmgr.c
+     * buffer pin client requests.  In the event of an nbtree error the
+     * buffer will certainly get marked as defined when the backend once
+     * again acquires its first pin on the buffer.  (Of course, if the
+     * backend never touches the buffer again then it doesn't matter that
+     * it remains non-accessible to Valgrind.)
+     *
+     * Note: When an IndexTuple C pointer gets computed using an ItemId read
+     * from a page while a lock was held, the C pointer becomes unsafe to
+     * dereference forever as soon as the lock is released.  Valgrind can
+     * only detect cases where the pointer gets dereferenced with no
+     * _current_ lock/pin held, though.
+     */
+    if (!RelationUsesLocalBuffers(rel))
+        VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+}
+
+/*
+ *    _bt_unlockbuf() -- unlock a pinned buffer.
+ */
+void
+_bt_unlockbuf(Relation rel, Buffer buf)
+{
+    /*
+     * Buffer is pinned and locked, which means that it is expected to be
+     * defined and addressable.  Check that proactively.
+     */
+    VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+    /* LockBuffer() asserts that pin is held by this backend */
+    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+
+    if (!RelationUsesLocalBuffers(rel))
+        VALGRIND_MAKE_MEM_NOACCESS(BufferGetPage(buf), BLCKSZ);
+}
+
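The client-request macros used by these wrappers come via utils/memdebug.h, which wraps valgrind/memcheck.h and defines them to no-ops in builds without USE_VALGRIND. The following standalone demo shows the same mechanism; it is hypothetical demo code, not PostgreSQL code, and assumes the Valgrind headers are installed:

/*
 * Standalone demo (not PostgreSQL code) of the Memcheck client requests
 * used by _bt_lockbuf()/_bt_unlockbuf().  Build: cc -o demo demo.c
 * Run natively (the macros are no-ops) and under "valgrind ./demo".
 */
#include <stdio.h>
#include <stdlib.h>
#include <valgrind/memcheck.h>

#define PAGESZ 8192

int
main(void)
{
    char       *page = malloc(PAGESZ);
    char       *item;

    /* "lock": tell Memcheck the page may be accessed now */
    VALGRIND_MAKE_MEM_DEFINED(page, PAGESZ);
    page[0] = 42;
    item = &page[0];            /* pointer computed while "locked" */

    /* about to "unlock": the page should still be fully defined */
    VALGRIND_CHECK_MEM_IS_DEFINED(page, PAGESZ);

    /* "unlock": make the whole page off-limits */
    VALGRIND_MAKE_MEM_NOACCESS(page, PAGESZ);

    /*
     * Stale access: dereferencing a pointer into the page with no
     * "lock" held.  Memcheck reports an invalid read here; run
     * natively the bug is silent.
     */
    printf("stale read: %d\n", *item);

    /* make the page accessible again before handing it to free() */
    VALGRIND_MAKE_MEM_DEFINED(page, PAGESZ);
    free(page);
    return 0;
}

Under valgrind the final read is flagged as an invalid access. That is exactly the class of bug the NOACCESS marking makes detectable, including the stale IndexTuple pointer case described in the comment above.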
+/*
+ *    _bt_conditionallockbuf() -- conditionally BT_WRITE lock pinned
+ *    buffer.
+ *
+ * Note: Caller may need to call _bt_checkpage() with buf when pin on buf
+ * wasn't originally acquired in _bt_getbuf() or _bt_relandgetbuf().
+ */
+bool
+_bt_conditionallockbuf(Relation rel, Buffer buf)
+{
+    /* ConditionalLockBuffer() asserts that pin is held by this backend */
+    if (!ConditionalLockBuffer(buf))
+        return false;
+
+    if (!RelationUsesLocalBuffers(rel))
+        VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+    return true;
+}
+
+/*
+ *    _bt_upgradelockbufcleanup() -- upgrade lock to super-exclusive/cleanup
+ *    lock.
+ */
+void
+_bt_upgradelockbufcleanup(Relation rel, Buffer buf)
+{
+    /*
+     * Buffer is pinned and locked, which means that it is expected to be
+     * defined and addressable.  Check that proactively.
+     */
+    VALGRIND_CHECK_MEM_IS_DEFINED(BufferGetPage(buf), BLCKSZ);
+
+    /* LockBuffer() asserts that pin is held by this backend */
+    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+    LockBufferForCleanup(buf);
 }
 
 /*
@@ -1580,7 +1687,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
                 * To avoid deadlocks, we'd better drop the leaf page lock
                 * before going further.
                 */
-                LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+                _bt_unlockbuf(rel, leafbuf);
 
                 /*
                  * Check that the left sibling of leafbuf (if any) is not
@@ -1616,7 +1723,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
                  * (Page deletion can cope with the stack being to the left of
                  * leafbuf, but not to the right of leafbuf.)
                  */
-                LockBuffer(leafbuf, BT_WRITE);
+                _bt_lockbuf(rel, leafbuf, BT_WRITE);
                 continue;
             }
 
@@ -1970,7 +2077,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
     leafleftsib = opaque->btpo_prev;
     leafrightsib = opaque->btpo_next;
 
-    LockBuffer(leafbuf, BUFFER_LOCK_UNLOCK);
+    _bt_unlockbuf(rel, leafbuf);
 
     /*
      * Check here, as calling loops will have locks held, preventing
@@ -2005,7 +2112,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
         * To avoid deadlocks, we'd better drop the target page lock before
         * going further.
         */
-        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+        _bt_unlockbuf(rel, buf);
     }
 
     /*
@@ -2022,7 +2129,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
      * table.)
      */
     if (target != leafblkno)
-        LockBuffer(leafbuf, BT_WRITE);
+        _bt_lockbuf(rel, leafbuf, BT_WRITE);
     if (leftsib != P_NONE)
     {
         lbuf = _bt_getbuf(rel, leftsib, BT_WRITE);
@@ -2072,7 +2179,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
      * rather than a superexclusive lock, since no scan will stop on an empty
      * page.
      */
-    LockBuffer(buf, BT_WRITE);
+    _bt_lockbuf(rel, buf, BT_WRITE);
     page = BufferGetPage(buf);
     opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 