@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.52 2006/03/31 23:32:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.53 2006/11/19 21:33:22 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -20,7 +20,7 @@
 #include "access/hash.h"
 
 
-static BlockNumber _hash_getovflpage(Relation rel, Buffer metabuf);
+static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
 static uint32 _hash_firstfreebit(uint32 map);
 
 
@@ -99,18 +99,14 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
 Buffer
 _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 {
-	BlockNumber ovflblkno;
 	Buffer		ovflbuf;
 	Page		page;
 	Page		ovflpage;
 	HashPageOpaque pageopaque;
 	HashPageOpaque ovflopaque;
 
-	/* allocate an empty overflow page */
-	ovflblkno = _hash_getovflpage(rel, metabuf);
-
-	/* lock the overflow page */
-	ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+	/* allocate and lock an empty overflow page */
+	ovflbuf = _hash_getovflpage(rel, metabuf);
 	ovflpage = BufferGetPage(ovflbuf);
 
 	/*
@@ -150,7 +146,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 	MarkBufferDirty(ovflbuf);
 
 	/* logically chain overflow page to previous page */
-	pageopaque->hasho_nextblkno = ovflblkno;
+	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
 	_hash_wrtbuf(rel, buf);
 
 	return ovflbuf;
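
Returning the buffer rather than a block number is the natural interface once the page is obtained via P_NEW (see the _hash_getovflpage() changes below), and it saves the caller a separate fetch-and-lock step. The caller-side pattern, condensed from the hunks above (a fragment for illustration, not a standalone compilable unit):

	/* allocate and lock an empty overflow page in one call */
	ovflbuf = _hash_getovflpage(rel, metabuf);
	ovflpage = BufferGetPage(ovflbuf);

	/* when a block number is needed for chain links, derive it from the buffer */
	pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);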
@@ -159,16 +155,18 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 /*
  *	_hash_getovflpage()
  *
- *	Find an available overflow page and return its block number.
+ *	Find an available overflow page and return it.  The returned buffer
+ *	is pinned and write-locked, but its contents are not initialized.
  *
  *	The caller must hold a pin, but no lock, on the metapage buffer.
- *	The buffer is returned in the same state.
+ *	That buffer is left in the same state at exit.
  */
-static BlockNumber
+static Buffer
 _hash_getovflpage(Relation rel, Buffer metabuf)
 {
 	HashMetaPage metap;
 	Buffer		mapbuf = 0;
+	Buffer		newbuf;
 	BlockNumber blkno;
 	uint32		orig_firstfree;
 	uint32		splitnum;
@@ -243,11 +241,10 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
 	}
 
-	/* No Free Page Found - have to allocate a new page */
-	bit = metap->hashm_spares[splitnum];
-	metap->hashm_spares[splitnum]++;
-
-	/* Check if we need to allocate a new bitmap page */
+	/*
+	 * No free pages --- have to extend the relation to add an overflow page.
+	 * First, check to see if we have to add a new bitmap page too.
+	 */
 	if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
 	{
 		/*
@@ -258,22 +255,39 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		 * marked "in use".  Subsequent pages do not exist yet, but it is
 		 * convenient to pre-mark them as "in use" too.
 		 */
-		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
-
 		bit = metap->hashm_spares[splitnum];
+		_hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit));
 		metap->hashm_spares[splitnum]++;
 	}
 	else
 	{
 		/*
-		 * Nothing to do here; since the page was past the last used page, we
-		 * know its bitmap bit was preinitialized to "in use".
+		 * Nothing to do here; since the page will be past the last used page,
+		 * we know its bitmap bit was preinitialized to "in use".
 		 */
 	}
 
 	/* Calculate address of the new overflow page */
+	bit = metap->hashm_spares[splitnum];
 	blkno = bitno_to_blkno(metap, bit);
 
+	/*
+	 * We have to fetch the page with P_NEW to ensure smgr's idea of the
+	 * relation length stays in sync with ours.  XXX It's annoying to do this
+	 * with metapage write lock held; would be better to use a lock that
+	 * doesn't block incoming searches.  Best way to fix it would be to stop
+	 * maintaining hashm_spares[hashm_ovflpoint] and rely entirely on the
+	 * smgr relation length to track where new overflow pages come from;
+	 * then we could release the metapage before we do the smgrextend.
+	 * FIXME later (not in beta...)
+	 */
+	newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(newbuf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(newbuf), blkno);
+
+	metap->hashm_spares[splitnum]++;
+
 	/*
 	 * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
 	 * changing it if someone moved it while we were searching bitmap pages.
@@ -284,7 +298,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* Write updated metapage and release lock, but not pin */
 	_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
 
-	return blkno;
+	return newbuf;
 
 found:
 	/* convert bit to bit number within page */
@@ -300,7 +314,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* convert bit to absolute bit number */
 	bit += (i << BMPG_SHIFT(metap));
 
-	/* Calculate address of the new overflow page */
+	/* Calculate address of the recycled overflow page */
 	blkno = bitno_to_blkno(metap, bit);
 
 	/*
@@ -320,7 +334,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
 	}
 
-	return blkno;
+	/* Fetch and return the recycled page */
+	return _hash_getbuf(rel, blkno, HASH_WRITE);
 }
 
 /*
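
With these hunks, _hash_getovflpage() has two exit paths that now share one contract: both hand back a pinned, write-locked buffer. Condensed from the code above for comparison (a fragment, not compilable on its own):

	/* new-page path: extend the relation with P_NEW, then cross-check
	 * smgr's notion of the length against the hashm_spares arithmetic */
	newbuf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
	if (BufferGetBlockNumber(newbuf) != blkno)
		elog(ERROR, "unexpected hash relation size: %u, should be %u",
			 BufferGetBlockNumber(newbuf), blkno);
	return newbuf;

found:
	/* recycled-page path: the block already exists, so a plain fetch works */
	return _hash_getbuf(rel, blkno, HASH_WRITE);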
@@ -388,7 +403,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	prevblkno = ovflopaque->hasho_prevblkno;
 	bucket = ovflopaque->hasho_bucket;
 
-	/* Zero the page for debugging's sake; then write and release it */
+	/*
+	 * Zero the page for debugging's sake; then write and release it.
+	 * (Note: if we failed to zero the page here, we'd have problems
+	 * with the Assert in _hash_pageinit() when the page is reused.)
+	 */
 	MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
 	_hash_wrtbuf(rel, ovflbuf);
 
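
The new parenthetical note points at the sanity check in _hash_pageinit() (in hashpage.c). As a sketch of the check being referenced (the body below is an approximation of that era of the code, not part of this commit):

	void
	_hash_pageinit(Page page, Size size)
	{
		Assert(PageIsNew(page));	/* fails unless the page is all zeroes */
		PageInit(page, size, sizeof(HashPageOpaqueData));
	}

PageIsNew() tests pd_upper == 0, which holds only for a zeroed page; a freed overflow page that skipped the MemSet here would therefore trip the Assert when the page is later recycled.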
@@ -488,12 +507,19 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
 	/*
 	 * It is okay to write-lock the new bitmap page while holding metapage
 	 * write lock, because no one else could be contending for the new page.
+	 * Also, the metapage lock makes it safe to extend the index using P_NEW,
+	 * which we want to do to ensure the smgr's idea of the relation size
+	 * stays in step with ours.
 	 *
 	 * There is some loss of concurrency in possibly doing I/O for the new
 	 * page while holding the metapage lock, but this path is taken so seldom
 	 * that it's not worth worrying about.
 	 */
-	buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+	buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);
+	if (BufferGetBlockNumber(buf) != blkno)
+		elog(ERROR, "unexpected hash relation size: %u, should be %u",
+			 BufferGetBlockNumber(buf), blkno);
+
 	pg = BufferGetPage(buf);
 
 	/* initialize the page */
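
Both call sites now use the same extend-and-verify idiom. Factored out, it would look like this hypothetical helper (not defined anywhere in this commit; shown only to make the idiom explicit):

	/*
	 * Extend the index by one page via P_NEW and insist that the new page
	 * lands at the block number our own bookkeeping predicted.
	 */
	static Buffer
	_hash_getnewbuf_checked(Relation rel, BlockNumber blkno)
	{
		Buffer		buf = _hash_getbuf(rel, P_NEW, HASH_WRITE);

		if (BufferGetBlockNumber(buf) != blkno)
			elog(ERROR, "unexpected hash relation size: %u, should be %u",
				 BufferGetBlockNumber(buf), blkno);
		return buf;
	}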