 #include "utils/rel.h"
 
 
-static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
 static uint32 _hash_firstfreebit(uint32 map);
 
 
@@ -113,13 +112,30 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
     Page        ovflpage;
     HashPageOpaque pageopaque;
     HashPageOpaque ovflopaque;
-
-    /* allocate and lock an empty overflow page */
-    ovflbuf = _hash_getovflpage(rel, metabuf);
+    HashMetaPage metap;
+    Buffer      mapbuf = InvalidBuffer;
+    Buffer      newmapbuf = InvalidBuffer;
+    BlockNumber blkno;
+    uint32      orig_firstfree;
+    uint32      splitnum;
+    uint32     *freep = NULL;
+    uint32      max_ovflpg;
+    uint32      bit;
+    uint32      bitmap_page_bit;
+    uint32      first_page;
+    uint32      last_bit;
+    uint32      last_page;
+    uint32      i,
+                j;
+    bool        page_found = false;
 
     /*
-     * Write-lock the tail page.  It is okay to hold two buffer locks here
-     * since there cannot be anyone else contending for access to ovflbuf.
+     * Write-lock the tail page.  Here we need to maintain a locking order:
+     * first acquire the lock on the tail page of the bucket, then on the
+     * meta page to find and lock the bitmap page; once the bitmap page is
+     * found, the lock on the meta page is released, and finally we acquire
+     * the lock on the new overflow buffer.  We need this locking order to
+     * avoid deadlock with backends that are doing inserts.
     */
    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
@@ -153,60 +169,6 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
         buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     }
 
-    /* now that we have correct backlink, initialize new overflow page */
-    ovflpage = BufferGetPage(ovflbuf);
-    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
-    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
-    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
-    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
-    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
-    ovflopaque->hasho_page_id = HASHO_PAGE_ID;
-
-    MarkBufferDirty(ovflbuf);
-
-    /* logically chain overflow page to previous page */
-    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
-    MarkBufferDirty(buf);
-    if (retain_pin)
-    {
-        /* pin will be retained only for the primary bucket page */
-        Assert(pageopaque->hasho_flag & LH_BUCKET_PAGE);
-        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-    }
-    else
-        _hash_relbuf(rel, buf);
-
-    return ovflbuf;
-}
-
-/*
- *  _hash_getovflpage()
- *
- *  Find an available overflow page and return it.  The returned buffer
- *  is pinned and write-locked, and has had _hash_pageinit() applied,
- *  but it is caller's responsibility to fill the special space.
- *
- * The caller must hold a pin, but no lock, on the metapage buffer.
- * That buffer is left in the same state at exit.
- */
-static Buffer
-_hash_getovflpage(Relation rel, Buffer metabuf)
-{
-    HashMetaPage metap;
-    Buffer      mapbuf = 0;
-    Buffer      newbuf;
-    BlockNumber blkno;
-    uint32      orig_firstfree;
-    uint32      splitnum;
-    uint32     *freep = NULL;
-    uint32      max_ovflpg;
-    uint32      bit;
-    uint32      first_page;
-    uint32      last_bit;
-    uint32      last_page;
-    uint32      i,
-                j;
-
     /* Get exclusive lock on the meta page */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 
@@ -255,11 +217,31 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
         for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
         {
             if (freep[j] != ALL_SET)
+            {
+                page_found = true;
+
+                /* Reacquire exclusive lock on the meta page */
+                LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
+
+                /* convert bit to bit number within page */
+                bit += _hash_firstfreebit(freep[j]);
+                bitmap_page_bit = bit;
+
+                /* convert bit to absolute bit number */
+                bit += (i << BMPG_SHIFT(metap));
+                /* Calculate address of the recycled overflow page */
+                blkno = bitno_to_blkno(metap, bit);
+
+                /* Fetch and init the recycled page */
+                ovflbuf = _hash_getinitbuf(rel, blkno);
+
                 goto found;
+            }
         }
 
         /* No free space here, try to advance to next map page */
         _hash_relbuf(rel, mapbuf);
+        mapbuf = InvalidBuffer;
         i++;
         j = 0;                  /* scan from start of next map page */
         bit = 0;
@@ -283,8 +265,15 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
          * convenient to pre-mark them as "in use" too.
          */
         bit = metap->hashm_spares[splitnum];
-        _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
-        metap->hashm_spares[splitnum]++;
+
+        /* metapage already has a write lock */
+        if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
+            ereport(ERROR,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("out of overflow pages in hash index \"%s\"",
+                            RelationGetRelationName(rel))));
+
+        newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
     }
     else
     {
@@ -295,49 +284,57 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
     }
 
     /* Calculate address of the new overflow page */
-    bit = metap->hashm_spares[splitnum];
+    bit = BufferIsValid(newmapbuf) ?
+        metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
     blkno = bitno_to_blkno(metap, bit);
 
     /*
      * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
      * relation length stays in sync with ours.  XXX It's annoying to do this
      * with metapage write lock held; would be better to use a lock that
      * doesn't block incoming searches.
+     *
+     * It is okay to hold two buffer locks here (one on tail page of bucket
+     * and other on new overflow page) since there cannot be anyone else
+     * contending for access to ovflbuf.
      */
-    newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
+    ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
 
-    metap->hashm_spares[splitnum]++;
+found:
 
     /*
-     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
-     * changing it if someone moved it while we were searching bitmap pages.
+     * Do the update.
      */
-    if (metap->hashm_firstfree == orig_firstfree)
-        metap->hashm_firstfree = bit + 1;
-
-    /* Write updated metapage and release lock, but not pin */
-    MarkBufferDirty(metabuf);
-    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-
-    return newbuf;
-
-found:
-    /* convert bit to bit number within page */
-    bit += _hash_firstfreebit(freep[j]);
-
-    /* mark page "in use" in the bitmap */
-    SETBIT(freep, bit);
-    MarkBufferDirty(mapbuf);
-    _hash_relbuf(rel, mapbuf);
+    if (page_found)
+    {
+        Assert(BufferIsValid(mapbuf));
 
-    /* Reacquire exclusive lock on the meta page */
-    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
+        /* mark page "in use" in the bitmap */
+        SETBIT(freep, bitmap_page_bit);
+        MarkBufferDirty(mapbuf);
+    }
+    else
+    {
+        /* update the count to indicate new overflow page is added */
+        metap->hashm_spares[splitnum]++;
 
-    /* convert bit to absolute bit number */
-    bit += (i << BMPG_SHIFT(metap));
+        if (BufferIsValid(newmapbuf))
+        {
+            _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
+            MarkBufferDirty(newmapbuf);
+
+            /* add the new bitmap page to the metapage's list of bitmaps */
+            metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
+            metap->hashm_nmaps++;
+            metap->hashm_spares[splitnum]++;
+            MarkBufferDirty(metabuf);
+        }
 
-    /* Calculate address of the recycled overflow page */
-    blkno = bitno_to_blkno(metap, bit);
+        /*
+         * for new overflow page, we don't need to explicitly set the bit in
+         * bitmap page, as by default that will be set to "in use".
+         */
+    }
 
     /*
      * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
@@ -346,19 +343,39 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
     if (metap->hashm_firstfree == orig_firstfree)
     {
         metap->hashm_firstfree = bit + 1;
-
-        /* Write updated metapage and release lock, but not pin */
         MarkBufferDirty(metabuf);
-        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     }
+
+    /* initialize new overflow page */
+    ovflpage = BufferGetPage(ovflbuf);
+    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+    ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
+    ovflopaque->hasho_nextblkno = InvalidBlockNumber;
+    ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
+    ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
+    ovflopaque->hasho_page_id = HASHO_PAGE_ID;
+
+    MarkBufferDirty(ovflbuf);
+
+    /* logically chain overflow page to previous page */
+    pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
+
+    MarkBufferDirty(buf);
+
+    if (retain_pin)
+        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     else
-    {
-        /* We didn't change the metapage, so no need to write */
-        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
-    }
+        _hash_relbuf(rel, buf);
+
+    if (BufferIsValid(mapbuf))
+        _hash_relbuf(rel, mapbuf);
+
+    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+
+    if (BufferIsValid(newmapbuf))
+        _hash_relbuf(rel, newmapbuf);
 
-    /* Fetch, init, and return the recycled page */
-    return _hash_getinitbuf(rel, blkno);
+    return ovflbuf;
 }
 
 /*
@@ -615,6 +632,42 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno,
 }
 
 
+/*
+ *  _hash_initbitmapbuffer()
+ *
+ *  Initialize a new bitmap page.  All bits in the new bitmap page are set to
+ *  "1", indicating "in use".
+ */
+void
+_hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
+{
+    Page        pg;
+    HashPageOpaque op;
+    uint32     *freep;
+
+    pg = BufferGetPage(buf);
+
+    /* initialize the page */
+    if (initpage)
+        _hash_pageinit(pg, BufferGetPageSize(buf));
+
+    /* initialize the page's special space */
+    op = (HashPageOpaque) PageGetSpecialPointer(pg);
+    op->hasho_prevblkno = InvalidBlockNumber;
+    op->hasho_nextblkno = InvalidBlockNumber;
+    op->hasho_bucket = -1;
+    op->hasho_flag = LH_BITMAP_PAGE;
+    op->hasho_page_id = HASHO_PAGE_ID;
+
+    /* set all of the bits to 1 */
+    freep = HashPageGetBitmap(pg);
+    MemSet(freep, 0xFF, bmsize);
+
+    /* Set pd_lower just past the end of the bitmap page data. */
+    ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
+}
+
+
 /*
  *  _hash_squeezebucket(rel, bucket)
  *
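
Note on the bitmap-search hunk above: the inlined loop compares each 32-bit bitmap word against ALL_SET and, when a word has a clear bit, adds the position of the first clear bit (what _hash_firstfreebit computes) to the word offset and the bitmap-page offset (i << BMPG_SHIFT) to get an absolute overflow-bit number, which bitno_to_blkno then maps to a block. The following standalone C sketch models only that arithmetic; the constants BITS_PER_WORD and WORDS_PER_BITMAP_PAGE and the helper names are assumptions for illustration, not PostgreSQL's actual macros or page layout.

#include <stdint.h>
#include <stdio.h>

#define ALL_SET               0xFFFFFFFFu
#define BITS_PER_WORD         32   /* stand-in for BITS_PER_MAP */
#define WORDS_PER_BITMAP_PAGE 8    /* deliberately tiny for the example */

/* Return the position of the first 0 bit in 'map' (same idea as
 * _hash_firstfreebit); caller guarantees map != ALL_SET. */
static uint32_t
first_free_bit(uint32_t map)
{
    for (uint32_t i = 0; i < BITS_PER_WORD; i++)
        if ((map & (1u << i)) == 0)
            return i;
    return BITS_PER_WORD;       /* not reached when map != ALL_SET */
}

/* Scan the bitmap pages for a free overflow-page bit and return its
 * absolute bit number, or -1 if every bit is set. */
static long
find_free_ovfl_bit(uint32_t pages[][WORDS_PER_BITMAP_PAGE], int npages)
{
    for (int i = 0; i < npages; i++)
        for (int j = 0; j < WORDS_PER_BITMAP_PAGE; j++)
            if (pages[i][j] != ALL_SET)
            {
                /* bit number within this bitmap page */
                uint32_t bit = j * BITS_PER_WORD + first_free_bit(pages[i][j]);

                /* convert to an absolute bit number across all bitmap pages */
                return (long) bit +
                    (long) i * WORDS_PER_BITMAP_PAGE * BITS_PER_WORD;
            }
    return -1;
}

int
main(void)
{
    /* first page fully used; second page has bit 3 of word 1 free */
    uint32_t pages[2][WORDS_PER_BITMAP_PAGE];

    for (int i = 0; i < 2; i++)
        for (int j = 0; j < WORDS_PER_BITMAP_PAGE; j++)
            pages[i][j] = ALL_SET;
    pages[1][1] &= ~(1u << 3);

    /* prints 291: bit 35 within page 1, plus 256 bits for page 0 */
    printf("first free overflow bit: %ld\n", find_free_ovfl_bit(pages, 2));
    return 0;
}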