@@ -268,65 +268,20 @@ bool
 hashgettuple(IndexScanDesc scan, ScanDirection dir)
 {
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
-    Relation    rel = scan->indexRelation;
-    Buffer      buf;
-    Page        page;
-    OffsetNumber offnum;
-    ItemPointer current;
     bool        res;
 
     /* Hash indexes are always lossy since we store only the hash code */
     scan->xs_recheck = true;
 
-    /*
-     * We hold pin but not lock on current buffer while outside the hash AM.
-     * Reacquire the read lock here.
-     */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-
     /*
      * If we've already initialized this scan, we can just advance it in the
      * appropriate direction.  If we haven't done so yet, we call a routine to
      * get the first item in the scan.
      */
-    current = &(so->hashso_curpos);
-    if (ItemPointerIsValid(current))
+    if (!HashScanPosIsValid(so->currPos))
+        res = _hash_first(scan, dir);
+    else
     {
-        /*
-         * An insertion into the current index page could have happened while
-         * we didn't have read lock on it.  Re-find our position by looking
-         * for the TID we previously returned.  (Because we hold a pin on the
-         * primary bucket page, no deletions or splits could have occurred;
-         * therefore we can expect that the TID still exists in the current
-         * index page, at an offset >= where we were.)
-         */
-        OffsetNumber maxoffnum;
-
-        buf = so->hashso_curbuf;
-        Assert(BufferIsValid(buf));
-        page = BufferGetPage(buf);
-
-        /*
-         * We don't need test for old snapshot here as the current buffer is
-         * pinned, so vacuum can't clean the page.
-         */
-        maxoffnum = PageGetMaxOffsetNumber(page);
-        for (offnum = ItemPointerGetOffsetNumber(current);
-             offnum <= maxoffnum;
-             offnum = OffsetNumberNext(offnum))
-        {
-            IndexTuple  itup;
-
-            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
-            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
-                break;
-        }
-        if (offnum > maxoffnum)
-            elog(ERROR, "failed to re-find scan position within index \"%s\"",
-                 RelationGetRelationName(rel));
-        ItemPointerSetOffsetNumber(current, offnum);
-
         /*
          * Check to see if we should kill the previously-fetched tuple.
          */
@@ -341,47 +296,18 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
          * entries.
          */
         if (so->killedItems == NULL)
-            so->killedItems = palloc(MaxIndexTuplesPerPage *
-                                     sizeof(HashScanPosItem));
+            so->killedItems = (int *)
+                palloc(MaxIndexTuplesPerPage * sizeof(int));
 
         if (so->numKilled < MaxIndexTuplesPerPage)
-        {
-            so->killedItems[so->numKilled].heapTid = so->hashso_heappos;
-            so->killedItems[so->numKilled].indexOffset =
-                ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-            so->numKilled++;
-        }
+            so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     }
 
     /*
      * Now continue the scan.
      */
     res = _hash_next(scan, dir);
     }
-    else
-        res = _hash_first(scan, dir);
-
-    /*
-     * Skip killed tuples if asked to.
-     */
-    if (scan->ignore_killed_tuples)
-    {
-        while (res)
-        {
-            offnum = ItemPointerGetOffsetNumber(current);
-            page = BufferGetPage(so->hashso_curbuf);
-            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
-                break;
-            res = _hash_next(scan, dir);
-        }
-    }
-
-    /* Release read lock on current buffer, but keep it pinned */
-    if (BufferIsValid(so->hashso_curbuf))
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
-
-    /* Return current heap TID on success */
-    scan->xs_ctup.t_self = so->hashso_heappos;
 
     return res;
 }
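
The `so->currPos` used above is the new page-at-a-time scan position this
patch adds to `HashScanOpaqueData`: `_hash_first`/`_hash_next` now copy every
matching entry off an index page in one locked visit, and `hashgettuple`
just walks that in-memory batch. That is why the old re-find loop could be
deleted, and why `killedItems` can shrink from `HashScanPosItem` entries to
plain `int` indexes into the batch. For readers without the companion header
change in view, the new types look roughly like this (a sketch of the
`hash.h` side of this patch; comments and field order may differ):

    /* One returned entry: heap TID plus its offset within the index page */
    typedef struct HashScanPosItem
    {
        ItemPointerData heapTid;    /* TID of referenced heap item */
        OffsetNumber indexOffset;   /* index item's location within page */
    } HashScanPosItem;

    /* All matching entries from the current page, batched in one read */
    typedef struct HashScanPosData
    {
        Buffer      buf;            /* if valid, the buffer is pinned */
        BlockNumber currPage;       /* current hash index page */
        BlockNumber nextPage;       /* next overflow page */
        BlockNumber prevPage;       /* previous overflow or bucket page */

        int         firstItem;      /* first valid index in items[] */
        int         lastItem;       /* last valid index in items[] */
        int         itemIndex;      /* current index in items[] */

        HashScanPosItem items[MaxIndexTuplesPerPage];   /* MUST BE LAST */
    } HashScanPosData;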
@@ -396,35 +322,21 @@ hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     bool        res;
     int64       ntids = 0;
+    HashScanPosItem *currItem;
 
     res = _hash_first(scan, ForwardScanDirection);
 
     while (res)
     {
-        bool        add_tuple;
+        currItem = &so->currPos.items[so->currPos.itemIndex];
 
         /*
-         * Skip killed tuples if asked to.
+         * _hash_first and _hash_next eliminate dead index entries whenever
+         * scan->ignore_killed_tuples is true.  Therefore, there's nothing to
+         * do here except add the results to the TIDBitmap.
         */
-        if (scan->ignore_killed_tuples)
-        {
-            Page        page;
-            OffsetNumber offnum;
-
-            offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos));
-            page = BufferGetPage(so->hashso_curbuf);
-            add_tuple = !ItemIdIsDead(PageGetItemId(page, offnum));
-        }
-        else
-            add_tuple = true;
-
-        /* Save tuple ID, and continue scanning */
-        if (add_tuple)
-        {
-            /* Note we mark the tuple ID as requiring recheck */
-            tbm_add_tuples(tbm, &(so->hashso_heappos), 1, true);
-            ntids++;
-        }
+        tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
+        ntids++;
 
         res = _hash_next(scan, ForwardScanDirection);
     }
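
Both `hashgettuple` and `hashgetbitmap` now lean on the same contract: the
`items[]` array is filled once per page, and advancing the scan is just
moving `itemIndex` until the batch is exhausted, at which point the next
page is read. A minimal sketch of that consumer-side step (hypothetical
helper name; the real logic lives in `_hash_next` in hashsearch.c):

    /* Advance within the current batch; false means "fetch the next page". */
    static bool
    scanpos_advance(HashScanPosData *pos, ScanDirection dir)
    {
        if (ScanDirectionIsForward(dir))
            return ++pos->itemIndex <= pos->lastItem;
        else
            return --pos->itemIndex >= pos->firstItem;
    }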
@@ -448,12 +360,9 @@ hashbeginscan(Relation rel, int nkeys, int norderbys)
     scan = RelationGetIndexScan(rel, nkeys, norderbys);
 
     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
-    so->hashso_curbuf = InvalidBuffer;
+    HashScanPosInvalidate(so->currPos);
     so->hashso_bucket_buf = InvalidBuffer;
     so->hashso_split_bucket_buf = InvalidBuffer;
-    /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
 
     so->hashso_buc_populated = false;
     so->hashso_buc_split = false;
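
`HashScanPosInvalidate` replaces the old trio of manual resets: it clears
the buffer, block numbers, and item counters of `currPos` in one step, and
`HashScanPosIsValid` keys off `currPage`, which is what still triggers the
`_hash_first` call on the first fetch. Roughly (a sketch; the real macros in
`hash.h` also carry assertions):

    #define HashScanPosIsValid(scanpos) \
        BlockNumberIsValid((scanpos).currPage)

    #define HashScanPosInvalidate(scanpos) \
        do { \
            (scanpos).buf = InvalidBuffer; \
            (scanpos).currPage = InvalidBlockNumber; \
            (scanpos).nextPage = InvalidBlockNumber; \
            (scanpos).prevPage = InvalidBlockNumber; \
            (scanpos).firstItem = 0; \
            (scanpos).lastItem = 0; \
            (scanpos).itemIndex = 0; \
        } while (0)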
@@ -476,22 +385,17 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
 
     /* set position invalid (this will cause _hash_first call) */
-    ItemPointerSetInvalid(&(so->hashso_curpos));
-    ItemPointerSetInvalid(&(so->hashso_heappos));
+    HashScanPosInvalidate(so->currPos);
 
     /* Update scan key, if a new one is given */
     if (scankey && scan->numberOfKeys > 0)
@@ -514,15 +418,11 @@ hashendscan(IndexScanDesc scan)
     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     Relation    rel = scan->indexRelation;
 
-    /*
-     * Before leaving current page, deal with any killed items. Also, ensure
-     * that we acquire lock on current page before calling _hash_kill_items.
-     */
-    if (so->numKilled > 0)
+    if (HashScanPosIsValid(so->currPos))
     {
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_SHARE);
-        _hash_kill_items(scan);
-        LockBuffer(so->hashso_curbuf, BUFFER_LOCK_UNLOCK);
+        /* Before leaving current page, deal with any killed items */
+        if (so->numKilled > 0)
+            _hash_kill_items(scan);
     }
 
     _hash_dropscanbuf(rel, so);
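
In both `hashrescan` and `hashendscan` the explicit `LockBuffer` bracket
around `_hash_kill_items` is gone, so the function itself must now take and
release whatever lock it needs on `so->currPos.buf`. A hedged sketch of what
that implies (hypothetical name; the real function is in hashutil.c and also
re-verifies each TID before marking it):

    static void
    kill_items_sketch(IndexScanDesc scan)
    {
        HashScanOpaque so = (HashScanOpaque) scan->opaque;
        Page        page;
        int         i;

        Assert(so->numKilled > 0);

        /* lock the page ourselves; callers no longer do it for us */
        LockBuffer(so->currPos.buf, BUFFER_LOCK_SHARE);
        page = BufferGetPage(so->currPos.buf);

        for (i = 0; i < so->numKilled; i++)
        {
            HashScanPosItem *item = &so->currPos.items[so->killedItems[i]];

            /* mark the line pointer LP_DEAD so later scans can skip it */
            ItemIdMarkDead(PageGetItemId(page, item->indexOffset));
        }
        so->numKilled = 0;

        /* a hint-bit-style change: dirty the page without forcing WAL */
        MarkBufferDirtyHint(so->currPos.buf, true);
        LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
    }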
@@ -755,16 +655,15 @@ hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  * primary bucket page.  The lock won't necessarily be held continuously,
  * though, because we'll release it when visiting overflow pages.
  *
- * It would be very bad if this function cleaned a page while some other
- * backend was in the midst of scanning it, because hashgettuple assumes
- * that the next valid TID will be greater than or equal to the current
- * valid TID.  There can't be any concurrent scans in progress when we first
- * enter this function because of the cleanup lock we hold on the primary
- * bucket page, but as soon as we release that lock, there might be.  We
- * handle that by conspiring to prevent those scans from passing our cleanup
- * scan.  To do that, we lock the next page in the bucket chain before
- * releasing the lock on the previous page.  (This type of lock chaining is
- * not ideal, so we might want to look for a better solution at some point.)
+ * There can't be any concurrent scans in progress when we first enter this
+ * function because of the cleanup lock we hold on the primary bucket page,
+ * but as soon as we release that lock, there might be.  If those scans got
+ * ahead of our cleanup scan, they might see a tuple before we kill it and
+ * wake up only after VACUUM has completed and the TID has been recycled for
+ * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
+ * our cleanup scan by locking the next page in the bucket chain before
+ * releasing the lock on the previous page.  (This type of lock chaining is
+ * not ideal, so we might want to look for a better solution at some point.)
 *
 * We need to retain a pin on the primary bucket to ensure that no concurrent
 * split can start.
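
The chaining the new comment describes is the classic crab-walk: take the
lock on the next page in the bucket chain before giving up the lock on the
current one, so no scan can slip past the cleanup. In outline (not the exact
loop in hashbucketcleanup):

    while (BlockNumberIsValid(nextblkno))
    {
        Buffer      nextbuf;

        /* lock the successor page first ... */
        nextbuf = _hash_getbuf_with_strategy(rel, nextblkno, HASH_WRITE,
                                             LH_OVERFLOW_PAGE, bstrategy);

        /* ... and only then release the page we just cleaned */
        _hash_relbuf(rel, buf);

        buf = nextbuf;
        /* clean dead tuples from this page; compute the next nextblkno */
    }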