@@ -403,12 +403,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
403403 /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
404404 LWLockAcquire (shared -> buffer_locks [slotno ], LW_EXCLUSIVE );
405405
406- /*
407- * Temporarily mark page as recently-used to discourage
408- * SlruSelectLRUPage from selecting it again for someone else.
409- */
410- SlruRecentlyUsed (shared , slotno );
411-
412406 /* Release control lock while doing I/O */
413407 LWLockRelease (shared -> ControlLock );
414408
@@ -909,9 +903,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
909903 {
910904 int slotno ;
911905 int cur_count ;
912- int bestslot ;
913- int best_delta ;
914- int best_page_number ;
906+ int bestvalidslot = 0 ; /* keep compiler quiet */
907+ int best_valid_delta = -1 ;
908+ int best_valid_page_number = 0 ; /* keep compiler quiet */
909+ int bestinvalidslot = 0 ; /* keep compiler quiet */
910+ int best_invalid_delta = -1 ;
911+ int best_invalid_page_number = 0 ; /* keep compiler quiet */
915912
916913 /* See if page already has a buffer assigned */
917914 for (slotno = 0 ; slotno < shared -> num_slots ; slotno ++ )
@@ -922,8 +919,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
922919 }
923920
924921 /*
925- * If we find any EMPTY slot, just select that one. Else locate the
926- * least-recently-used slot to replace.
922+ * If we find any EMPTY slot, just select that one. Else choose a
923+ * victim page to replace. We normally take the least recently used
924+ * valid page, but we will never take the slot containing
925+ * latest_page_number, even if it appears least recently used. We
926+ * will select a slot that is already I/O busy only if there is no
927+ * other choice: a read-busy slot will not be least recently used once
928+ * the read finishes, and waiting for an I/O on a write-busy slot is
929+ * inferior to just picking some other slot. Testing shows the slot
930+ * we pick instead will often be clean, allowing us to begin a read
931+ * at once.
927932 *
928933 * Normally the page_lru_count values will all be different and so
929934 * there will be a well-defined LRU page. But since we allow
@@ -932,9 +937,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
932937 * acquire the same lru_count values. In that case we break ties by
933938 * choosing the furthest-back page.
934939 *
935- * In no case will we select the slot containing latest_page_number
936- * for replacement, even if it appears least recently used.
937- *
938940 * Notice that this next line forcibly advances cur_lru_count to a
939941 * value that is certainly beyond any value that will be in the
940942 * page_lru_count array after the loop finishes. This ensures that
@@ -944,9 +946,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
944946 * multiple pages with the same lru_count.
945947 */
946948 cur_count = (shared -> cur_lru_count )++ ;
947- best_delta = -1 ;
948- bestslot = 0 ; /* no-op, just keeps compiler quiet */
949- best_page_number = 0 ; /* ditto */
950949 for (slotno = 0 ; slotno < shared -> num_slots ; slotno ++ )
951950 {
952951 int this_delta ;
@@ -968,34 +967,57 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
968967 this_delta = 0 ;
969968 }
970969 this_page_number = shared -> page_number [slotno ];
971- if ((this_delta > best_delta ||
972- (this_delta == best_delta &&
973- ctl -> PagePrecedes (this_page_number , best_page_number ))) &&
974- this_page_number != shared -> latest_page_number )
970+ if (this_page_number == shared -> latest_page_number )
971+ continue ;
972+ if (shared -> page_status [slotno ] == SLRU_PAGE_VALID )
973+ {
974+ if (this_delta > best_valid_delta ||
975+ (this_delta == best_valid_delta &&
976+ ctl -> PagePrecedes (this_page_number ,
977+ best_valid_page_number )))
978+ {
979+ bestvalidslot = slotno ;
980+ best_valid_delta = this_delta ;
981+ best_valid_page_number = this_page_number ;
982+ }
983+ }
984+ else
975985 {
976- bestslot = slotno ;
977- best_delta = this_delta ;
978- best_page_number = this_page_number ;
986+ if (this_delta > best_invalid_delta ||
987+ (this_delta == best_invalid_delta &&
988+ ctl -> PagePrecedes (this_page_number ,
989+ best_invalid_page_number )))
990+ {
991+ bestinvalidslot = slotno ;
992+ best_invalid_delta = this_delta ;
993+ best_invalid_page_number = this_page_number ;
994+ }
979995 }
980996 }
981997
998+ /*
999+ * If all pages (except possibly the latest one) are I/O busy, we'll
1000+ * have to wait for an I/O to complete and then retry. In that unhappy
1001+ * case, we choose to wait for the I/O on the least recently used slot,
1002+ * on the assumption that it was likely initiated first of all the I/Os
1003+ * in progress and may therefore finish first.
1004+ */
1005+ if (best_valid_delta < 0 )
1006+ {
1007+ SimpleLruWaitIO (ctl , bestinvalidslot );
1008+ continue ;
1009+ }
1010+
9821011 /*
9831012 * If the selected page is clean, we're set.
9841013 */
985- if (shared -> page_status [bestslot ] == SLRU_PAGE_VALID &&
986- !shared -> page_dirty [bestslot ])
987- return bestslot ;
1014+ if (!shared -> page_dirty [bestvalidslot ])
1015+ return bestvalidslot ;
9881016
9891017 /*
990- * We need to wait for I/O. Normal case is that it's dirty and we
991- * must initiate a write, but it's possible that the page is already
992- * write-busy, or in the worst case still read-busy. In those cases
993- * we wait for the existing I/O to complete.
1018+ * Write the page.
9941019 */
995- if (shared -> page_status [bestslot ] == SLRU_PAGE_VALID )
996- SlruInternalWritePage (ctl , bestslot , NULL );
997- else
998- SimpleLruWaitIO (ctl , bestslot );
1020+ SlruInternalWritePage (ctl , bestvalidslot , NULL );
9991021
10001022 /*
10011023 * Now loop back and try again. This is the easiest way of dealing
0 commit comments