88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.107 2001/02/18 04:39:42 tgl Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.108 2001/03/21 10:13:29 vadim Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -727,7 +727,6 @@ BufferSync()
727727 RelFileNode rnode ;
728728 XLogRecPtr recptr ;
729729 Relation reln = NULL ;
730- bool dirty = false;
731730
732731 for (i = 0 , bufHdr = BufferDescriptors ; i < NBuffers ; i ++ , bufHdr ++ )
733732 {
@@ -741,16 +740,44 @@ BufferSync()
741740 }
742741
743742 /*
744- * Pin buffer and ensure that no one reads it from disk
743+ * We can check bufHdr->cntxDirty here *without* holding any lock
744+ * on buffer context as long as we set this flag in access methods
745+ * *before* logging changes with XLogInsert(): if someone sets
746+ * cntxDirty just after our check, we need not worry, because our
747+ * checkpoint.redo points before the log record for the upcoming
748+ * changes, so we are not required to write such a dirty buffer.
749+ */
750+ if (!(bufHdr -> flags & BM_DIRTY ) && !(bufHdr -> cntxDirty ))
751+ {
752+ SpinRelease (BufMgrLock );
753+ continue ;
754+ }
755+
756+ /*
757+ * IO synchronization. Note that we do it with unpinned buffer
758+ * to avoid conflicts with FlushRelationBuffers.
745759 */
746- PinBuffer (bufHdr );
747- /* Synchronize with BufferAlloc */
748760 if (bufHdr -> flags & BM_IO_IN_PROGRESS )
761+ {
749762 WaitIO (bufHdr , BufMgrLock );
763+ if (!(bufHdr -> flags & BM_VALID ) ||
764+ (!(bufHdr -> flags & BM_DIRTY ) && !(bufHdr -> cntxDirty )))
765+ {
766+ SpinRelease (BufMgrLock );
767+ continue ;
768+ }
769+ }
770+
771+ /*
772+ * Here: no one doing IO for this buffer and it's dirty.
773+ * Pin buffer now and set IO state for it *before* acquiring
774+ * shlock to avoid conflicts with FlushRelationBuffers.
775+ */
776+ PinBuffer (bufHdr );
777+ StartBufferIO (bufHdr , false); /* output IO start */
750778
751779 buffer = BufferDescriptorGetBuffer (bufHdr );
752780 rnode = bufHdr -> tag .rnode ;
753- dirty = bufHdr -> flags & BM_DIRTY ;
754781
755782 SpinRelease (BufMgrLock );
756783
@@ -764,91 +791,68 @@ BufferSync()
764791 */
765792 LockBuffer (buffer , BUFFER_LOCK_SHARE );
766793
767- if (!dirty && !(bufHdr -> cntxDirty ))
768- {
769- LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
770- SpinAcquire (BufMgrLock );
771- UnpinBuffer (bufHdr );
772- SpinRelease (BufMgrLock );
773- if (reln != (Relation ) NULL )
774- RelationDecrementReferenceCount (reln );
775- continue ;
776- }
777-
778794 /*
779795 * Force XLOG flush for buffer' LSN
780796 */
781797 recptr = BufferGetLSN (bufHdr );
782798 XLogFlush (recptr );
783799
784800 /*
785- * Now it's safe to write buffer to disk
786- * (if no one else already)
801+ * Now it's safe to write buffer to disk. Note that no one else
802+ * should be able to write it while we were busy with locking
803+ * and log flushing, because we have set the IO flag.
787804 */
788805 SpinAcquire (BufMgrLock );
789- if (bufHdr -> flags & BM_IO_IN_PROGRESS )
790- WaitIO (bufHdr , BufMgrLock );
806+ Assert (bufHdr -> flags & BM_DIRTY || bufHdr -> cntxDirty );
807+ bufHdr -> flags &= ~BM_JUST_DIRTIED ;
808+ SpinRelease (BufMgrLock );
791809
792- if (bufHdr -> flags & BM_DIRTY || bufHdr -> cntxDirty )
810+ if (reln == ( Relation ) NULL )
793811 {
794- bufHdr -> flags &= ~BM_JUST_DIRTIED ;
795- StartBufferIO (bufHdr , false); /* output IO start */
796-
797- SpinRelease (BufMgrLock );
798-
799- if (reln == (Relation ) NULL )
800- {
801- status = smgrblindwrt (DEFAULT_SMGR ,
802- bufHdr -> tag .rnode ,
803- bufHdr -> tag .blockNum ,
804- (char * ) MAKE_PTR (bufHdr -> data ),
805- true); /* must fsync */
806- }
807- else
808- {
809- status = smgrwrite (DEFAULT_SMGR , reln ,
812+ status = smgrblindwrt (DEFAULT_SMGR ,
813+ bufHdr -> tag .rnode ,
810814 bufHdr -> tag .blockNum ,
811- (char * ) MAKE_PTR (bufHdr -> data ));
812- }
815+ (char * ) MAKE_PTR (bufHdr -> data ),
816+ true); /* must fsync */
817+ }
818+ else
819+ {
820+ status = smgrwrite (DEFAULT_SMGR , reln ,
821+ bufHdr -> tag .blockNum ,
822+ (char * ) MAKE_PTR (bufHdr -> data ));
823+ }
813824
814- if (status == SM_FAIL ) /* disk failure ?! */
815- elog (STOP , "BufferSync: cannot write %u for %s" ,
816- bufHdr -> tag .blockNum , bufHdr -> blind .relname );
825+ if (status == SM_FAIL ) /* disk failure ?! */
826+ elog (STOP , "BufferSync: cannot write %u for %s" ,
827+ bufHdr -> tag .blockNum , bufHdr -> blind .relname );
817828
818- /*
819- * Note that it's safe to change cntxDirty here because of
820- * we protect it from upper writers by share lock and from
821- * other bufmgr routines by BM_IO_IN_PROGRESS
822- */
823- bufHdr -> cntxDirty = false;
829+ /*
830+ * Note that it's safe to change cntxDirty here because
831+ * we protect it from upper writers by share lock and from
832+ * other bufmgr routines by BM_IO_IN_PROGRESS
833+ */
834+ bufHdr -> cntxDirty = false;
824835
825- /*
826- * Release the per-buffer readlock, reacquire BufMgrLock.
827- */
828- LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
829- BufferFlushCount ++ ;
836+ /*
837+ * Release the per-buffer readlock, reacquire BufMgrLock.
838+ */
839+ LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
840+ BufferFlushCount ++ ;
830841
831- SpinAcquire (BufMgrLock );
842+ SpinAcquire (BufMgrLock );
832843
833- bufHdr -> flags &= ~BM_IO_IN_PROGRESS ; /* mark IO finished */
834- TerminateBufferIO (bufHdr ); /* Sync IO finished */
844+ bufHdr -> flags &= ~BM_IO_IN_PROGRESS ; /* mark IO finished */
845+ TerminateBufferIO (bufHdr ); /* Sync IO finished */
835846
836- /*
837- * If this buffer was marked by someone as DIRTY while
838- * we were flushing it out we must not clear DIRTY
839- * flag - vadim 01/17/97
840- */
841- if (!(bufHdr -> flags & BM_JUST_DIRTIED ))
842- bufHdr -> flags &= ~BM_DIRTY ;
843- UnpinBuffer (bufHdr );
844- SpinRelease (BufMgrLock );
845- }
846- else
847- {
848- UnpinBuffer (bufHdr );
849- SpinRelease (BufMgrLock );
850- LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
851- }
847+ /*
848+ * If this buffer was marked by someone as DIRTY while
849+ * we were flushing it out we must not clear DIRTY
850+ * flag - vadim 01/17/97
851+ */
852+ if (!(bufHdr -> flags & BM_JUST_DIRTIED ))
853+ bufHdr -> flags &= ~BM_DIRTY ;
854+ UnpinBuffer (bufHdr );
855+ SpinRelease (BufMgrLock );
852856
853857 /* drop refcnt obtained by RelationNodeCacheGetRelation */
854858 if (reln != (Relation ) NULL )
@@ -2079,6 +2083,12 @@ LockBuffer(Buffer buffer, int mode)
20792083 buf -> w_lock = true;
20802084 * buflock |= BL_W_LOCK ;
20812085
2086+ /*
2087+ * This is not the best place to set cntxDirty flag (eg indices
2088+ * do not always change buffer they lock in excl mode). But please
2089+ * remember that it's critical to set cntxDirty *before* logging
2090+ * changes with XLogInsert() - see comments in BufferSync().
2091+ */
20822092 buf -> cntxDirty = true;
20832093
20842094 if (* buflock & BL_RI_LOCK )
0 commit comments