@@ -470,10 +470,10 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
470470 * commit time, add ourselves to a list of processes that need their XIDs
471471 * cleared. The first process to add itself to the list will acquire
472472 * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
473- * on behalf of all group members. This avoids a great deal of context
474- * switching when many processes are trying to commit at once, since the lock
475- * only needs to be handed from the last share-locker to one process waiting
476- * for the exclusive lock, rather than to each one in turn.
473+ * on behalf of all group members. This avoids a great deal of contention
474+ * around ProcArrayLock when many processes are trying to commit at once,
475+ * since the lock need not be repeatedly handed off from one committing
476+ * process to the next.
477477 */
478478static void
479479ProcArrayGroupClearXid (PGPROC * proc , TransactionId latestXid )
@@ -487,28 +487,39 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
487487 Assert (TransactionIdIsValid (allPgXact [proc -> pgprocno ].xid ));
488488
489489 /* Add ourselves to the list of processes needing a group XID clear. */
490+ proc -> clearXid = true;
490491 proc -> backendLatestXid = latestXid ;
491492 while (true)
492493 {
493- nextidx = pg_atomic_read_u32 (& procglobal -> nextClearXidElem );
494+ nextidx = pg_atomic_read_u32 (& procglobal -> firstClearXidElem );
494495 pg_atomic_write_u32 (& proc -> nextClearXidElem , nextidx );
495496
496- if (pg_atomic_compare_exchange_u32 (& procglobal -> nextClearXidElem ,
497+ if (pg_atomic_compare_exchange_u32 (& procglobal -> firstClearXidElem ,
497498 & nextidx ,
498499 (uint32 ) proc -> pgprocno ))
499500 break ;
500501 }
501502
502- /* If the list was not empty, the leader will clear our XID. */
503+ /*
504+ * If the list was not empty, the leader will clear our XID. It is
505+ * impossible to have followers without a leader because the first process
506+ * that has added itself to the list will always have nextidx as
507+ * INVALID_PGPROCNO.
508+ */
503509 if (nextidx != INVALID_PGPROCNO )
504510 {
505511 /* Sleep until the leader clears our XID. */
506- while ( pg_atomic_read_u32 ( & proc -> nextClearXidElem ) != INVALID_PGPROCNO )
512+ for (;; )
507513 {
508- extraWaits ++ ;
514+ /* acts as a read barrier */
509515 PGSemaphoreLock (& proc -> sem );
516+ if (!proc -> clearXid )
517+ break ;
518+ extraWaits ++ ;
510519 }
511520
521+ Assert (pg_atomic_read_u32 (& proc -> nextClearXidElem ) == INVALID_PGPROCNO );
522+
512523 /* Fix semaphore count for any absorbed wakeups */
513524 while (extraWaits -- > 0 )
514525 PGSemaphoreUnlock (& proc -> sem );
@@ -520,12 +531,13 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
520531
521532 /*
522533 * Now that we've got the lock, clear the list of processes waiting for
523- * group XID clearing, saving a pointer to the head of the list.
534+ * group XID clearing, saving a pointer to the head of the list. Trying
535+ * to pop elements one at a time could lead to an ABA problem.
524536 */
525537 while (true)
526538 {
527- nextidx = pg_atomic_read_u32 (& procglobal -> nextClearXidElem );
528- if (pg_atomic_compare_exchange_u32 (& procglobal -> nextClearXidElem ,
539+ nextidx = pg_atomic_read_u32 (& procglobal -> firstClearXidElem );
540+ if (pg_atomic_compare_exchange_u32 (& procglobal -> firstClearXidElem ,
529541 & nextidx ,
530542 INVALID_PGPROCNO ))
531543 break ;
@@ -563,6 +575,11 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
563575 wakeidx = pg_atomic_read_u32 (& proc -> nextClearXidElem );
564576 pg_atomic_write_u32 (& proc -> nextClearXidElem , INVALID_PGPROCNO );
565577
578+ /* ensure all previous writes are visible before follower continues. */
579+ pg_write_barrier ();
580+
581+ proc -> clearXid = false;
582+
566583 if (proc != MyProc )
567584 PGSemaphoreUnlock (& proc -> sem );
568585 }
0 commit comments