Rewrite caching mechanism.
author Robert Haas <rhaas@postgresql.org>
Thu, 15 Sep 2011 19:54:42 +0000 (14:54 -0500)
committer Robert Haas <rhaas@postgresql.org>
Fri, 14 Oct 2011 18:38:14 +0000 (14:38 -0400)
This leaves behind some duplicate code that needs to be cleaned up, but
gets us much closer to being able to do a proper global_xmin computation.

src/backend/storage/ipc/snaparray.c

index 3cf79258786dad7c0a435b4cdb5ef9a16201d894..38042a3632aa5ed251acef68c872e79bcee628ea 100644 (file)
@@ -508,21 +508,10 @@ SnapArrayRemoveRunningXids(TransactionId xid, int nchildren,
 Snapshot
 SnapArrayGetSnapshotData(Snapshot snapshot)
 {
+       TransactionId   xmin;
        TransactionId   xmax;
-       TransactionId   new_xmin;
-       TransactionId   new_xmax;
-       TransactionId   highest_removed_subxid;
-       uint32                  num_running_xids;
-       uint32                  num_removed_xids;
-       uint32                  num_new_running_xids;
-       TransactionId  *running_xids;
-       TransactionId  *removed_xids;
-       TransactionId  *new_running_xids;
-       uint32                  n;
-       uint32                  xids_added;
-       uint32                  certainly_removed_xids = 0;
-       bool                    needsort = false;
        bool                    have_lock = false;
+       uint32                  num_running_xids;
 
        /*
         * Allocate enough memory for the largest possible snapshot.  This could
@@ -552,78 +541,10 @@ retry:
                goto retry;
        }
 
-       /* Data must begin with a snapshot summary. */
-       Assert(SnapArrayCache.size >= SNAPARRAY_SUMMARY_ITEMS);
-       Assert(SnapArrayCache.buffer[0] == InvalidTransactionId);
-       xmax = SnapArrayCache.buffer[1];
-       highest_removed_subxid = SnapArrayCache.buffer[2];
+       /* Work out xmin and xmax. */
        num_running_xids = (uint32) SnapArrayCache.buffer[3];
-       num_removed_xids =
-               SnapArrayCache.size - (num_running_xids + SNAPARRAY_SUMMARY_ITEMS);
-       running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
-       removed_xids = running_xids + num_running_xids;
-
-       /*
-        * Scan the removed XIDs.  This is enables us to work out the new xmax
-        * value, the number of XIDs we're certain to be able to remove from the
-        * running list (because they're newer than highest_removed_subxid), and
-        * whether or not the list of removed XIDs needs to be sorted.
-        */
-       new_xmax = xmax;
-       for (n = 0; n < num_removed_xids; ++n)
-       {
-               TransactionId   xid = removed_xids[n];
-
-               if (TransactionIdFollowsOrEquals(xid, new_xmax))
-               {
-                       new_xmax = removed_xids[n];
-                       TransactionIdAdvance(new_xmax);
-               }
-               if (TransactionIdFollows(xid, highest_removed_subxid))
-                       ++certainly_removed_xids;
-               if (n > 0 && TransactionIdPrecedes(xid, removed_xids[n-1]))
-                       needsort = true;
-       }
-
-       /*
-        * Sort the removed XIDs (unless they are already in order).
-        *
-        * This is actually mutating the underlying cache, which is OK, because
-        * changing the order of the removed XIDs doesn't change the semantics.
-        * We skip this if the data is already in order, which could happen
-        * either because we've sorted the same data on a previous trip through
-        * this function, or because all removed XIDs added since our last visit
-        * were removed in ascending XID order.
-        *
-        * NB: Some quicksort implementations don't perform well on data that's
-        * already mostly or entirely sorted.  Skipping the sort in the case where
-        * the data is completely in order should ameliorate any problems in this
-        * area quite a bit, but we might need to pick another sort algorithm if
-        * this probes problematic.
-        */
-       if (needsort)
-       {
-               xid_cmp_base = xmax;
-               qsort(removed_xids, num_removed_xids, sizeof(TransactionId), xid_cmp);
-       }
-
-       /* Work out number of new XIDs being added. */
-       if (new_xmax >= xmax)
-               xids_added = new_xmax - xmax;
-       else
-               xids_added = new_xmax - xmax - FirstNormalTransactionId;
-
-       /*
-        * Work out new list of running XIDs.
-        */
-       num_new_running_xids =
-               num_running_xids + xids_added - certainly_removed_xids;
-       new_running_xids = palloc(sizeof(TransactionId) * num_new_running_xids);
-       num_new_running_xids =
-               SnapArrayComputeRunningXids(xmax, new_xmax,
-                                                                       num_running_xids, running_xids,
-                                                                       num_removed_xids, removed_xids,
-                                                                       num_new_running_xids, new_running_xids);
+       xmax = SnapArrayCache.buffer[1];
+       xmin = num_running_xids > 0 ? SnapArrayCache.buffer[4] : xmax;
 
        /*
         * Each backend must advertise the xmin value of its oldest snapshot in
@@ -657,11 +578,10 @@ retry:
         * that shouldn't matter, since we never allow the real global_xmin to
         * go backward.
         */
-       new_xmin = num_new_running_xids > 0 ? new_running_xids[0] : new_xmax;
        if (!TransactionIdIsValid(TransactionXmin))
        {
                /* Advertise new xmin. */
-               MyProc->xmin = TransactionXmin = new_xmin;
+               MyProc->xmin = TransactionXmin = xmin;
 
                /*
                 * We must make sure that snapshot->xmin is set before we read
@@ -672,7 +592,7 @@ retry:
                pg_memory_barrier();
 
                /* Now we can do the actual check. */
-               if (TransactionIdPrecedes(new_xmin, SnapArray->fresh_xmin))
+               if (TransactionIdPrecedes(xmin, SnapArray->fresh_xmin))
                {
                        if (have_lock)
                                elog(ERROR, "stale stapshot while holding SnapArrayLock");
@@ -697,17 +617,14 @@ retry:
                LWLockRelease(SnapArrayLock);
 
        /* Populate the new snapshot. */
-       snapshot->xmin = new_xmin;
-       snapshot->xmax = new_xmax;
-       snapshot->oxcnt = num_new_running_xids;
-       if (num_new_running_xids > 0)
-       {
-               memcpy(snapshot->oxip, new_running_xids,
-                          num_new_running_xids * sizeof(TransactionId));
-               pfree(new_running_xids);
-       }
+       snapshot->xmin = xmin;
+       snapshot->xmax = xmax;
+       snapshot->oxcnt = num_running_xids;
+       if (num_running_xids > 0)
+               memcpy(snapshot->oxip, SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS,
+                          num_running_xids * sizeof(TransactionId));
        snapshot->takenDuringRecovery = true;           /* XXX FIXME */
-       snapshot->highest_removed_subxid = highest_removed_subxid;
+       snapshot->highest_removed_subxid = SnapArrayCache.buffer[2];
        snapshot->curcid = GetCurrentCommandId(false);
 
        /*
@@ -719,7 +636,7 @@ retry:
         * follow our snapshot xmin, so the worst thing that can happen here is
         * that we read a slightly out-of-date, older value.  That's acceptable.
         */
-       RecentXmin = new_xmin;
+       RecentXmin = xmin;
        RecentGlobalXmin = SnapArray->global_xmin - vacuum_defer_cleanup_age;
        if (!TransactionIdIsNormal(RecentGlobalXmin))
                RecentGlobalXmin = FirstNormalTransactionId;
@@ -741,6 +658,11 @@ retry:
  * from shared memory, but we can optimize away duplicate reads of the same
  * data.
  *
+ * The data we read will always begin with a snapshot summary.  If it's
+ * followed by removed XIDs, we fold those into our snapshot summary and
+ * generate a new summary.  Callers therefore don't need to worry about
+ * removed XIDs; they can work only with the summary format.
+ *
  * If have_lock = false, we do not hold SnapArrayLock and must guard against
  * wraparound.  If this routine returns false, it means that we failed to copy
  * all the data needed for our cache before the buffer wrapped, and the cache
@@ -759,8 +681,19 @@ SnapArrayUpdateCache(bool have_lock)
        uint64          write_pointer;
        uint64          read_starts_at;
        uint64          buffer_space_needed;
-       uint32          skip = 0;
+       uint32          num_running_xids;
+       uint32          num_removed_xids;
+       uint32          n;
+       uint32          m;
+       uint32          r;
+       TransactionId   xmax;
+       TransactionId   new_xmax;
+       TransactionId   highest_removed_subxid;
        TransactionId  *buffer;
+       TransactionId  *running_xids;
+       TransactionId  *removed_xids;
+       uint32          xids_added;
+       bool            need_sort = false;
 
        /* Read start and stop pointers. */
        if (have_lock)
@@ -801,7 +734,7 @@ SnapArrayUpdateCache(bool have_lock)
                buffer_space_needed = stop_pointer - read_starts_at;
        }
 
-       /* If our local cache is not large enough to hold the data, grow it. */
+       /* Grow backend-local cache, if necessary. */
        if (buffer_space_needed > SnapArrayCache.entries)
        {
                SnapArrayCache.buffer =
@@ -828,7 +761,7 @@ SnapArrayUpdateCache(bool have_lock)
                SpinLockAcquire(&SnapArray->write_mutex);
                write_pointer = SnapArray->write_pointer;
                SpinLockRelease(&SnapArray->write_mutex);
-               if (write_pointer > start_pointer + skip + SnapArray->ring_buffer_size)
+               if (write_pointer > read_starts_at + SnapArray->ring_buffer_size)
                {
                        /* Wraparound detected.  Update statistics and let caller know. */
                        SpinLockAcquire(&SnapArray->misc_mutex);
@@ -843,6 +776,228 @@ SnapArrayUpdateCache(bool have_lock)
        SnapArrayCache.last_start_pointer = start_pointer;
        SnapArrayCache.last_stop_pointer = stop_pointer;
 
+       /* Data must begin with a snapshot summary. */
+       Assert(SnapArrayCache.size >= SNAPARRAY_SUMMARY_ITEMS);
+       Assert(SnapArrayCache.buffer[0] == InvalidTransactionId);
+       xmax = SnapArrayCache.buffer[1];
+       highest_removed_subxid = SnapArrayCache.buffer[2];
+       num_running_xids = (uint32) SnapArrayCache.buffer[3];
+       num_removed_xids =
+               SnapArrayCache.size - (num_running_xids + SNAPARRAY_SUMMARY_ITEMS);
+
+       /* If there are no removed XIDs, we're done. */
+       if (num_removed_xids == 0)
+               return true;
+
+       /* Work out location of running and removed XIDs. */
+       running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
+       removed_xids = running_xids + num_running_xids;
+
+       /*
+        * Scan the removed XIDs.  This enables us to work out the new xmax
+        * value and whether or not the list of removed XIDs needs to be sorted.
+        */
+       new_xmax = xmax;
+       for (n = 0; n < num_removed_xids; ++n)
+       {
+               TransactionId   xid = removed_xids[n];
+
+               if (TransactionIdFollowsOrEquals(xid, new_xmax))
+               {
+                       new_xmax = removed_xids[n];
+                       TransactionIdAdvance(new_xmax);
+               }
+               if (n > 0 && TransactionIdPrecedes(xid, removed_xids[n-1]))
+                       need_sort = true;
+       }
+
+       {
+               uint32 k;
+               StringInfoData  buf;
+               initStringInfo(&buf);
+               appendStringInfo(&buf,
+                       "xmax: %lu, highest_removed_subxid: %lu, %u xids: [",
+                       (unsigned long) xmax, (unsigned long) highest_removed_subxid,
+                       (unsigned) num_running_xids);
+               for (k = 0; k < num_running_xids; ++k)
+                       appendStringInfo(&buf, k ? " %lu" : "%lu",
+                                                               (unsigned long) running_xids[k]);
+               appendStringInfo(&buf, "]; %u removed xids: [", num_removed_xids);
+               for (k = 0; k < num_removed_xids; ++k)
+                       appendStringInfo(&buf, k ? " %lu" : "%lu",
+                                                               (unsigned long) removed_xids[k]);
+               appendStringInfo(&buf, "] new_xmax=%lu", (unsigned long) new_xmax);
+               elog(LOG, "[snaparray] %s", buf.data);
+       }
+
+       /*
+        * Sort the removed XIDs (unless they are already in order).
+        *
+        * We skip this if the data is already in order, which could happen
+        * either because we've sorted the same data on a previous trip through
+        * this function, or because all removed XIDs added since our last visit
+        * were removed in ascending XID order.
+        *
+        * NB: Some quicksort implementations don't perform well on data that's
+        * already mostly or entirely sorted.  Skipping the sort in the case where
+        * the data is completely in order should ameliorate any problems in this
+        * area quite a bit, but we might need to pick another sort algorithm if
+        * this proves problematic.
+        */
+       if (need_sort)
+       {
+               xid_cmp_base = xmax;
+               qsort(removed_xids, num_removed_xids, sizeof(TransactionId), xid_cmp);
+       }
+
+       /*
+        * Scan the list of running XIDs and remove any that appear in the
+        * removed list.  Since the list of removed XIDs is guaranteed to be
+        * sorted at this point, we can basically do a merge join.
+        */
+       m = 0;
+       r = 0;
+       for (n = 0; n < num_running_xids; ++n)
+       {
+               bool    match = false;
+
+               while (1)
+               {
+                       if (m >= num_removed_xids)
+                               break;
+                       if (TransactionIdEquals(removed_xids[m], running_xids[n]))
+                       {
+                               match = true;
+                               ++m;
+                               break;
+                       }
+                       if (TransactionIdFollows(removed_xids[m], running_xids[n]))
+                               break;
+                       ++m;
+               }
+
+               if (!match)
+               {
+                       running_xids[r] = running_xids[n];
+                       ++r;
+               }
+       }
+
+       /*
+        * Next, we have to add any XIDs greater than or equal to the old xmax and
+        * less than the new xmax that have not been removed.  Compute how many.
+        */
+       if (new_xmax >= xmax)
+               xids_added = new_xmax - xmax;
+       else
+               xids_added = new_xmax - xmax - FirstNormalTransactionId;
+       Assert(m <= num_removed_xids);
+       Assert(num_removed_xids - m <= xids_added);
+       xids_added = xids_added - (num_removed_xids - m);
+
+       if (xids_added != 0)
+       {
+               TransactionId      *zap;
+               uint32          nzap;
+               bool            needs_pfree;
+
+               /* Grow backend-local cache, if necessary. */
+               buffer_space_needed = r + xids_added;
+               if (buffer_space_needed >= SnapArrayCache.entries)
+               {
+                       SnapArrayCache.buffer =
+                               repalloc(SnapArrayCache.buffer,
+                                                sizeof(TransactionId) * buffer_space_needed);
+                       SnapArrayCache.entries = buffer_space_needed;
+
+                       /* Buffer might have moved, so must update this. */
+                       running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
+                       removed_xids = running_xids + num_running_xids;
+               }
+
+               /*
+                * If the XIDs we're about to add are going to overwrite the list of
+                * XIDs that we're going to remove, we need to copy the list of removed
+                * XIDs into a temporary array before we start.  Otherwise, we can use
+                * the existing data where it is.
+                */
+               nzap = num_removed_xids - m;
+               if (r + xids_added > m)
+               {
+                       zap = palloc(nzap * sizeof(TransactionId));
+                       memcpy(zap, removed_xids + m, nzap * sizeof(TransactionId));
+                       needs_pfree = true;
+               }
+               else
+               {
+                       zap = removed_xids + m;
+                       needs_pfree = false;
+               }
+
+               /*
+                * Since the list of removed XIDs is sorted, we can do this in O(n+m)
+                * time, where n is the amount by which xmax has advanced and m is the
+                * number of removed XIDs greater than or equal to the old xmax.
+                */
+               n = 0;
+               while (TransactionIdPrecedes(xmax, new_xmax))
+               {
+                       bool    match = false;
+
+                       while (1)
+                       {
+                               if (n >= nzap)
+                                       break;
+                               if (TransactionIdEquals(xmax, zap[n]))
+                               {
+                                       match = true;
+                                       break;
+                               }
+                               if (TransactionIdPrecedesOrEquals(xmax, zap[n]))
+                                       break;
+                               ++n;
+                       }
+
+                       if (!match)
+                       {
+                               running_xids[r] = xmax;
+                               ++r;
+                       }
+
+                       TransactionIdAdvance(xmax);
+               }
+
+               /* Cleanup. */
+               if (needs_pfree)
+                       pfree(zap);
+               if (r != buffer_space_needed)
+               {
+                       elog(FATAL, "[snaparray] expected %u but got %u [xids_added=%u,m=%u]", (unsigned) buffer_space_needed, (unsigned int) r, (unsigned int) xids_added, (unsigned int) m);
+               }
+               Assert(r == buffer_space_needed);
+       }
+
+       /*
+        * Update cached snapshot to reflect new details.
+        *
+        * We no longer need the highest_removed_subxid if (1) there are no longer
+        * any running XIDs or (2) the new xmin follows highest_removed_subxid.
+        * We are careful to clear it whenever possible to avoid problems when the
+        * XID space eventually wraps around.
+        */
+       SnapArrayCache.buffer[1] = new_xmax;
+       if (r == 0 ||
+               TransactionIdPrecedes(SnapArrayCache.buffer[2],
+                                                         SnapArrayCache.buffer[SNAPARRAY_SUMMARY_ITEMS]))
+               SnapArrayCache.buffer[2] = InvalidTransactionId;
+       SnapArrayCache.buffer[3] = (TransactionId) r;
+
+       /*
+        * Discard list of removed XIDs; they've been folded into the snapshot
+        * and are no longer needed.
+        */
+       SnapArrayCache.size = SNAPARRAY_SUMMARY_ITEMS + r;
+
        return true;
 }