Snapshot
SnapArrayGetSnapshotData(Snapshot snapshot)
{
+ TransactionId xmin;
TransactionId xmax;
- TransactionId new_xmin;
- TransactionId new_xmax;
- TransactionId highest_removed_subxid;
- uint32 num_running_xids;
- uint32 num_removed_xids;
- uint32 num_new_running_xids;
- TransactionId *running_xids;
- TransactionId *removed_xids;
- TransactionId *new_running_xids;
- uint32 n;
- uint32 xids_added;
- uint32 certainly_removed_xids = 0;
- bool needsort = false;
bool have_lock = false;
+ uint32 num_running_xids;
/*
* Allocate enough memory for the largest possible snapshot. This could
goto retry;
}
- /* Data must begin with a snapshot summary. */
- Assert(SnapArrayCache.size >= SNAPARRAY_SUMMARY_ITEMS);
- Assert(SnapArrayCache.buffer[0] == InvalidTransactionId);
- xmax = SnapArrayCache.buffer[1];
- highest_removed_subxid = SnapArrayCache.buffer[2];
+ /* Work out xmin and xmax. */
num_running_xids = (uint32) SnapArrayCache.buffer[3];
- num_removed_xids =
- SnapArrayCache.size - (num_running_xids + SNAPARRAY_SUMMARY_ITEMS);
- running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
- removed_xids = running_xids + num_running_xids;
-
- /*
- * Scan the removed XIDs. This is enables us to work out the new xmax
- * value, the number of XIDs we're certain to be able to remove from the
- * running list (because they're newer than highest_removed_subxid), and
- * whether or not the list of removed XIDs needs to be sorted.
- */
- new_xmax = xmax;
- for (n = 0; n < num_removed_xids; ++n)
- {
- TransactionId xid = removed_xids[n];
-
- if (TransactionIdFollowsOrEquals(xid, new_xmax))
- {
- new_xmax = removed_xids[n];
- TransactionIdAdvance(new_xmax);
- }
- if (TransactionIdFollows(xid, highest_removed_subxid))
- ++certainly_removed_xids;
- if (n > 0 && TransactionIdPrecedes(xid, removed_xids[n-1]))
- needsort = true;
- }
-
- /*
- * Sort the removed XIDs (unless they are already in order).
- *
- * This is actually mutating the underlying cache, which is OK, because
- * changing the order of the removed XIDs doesn't change the semantics.
- * We skip this if the data is already in order, which could happen
- * either because we've sorted the same data on a previous trip through
- * this function, or because all removed XIDs added since our last visit
- * were removed in ascending XID order.
- *
- * NB: Some quicksort implementations don't perform well on data that's
- * already mostly or entirely sorted. Skipping the sort in the case where
- * the data is completely in order should ameliorate any problems in this
- * area quite a bit, but we might need to pick another sort algorithm if
- * this probes problematic.
- */
- if (needsort)
- {
- xid_cmp_base = xmax;
- qsort(removed_xids, num_removed_xids, sizeof(TransactionId), xid_cmp);
- }
-
- /* Work out number of new XIDs being added. */
- if (new_xmax >= xmax)
- xids_added = new_xmax - xmax;
- else
- xids_added = new_xmax - xmax - FirstNormalTransactionId;
-
- /*
- * Work out new list of running XIDs.
- */
- num_new_running_xids =
- num_running_xids + xids_added - certainly_removed_xids;
- new_running_xids = palloc(sizeof(TransactionId) * num_new_running_xids);
- num_new_running_xids =
- SnapArrayComputeRunningXids(xmax, new_xmax,
- num_running_xids, running_xids,
- num_removed_xids, removed_xids,
- num_new_running_xids, new_running_xids);
+ xmax = SnapArrayCache.buffer[1];
+ xmin = num_running_xids > 0 ? SnapArrayCache.buffer[4] : xmax;
/*
* Each backend must advertise the xmin value of its oldest snapshot in
* that shouldn't matter, since we never allow the real global_xmin to
* go backward.
*/
- new_xmin = num_new_running_xids > 0 ? new_running_xids[0] : new_xmax;
if (!TransactionIdIsValid(TransactionXmin))
{
/* Advertise new xmin. */
- MyProc->xmin = TransactionXmin = new_xmin;
+ MyProc->xmin = TransactionXmin = xmin;
/*
* We must make sure that snapshot->xmin is set before we read
pg_memory_barrier();
/* Now we can do the actual check. */
- if (TransactionIdPrecedes(new_xmin, SnapArray->fresh_xmin))
+ if (TransactionIdPrecedes(xmin, SnapArray->fresh_xmin))
{
if (have_lock)
elog(ERROR, "stale stapshot while holding SnapArrayLock");
LWLockRelease(SnapArrayLock);
/* Populate the new snapshot. */
- snapshot->xmin = new_xmin;
- snapshot->xmax = new_xmax;
- snapshot->oxcnt = num_new_running_xids;
- if (num_new_running_xids > 0)
- {
- memcpy(snapshot->oxip, new_running_xids,
- num_new_running_xids * sizeof(TransactionId));
- pfree(new_running_xids);
- }
+ snapshot->xmin = xmin;
+ snapshot->xmax = xmax;
+ snapshot->oxcnt = num_running_xids;
+ if (num_running_xids > 0)
+ memcpy(snapshot->oxip, SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS,
+ num_running_xids * sizeof(TransactionId));
snapshot->takenDuringRecovery = true; /* XXX FIXME */
- snapshot->highest_removed_subxid = highest_removed_subxid;
+ snapshot->highest_removed_subxid = SnapArrayCache.buffer[2];
snapshot->curcid = GetCurrentCommandId(false);
/*
* follow our snapshot xmin, so the worst thing that can happen here is
* that we read a slightly out-of-date, older value. That's acceptable.
*/
- RecentXmin = new_xmin;
+ RecentXmin = xmin;
RecentGlobalXmin = SnapArray->global_xmin - vacuum_defer_cleanup_age;
if (!TransactionIdIsNormal(RecentGlobalXmin))
RecentGlobalXmin = FirstNormalTransactionId;
* from shared memory, but we can optimize away duplicate reads of the same
* data.
*
+ * The data we read will always begin with a snapshot summary. If it's
+ * followed by removed XIDs, we fold those into our snapshot summary and
+ * generate a new summary. Callers therefore don't need to worry about
+ * removed XIDs; they can work only with the summary format.
+ *
* If have_lock = false, we do not hold SnapArrayLock and must guard against
* wraparound. If this routine returns false, it means that we failed to copy
* all the data needed for our cache before the buffer wrapped, and the cache
uint64 write_pointer;
uint64 read_starts_at;
uint64 buffer_space_needed;
- uint32 skip = 0;
+ uint32 num_running_xids;
+ uint32 num_removed_xids;
+ uint32 n;
+ uint32 m;
+ uint32 r;
+ TransactionId xmax;
+ TransactionId new_xmax;
+ TransactionId highest_removed_subxid;
TransactionId *buffer;
+ TransactionId *running_xids;
+ TransactionId *removed_xids;
+ uint32 xids_added;
+ bool need_sort = false;
/* Read start and stop pointers. */
if (have_lock)
buffer_space_needed = stop_pointer - read_starts_at;
}
- /* If our local cache is not large enough to hold the data, grow it. */
+ /* Grow backend-local cache, if necessary. */
if (buffer_space_needed > SnapArrayCache.entries)
{
SnapArrayCache.buffer =
SpinLockAcquire(&SnapArray->write_mutex);
write_pointer = SnapArray->write_pointer;
SpinLockRelease(&SnapArray->write_mutex);
- if (write_pointer > start_pointer + skip + SnapArray->ring_buffer_size)
+ if (write_pointer > read_starts_at + SnapArray->ring_buffer_size)
{
/* Wraparound detected. Update statistics and let caller know. */
SpinLockAcquire(&SnapArray->misc_mutex);
SnapArrayCache.last_start_pointer = start_pointer;
SnapArrayCache.last_stop_pointer = stop_pointer;
+ /* Data must begin with a snapshot summary. */
+ Assert(SnapArrayCache.size >= SNAPARRAY_SUMMARY_ITEMS);
+ Assert(SnapArrayCache.buffer[0] == InvalidTransactionId);
+ xmax = SnapArrayCache.buffer[1];
+ highest_removed_subxid = SnapArrayCache.buffer[2];
+ num_running_xids = (uint32) SnapArrayCache.buffer[3];
+ num_removed_xids =
+ SnapArrayCache.size - (num_running_xids + SNAPARRAY_SUMMARY_ITEMS);
+
+ /* If there are no removed XIDs, we're done. */
+ if (num_removed_xids == 0)
+ return true;
+
+ /* Work out location of running and removed XIDs. */
+ running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
+ removed_xids = running_xids + num_running_xids;
+
+ /*
+ * Scan the removed XIDs. This enables us to work out the new xmax
+ * value and whether or not the list of removed XIDs needs to be sorted.
+ */
+ new_xmax = xmax;
+ for (n = 0; n < num_removed_xids; ++n)
+ {
+ TransactionId xid = removed_xids[n];
+
+ if (TransactionIdFollowsOrEquals(xid, new_xmax))
+ {
+ new_xmax = removed_xids[n];
+ TransactionIdAdvance(new_xmax);
+ }
+ if (n > 0 && TransactionIdPrecedes(xid, removed_xids[n-1]))
+ need_sort = true;
+ }
+
+ {
+ uint32 k;
+ StringInfoData buf;
+ initStringInfo(&buf);
+ appendStringInfo(&buf,
+ "xmax: %lu, highest_removed_subxid: %lu, %u xids: [",
+ (unsigned long) xmax, (unsigned long) highest_removed_subxid,
+ (unsigned) num_running_xids);
+ for (k = 0; k < num_running_xids; ++k)
+ appendStringInfo(&buf, k ? " %lu" : "%lu",
+ (unsigned long) running_xids[k]);
+ appendStringInfo(&buf, "]; %u removed xids: [", num_removed_xids);
+ for (k = 0; k < num_removed_xids; ++k)
+ appendStringInfo(&buf, k ? " %lu" : "%lu",
+ (unsigned long) removed_xids[k]);
+ appendStringInfo(&buf, "] new_xmax=%lu", (unsigned long) new_xmax);
+ elog(LOG, "[snaparray] %s", buf.data);
+ }
+
+ /*
+ * Sort the removed XIDs (unless they are already in order).
+ *
+ * We skip this if the data is already in order, which could happen
+ * either because we've sorted the same data on a previous trip through
+ * this function, or because all removed XIDs added since our last visit
+ * were removed in ascending XID order.
+ *
+ * NB: Some quicksort implementations don't perform well on data that's
+ * already mostly or entirely sorted. Skipping the sort in the case where
+ * the data is completely in order should ameliorate any problems in this
+ * area quite a bit, but we might need to pick another sort algorithm if
+ * this proves problematic.
+ */
+ if (need_sort)
+ {
+ xid_cmp_base = xmax;
+ qsort(removed_xids, num_removed_xids, sizeof(TransactionId), xid_cmp);
+ }
+
+ /*
+ * Scan the list of running XIDs and remove any that appear in the
+ * removed list. Since the list of removed XIDs is guaranteed to be
+ * sorted at this point, we can basically do a merge join.
+ */
+ m = 0;
+ r = 0;
+ for (n = 0; n < num_running_xids; ++n)
+ {
+ bool match = false;
+
+ while (1)
+ {
+ if (m >= num_removed_xids)
+ break;
+ if (TransactionIdEquals(removed_xids[m], running_xids[n]))
+ {
+ match = true;
+ ++m;
+ break;
+ }
+ if (TransactionIdFollows(removed_xids[m], running_xids[n]))
+ break;
+ ++m;
+ }
+
+ if (!match)
+ {
+ running_xids[r] = running_xids[n];
+ ++r;
+ }
+ }
+
+ /*
+ * Next, we have to add any XIDs greater than or equal to the old xmax and
+ * less than the new xmax that have not been removed. Compute how many.
+ */
+ if (new_xmax >= xmax)
+ xids_added = new_xmax - xmax;
+ else
+ xids_added = new_xmax - xmax - FirstNormalTransactionId;
+ Assert(m <= num_removed_xids);
+ Assert(num_removed_xids - m <= xids_added);
+ xids_added = xids_added - (num_removed_xids - m);
+
+ if (xids_added != 0)
+ {
+ TransactionId *zap;
+ uint32 nzap;
+ bool needs_pfree;
+
+ /* Grow backend-local cache, if necessary. */
+ buffer_space_needed = r + xids_added;
+ if (buffer_space_needed >= SnapArrayCache.entries)
+ {
+ SnapArrayCache.buffer =
+ repalloc(SnapArrayCache.buffer,
+ sizeof(TransactionId) * buffer_space_needed);
+ SnapArrayCache.entries = buffer_space_needed;
+
+ /* Buffer might have moved, so must update this. */
+ running_xids = SnapArrayCache.buffer + SNAPARRAY_SUMMARY_ITEMS;
+ removed_xids = running_xids + num_running_xids;
+ }
+
+ /*
+ * If the XIDs we're about to add are going to overwrite the list of
+ * XIDs that we're going to remove, we need to copy the list of removed
+ * XIDs into a temporary array before we start. Otherwise, we can use
+ * the existing data where it is.
+ */
+ nzap = num_removed_xids - m;
+ if (r + xids_added > m)
+ {
+ zap = palloc(nzap * sizeof(TransactionId));
+ memcpy(zap, removed_xids + m, nzap * sizeof(TransactionId));
+ needs_pfree = true;
+ }
+ else
+ {
+ zap = removed_xids + m;
+ needs_pfree = false;
+ }
+
+ /*
+ * Since the list of removed XIDs is sorted, we can do this in O(n+m)
+ * time, where n is the amount by which xmax has advanced and m is the
+ * number of removed XIDs greater than or equal to the old xmax.
+ */
+ n = 0;
+ while (TransactionIdPrecedes(xmax, new_xmax))
+ {
+ bool match = false;
+
+ while (1)
+ {
+ if (n >= nzap)
+ break;
+ if (TransactionIdEquals(xmax, zap[n]))
+ {
+ match = true;
+ break;
+ }
+ if (TransactionIdPrecedesOrEquals(xmax, zap[n]))
+ break;
+ ++n;
+ }
+
+ if (!match)
+ {
+ running_xids[r] = xmax;
+ ++r;
+ }
+
+ TransactionIdAdvance(xmax);
+ }
+
+ /* Cleanup. */
+ if (needs_pfree)
+ pfree(zap);
+ if (r != buffer_space_needed)
+ {
+ elog(FATAL, "[snaparray] expected %u but got %u [xids_added=%u,m=%u]", (unsigned) buffer_space_needed, (unsigned int) r, (unsigned int) xids_added, (unsigned int) m);
+ }
+ Assert(r == buffer_space_needed);
+ }
+
+ /*
+ * Update cached snapshot to reflect new details.
+ *
+ * We no longer need the highest_removed_subxid if (1) there are no longer
+ * any running XIDs or (2) the new xmin follows highest_removed_subxid.
+ * We are careful to clear it whenever possible to avoid problems when the
+ * XID space eventually wraps around.
+ */
+ SnapArrayCache.buffer[1] = new_xmax;
+ if (r == 0 ||
+ TransactionIdPrecedes(SnapArrayCache.buffer[2],
+ SnapArrayCache.buffer[SNAPARRAY_SUMMARY_ITEMS]))
+ SnapArrayCache.buffer[2] = InvalidTransactionId;
+ SnapArrayCache.buffer[3] = (TransactionId) r;
+
+ /*
+ * Discard list of removed XIDs; they've been folded into the snapshot
+ * and are no longer needed.
+ */
+ SnapArrayCache.size = SNAPARRAY_SUMMARY_ITEMS + r;
+
return true;
}