--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * freepage.c
+ * Management of free memory pages.
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/mmgr/freepage.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "utils/freepage.h"
+
+/* Magic numbers to identify various page types */
+#define FREE_PAGE_SPAN_LEADER_MAGIC 0xea4020f0
+#define FREE_PAGE_LEAF_MAGIC 0x98eae728
+#define FREE_PAGE_INTERNAL_MAGIC 0x19aa32c9
+
+/*
+ * Doubly linked list of spans of free pages; stored in first page of span.
+ *
+ * Each freelist in the FreePageManager points at the first leader of a
+ * chain of these.
+ */
+struct FreePageSpanLeader
+{
+ int magic; /* always FREE_PAGE_SPAN_LEADER_MAGIC */
+ Size npages; /* number of pages in span */
+ relptr(FreePageSpanLeader) prev; /* previous span on list; null at head */
+ relptr(FreePageSpanLeader) next; /* next span on list; null at tail */
+};
+
+/*
+ * Common header for btree leaf and internal pages.
+ *
+ * The magic number is what distinguishes a leaf from an internal page.
+ */
+typedef struct FreePageBtreeHeader
+{
+ int magic; /* FREE_PAGE_LEAF_MAGIC or FREE_PAGE_INTERNAL_MAGIC */
+ Size nused; /* number of items used */
+ relptr(FreePageBtree) parent; /* uplink; null on the root page */
+} FreePageBtreeHeader;
+
+/*
+ * Internal key; points to next level of btree.
+ *
+ * Searches compare against first_page and then follow child.
+ */
+typedef struct FreePageBtreeInternalKey
+{
+ Size first_page; /* low bound for keys on child page */
+ relptr(FreePageBtree) child; /* downlink */
+} FreePageBtreeInternalKey;
+
+/*
+ * Leaf key; no payload data.
+ *
+ * A leaf key has no child page.  Lookups match first_page against the page
+ * number of a span leader, so each key describes one span of free pages.
+ */
+typedef struct FreePageBtreeLeafKey
+{
+ Size first_page; /* first page of the free span */
+ Size last_page; /* presumably last page of the span; TODO confirm */
+} FreePageBtreeLeafKey;
+
+/*
+ * Work out how many keys will fit on a page.
+ *
+ * A btree page is FPM_PAGE_SIZE bytes: one FreePageBtreeHeader followed by
+ * as many whole keys of the relevant kind as fit in the remainder.
+ */
+#define FPM_ITEMS_PER_INTERNAL_PAGE \
+ ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
+ sizeof(FreePageBtreeInternalKey))
+#define FPM_ITEMS_PER_LEAF_PAGE \
+ ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
+ sizeof(FreePageBtreeLeafKey))
+
+/*
+ * A btree page of either sort.
+ *
+ * hdr.magic says which member of the union is in use, and hdr.nused says
+ * how many leading slots of that array hold valid keys.
+ */
+struct FreePageBtree
+{
+ FreePageBtreeHeader hdr;
+ union
+ {
+ FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
+ FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
+ } u;
+};
+
+/*
+ * Results of a btree search.
+ *
+ * Each page/index pair names a leaf page and a slot on it.  An index is
+ * meaningful only when its companion page pointer is not NULL.
+ */
+typedef struct FreePageBtreeSearchResult
+{
+ FreePageBtree *page_exact; /* leaf holding an exact match, else NULL */
+ Size index_exact;
+ FreePageBtree *page_next; /* leaf holding the following key, else NULL */
+ Size index_next;
+ FreePageBtree *page_prev; /* leaf holding the preceding key, else NULL */
+ Size index_prev;
+} FreePageBtreeSearchResult;
+
+/* Helper functions */
+static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
+ Size index);
+static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
+ FreePageBtreeSearchResult *result);
+static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
+static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
+
+/*
+ * Initialize a new, empty free page manager.
+ *
+ * 'fpm' should reference caller-provided memory large enough to contain a
+ * FreePageManager. We'll initialize it here.
+ *
+ * 'base' is the address to which all pointers are relative. When managing
+ * a dynamic shared memory segment, it should normally be the base of the
+ * segment. When managing backend-private memory, it can be either NULL or,
+ * if managing a single contiguous extent of memory, the start of that extent.
+ *
+ * 'lock' is the lock to be used to synchronize access to this FreePageManager.
+ * It can be NULL if synchronization is not required, either because we're
+ * managing backend-private memory or because we're managing shared memory but
+ * synchronization is caller-provided or not required. (For example, if only
+ * one process is allocating and freeing memory, locking isn't needed.)
+ *
+ * 'lock_address_is_fixed' should be false if the LWLock to be used for
+ * synchronization is stored in the same dynamic shared memory segment as
+ * the managed region, and true if it is stored in the main shared memory
+ * segment. Storing the LWLock in some other dynamic shared memory segment
+ * isn't supported. This is ignored when lock is NULL.
+ */
+void
+FreePageManagerInitialize(FreePageManager *fpm, char *base, LWLock *lock,
+                          bool lock_address_is_fixed)
+{
+    Size        f;
+    char       *lock_base;
+
+    /* Remember our own offset so fpm_segment_base() can recover 'base'. */
+    relptr_store(base, fpm->self, fpm);
+
+    /*
+     * fpm_lock() decodes the lock pointer relative to NULL when the lock
+     * address is fixed, and relative to the segment base otherwise.  It
+     * must be encoded the same way here, or a fixed-address lock would be
+     * decoded to the wrong address whenever base is not NULL.
+     */
+    lock_base = lock_address_is_fixed ? NULL : base;
+    relptr_store(lock_base, fpm->lock, lock);
+    fpm->lock_address_is_fixed = lock_address_is_fixed;
+
+    /* The btree and every freelist start out empty. */
+    relptr_store(base, fpm->root, (FreePageBtree *) NULL);
+    for (f = 0; f < FPM_NUM_FREELISTS; f++)
+        relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
+}
+
+/*
+ * Allocate a run of pages of the given length from the free page manager.
+ *
+ * 'npages' is the number of contiguous pages wanted.  The return value
+ * indicates whether we were able to satisfy the request; if true, the first
+ * page of the allocation is stored in *first_page.  If false, *first_page
+ * is left untouched.
+ *
+ * The manager's lock, if there is one, is held exclusively while the
+ * freelists and btree are examined and modified.
+ *
+ * XXX. The chosen span is removed from its freelist and from the btree in
+ * its entirety, even if it is longer than npages; nothing here puts the
+ * surplus pages back.
+ */
+bool
+FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
+{
+    LWLock     *lock = fpm_lock(fpm);
+    char       *base = fpm_segment_base(fpm);
+    FreePageSpanLeader *victim = NULL;
+    Size        victim_page = 0;    /* placate compiler */
+    Size        f;
+
+    /* Acquire lock (if there is one). */
+    if (lock != NULL)
+        LWLockAcquire(lock, LW_EXCLUSIVE);
+
+    /*
+     * Search for a free span.
+     *
+     * Right now, we use a simple best-fit policy here, but it's possible for
+     * this to result in memory fragmentation if we're repeatedly asked to
+     * allocate chunks just a little smaller than what we have available.
+     * Hopefully, this is unlikely, because we expect most requests to be
+     * single pages (for the bootstrap allocator) or superblock-sized chunks
+     * (for the superblock allocator, and for address space map memory),
+     * but no policy can be optimal under all circumstances unless it has
+     * knowledge of future allocation patterns.
+     *
+     * Freelist f holds spans of exactly f + 1 pages, except for the last
+     * one, which holds everything larger.  So we start at list npages - 1,
+     * or at the last list if the request is too big for any exact-size
+     * list; hence Min(), not Max().  A request for zero pages makes the
+     * starting index wrap around, so the loop is skipped and we report
+     * failure.
+     */
+    for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
+    {
+        /* Skip empty freelists. */
+        if (relptr_is_null(fpm->freelist[f]))
+            continue;
+
+        /*
+         * All of the freelists except the last one contain only items of a
+         * single size, so we just take the first one.  But the final free
+         * list contains everything too big for any of the other lists, so
+         * we need to search the list.
+         */
+        if (f < FPM_NUM_FREELISTS - 1)
+            victim = relptr_access(base, fpm->freelist[f]);
+        else
+        {
+            FreePageSpanLeader *candidate;
+
+            candidate = relptr_access(base, fpm->freelist[f]);
+            do
+            {
+                /* Keep the smallest span that is still big enough. */
+                if (candidate->npages >= npages && (victim == NULL ||
+                    victim->npages > candidate->npages))
+                {
+                    victim = candidate;
+                    if (victim->npages == npages)
+                        break;      /* perfect fit; stop looking */
+                }
+                candidate = relptr_access(base, candidate->next);
+            } while (candidate != NULL);
+        }
+
+        /* Only the first non-empty list is ever examined. */
+        break;
+    }
+
+    /* If we found a victim, remove it from the freelist and btree. */
+    if (victim != NULL)
+    {
+        FreePageSpanLeader *prev = relptr_access(base, victim->prev);
+        FreePageSpanLeader *next = relptr_access(base, victim->next);
+        FreePageBtreeSearchResult result;
+
+        /* Unlink from the list; a span with no predecessor is the head. */
+        if (prev != NULL)
+            relptr_copy(prev->next, victim->next);
+        else
+            relptr_copy(fpm->freelist[f], victim->next);
+        if (next != NULL)
+            relptr_copy(next->prev, victim->prev);
+
+        /* The btree is keyed by the page number of the span leader. */
+        victim_page = fpm_pointer_to_page(base, victim);
+        FreePageBtreeSearch(fpm, victim_page, &result);
+        Assert(result.page_exact != NULL);
+        FreePageBtreeRemove(fpm, result.page_exact, result.index_exact);
+    }
+
+    /* Release lock (if there is one). */
+    if (lock != NULL)
+        LWLockRelease(lock);
+
+    /* Return results to caller. */
+    if (victim == NULL)
+        return false;
+    *first_page = victim_page;
+    return true;
+}
+
+/*
+ * Remove from the btree the item in the given position on the given page.
+ *
+ * 'btp' must be a leaf page and 'index' must identify one of its used slots.
+ * The keys after 'index' are moved down by one slot and the page's key
+ * count is decremented.  'fpm' is not used yet; see the XXX note below.
+ */
+static void
+FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
+{
+    Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
+    Assert(index < btp->hdr.nused);
+
+    /*
+     * Shuffle remaining keys.  After the decrement, nused - index is the
+     * number of keys that followed the removed one; the byte count must be
+     * that many whole keys, and both arguments must be addresses of slots.
+     */
+    --btp->hdr.nused;
+    if (index < btp->hdr.nused)
+        memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
+                sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
+
+    /*
+     * XXX. At this point, the key is gone, but the node may be empty or
+     * may contain very few keys.  If we're the root page, that's life.
+     * Otherwise, unlink this page from the parent.  Alternatively, if this
+     * page is non-empty but less than half full, try to merge it with a
+     * sibling, which will likewise delete one key from the parent.
+     *
+     * Either way, we free up a page; make that into a free span.  Attempt
+     * a "no-split" insertion of that span into the btree: that is, succeed if
+     * the page can be combined with an existing span with which it is
+     * contiguous or if the btree page into which it needs to be inserted
+     * isn't full.  Otherwise, skip the insertion, which will lose the ability
+     * to consolidate that span with adjacent spans, but that's life sometimes.
+     * Put the span on the free list, setting a flag to indicate whether or not
+     * we managed to insert it into the btree.
+     *
+     * After freeing the page, repeat this process for our parent, which may
+     * now be empty or underfull.
+     */
+}
+
+/*
+ * Search the btree for an entry for the given first page and initialize
+ * *result with the results of the search.  If an exact match is found,
+ * result->page_exact and result->index_exact will be set to the page and
+ * slot where it is located; callers should not rely on the next/prev
+ * fields in that case.  Otherwise, result->page_exact will be NULL;
+ * result->page_next and result->index_next will indicate the location of
+ * the following key (unless the proposed first_page would follow everything
+ * currently in the tree, in which case result->page_next will be NULL); and
+ * result->page_prev and result->index_prev will indicate the preceding
+ * key (unless the proposed first_page would precede everything currently
+ * in the tree, in which case result->page_prev will be NULL).  An index
+ * field is meaningful only when its companion page pointer is not NULL.
+ */
+static void
+FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
+                    FreePageBtreeSearchResult *result)
+{
+    char       *base = fpm_segment_base(fpm);
+    FreePageBtree *leaf = relptr_access(base, fpm->root);
+    FreePageBtree *btp;
+    Size        index;
+
+    /* Start from "nothing found"; the code below fills in what exists. */
+    result->page_exact = NULL;
+    result->page_next = NULL;
+    result->page_prev = NULL;
+
+    /* If the btree is empty, then this would be the only item. */
+    if (leaf == NULL)
+        return;
+
+    /* Descend until we hit a leaf. */
+    while (leaf->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
+    {
+        index = FreePageBtreeSearchInternal(leaf, first_page);
+        leaf = relptr_access(base, leaf->u.internal_key[index].child);
+    }
+
+    /*
+     * FreePageBtreeRemove can leave a leaf with no keys at all.  There is
+     * nothing to compare against on such a page, so treat it like an empty
+     * tree rather than searching it.
+     */
+    if (leaf->hdr.nused == 0)
+        return;
+
+    /* Search leaf page. */
+    index = FreePageBtreeSearchLeaf(leaf, first_page);
+
+    /* Did we get an exact match?  If so, return it. */
+    if (index < leaf->hdr.nused &&
+        first_page == leaf->u.leaf_key[index].first_page)
+    {
+        result->page_exact = leaf;
+        result->index_exact = index;
+        return;
+    }
+
+    /*
+     * No exact match.  The slot we were handed may hold the key just before
+     * first_page rather than the one just after it; if so, step past it so
+     * that 'index' is the position at which first_page would be inserted.
+     */
+    if (index < leaf->hdr.nused &&
+        leaf->u.leaf_key[index].first_page < first_page)
+        ++index;
+
+    /* Find the next key. */
+    if (index < leaf->hdr.nused)
+    {
+        /* Next key on same page. */
+        result->page_next = leaf;
+        result->index_next = index;
+    }
+    else
+    {
+        /* Walk up tree until we can move right. */
+        for (btp = relptr_access(base, leaf->hdr.parent);
+             btp != NULL;
+             btp = relptr_access(base, btp->hdr.parent))
+        {
+            Size        slot = FreePageBtreeSearchInternal(btp, first_page);
+
+            if (slot + 1 >= btp->hdr.nused)
+                continue;
+
+            /* Move right, then descend left. */
+            btp = relptr_access(base, btp->u.internal_key[slot + 1].child);
+            while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
+                btp = relptr_access(base, btp->u.internal_key[0].child);
+
+            /* Get leftmost key on page. */
+            result->page_next = btp;
+            result->index_next = 0;
+            break;
+        }
+        /* If we ran out of parents, page_next stays NULL. */
+    }
+
+    /* Find the previous key. */
+    if (index > 0)
+    {
+        /* Previous key on same page. */
+        result->page_prev = leaf;
+        result->index_prev = index - 1;
+    }
+    else
+    {
+        /* Walk up tree until we can move left. */
+        for (btp = relptr_access(base, leaf->hdr.parent);
+             btp != NULL;
+             btp = relptr_access(base, btp->hdr.parent))
+        {
+            Size        slot = FreePageBtreeSearchInternal(btp, first_page);
+
+            if (slot == 0)
+                continue;
+
+            /* Move left, then descend right. */
+            btp = relptr_access(base, btp->u.internal_key[slot - 1].child);
+            while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
+            {
+                Size        nused = btp->hdr.nused;
+
+                btp = relptr_access(base, btp->u.internal_key[nused - 1].child);
+            }
+
+            /* Get rightmost key on page. */
+            result->page_prev = btp;
+            result->index_prev = btp->hdr.nused - 1;
+            break;
+        }
+        /* If we ran out of parents, page_prev stays NULL. */
+    }
+}
+
+/*
+ * Search an internal page for the key to follow for a given page number.
+ *
+ * Returns the index of the last key whose first_page is less than or equal
+ * to the given page number, or 0 if every key on the page is greater.
+ * The page must be an internal page holding at least one key.
+ */
+static Size
+FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
+{
+    Size        low = 0;
+    Size        high;
+
+    Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+    /* A page with a single key is valid; only an empty one is not. */
+    Assert(btp->hdr.nused > 0 &&
+           btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
+
+    high = btp->hdr.nused - 1;
+
+    /* Binary search; rounding mid upward guarantees progress. */
+    while (low < high)
+    {
+        Size        mid = (low + high + 1) / 2;
+
+        if (first_page < btp->u.internal_key[mid].first_page)
+            high = mid - 1;
+        else
+            low = mid;
+    }
+
+    return low;
+}
+
+/*
+ * Search a leaf page for the key nearest to a given page number.
+ *
+ * Returns the index of the last key whose first_page is less than or equal
+ * to the given page number, or 0 if every key on the page is greater.
+ * The returned slot holds an exact match whenever one exists on the page.
+ * The page must be a leaf page holding at least one key.
+ */
+static Size
+FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
+{
+    Size        low = 0;
+    Size        high;
+
+    Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
+    /* A page with a single key is valid; only an empty one is not. */
+    Assert(btp->hdr.nused > 0 &&
+           btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
+
+    high = btp->hdr.nused - 1;
+
+    /* Binary search; rounding mid upward guarantees progress. */
+    while (low < high)
+    {
+        Size        mid = (low + high + 1) / 2;
+
+        if (first_page < btp->u.leaf_key[mid].first_page)
+            high = mid - 1;
+        else
+            low = mid;
+    }
+
+    return low;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * freepage.h
+ * Management of page-organized free memory.
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/freepage.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FREEPAGE_H
+#define FREEPAGE_H
+
+#include "storage/lwlock.h"
+#include "utils/relptr.h"
+
+/* Forward declarations. */
+typedef struct FreePageSpanLeader FreePageSpanLeader;
+typedef struct FreePageBtree FreePageBtree;
+typedef struct FreePageManager FreePageManager;
+
+/*
+ * PostgreSQL normally uses 8kB pages for most things, but many common
+ * architecture/operating system pairings use a 4kB page size for memory
+ * allocation, so we do that here also. We assume that a large allocation
+ * is likely to begin on a page boundary; if not, we'll discard bytes from
+ * the beginning and end of the object and use only the middle portion that
+ * is properly aligned. This works, but is not ideal, so it's best to keep
+ * this conservatively small. There don't seem to be any common architectures
+ * where the page size is less than 4kB, so this should be good enough; also,
+ * making it smaller would increase the space consumed by the address space
+ * map, which also uses this page size.
+ */
+#define FPM_PAGE_SIZE 4096
+
+/*
+ * Each freelist except for the last contains only spans of one particular
+ * size. Everything larger goes on the last one. In some sense this seems
+ * like a waste since most allocations are in a few common sizes, but it
+ * means that small allocations can simply pop the head of the relevant list
+ * without needing to worry about whether the object we find there is of
+ * precisely the correct size (because we know it must be).
+ */
+#define FPM_NUM_FREELISTS 129
+
+/*
+ * Everything we need in order to manage free pages (see freepage.c).
+ *
+ * All pointers are kept as relative pointers.
+ */
+struct FreePageManager
+{
+ relptr(FreePageManager) self; /* our own offset; see fpm_segment_base */
+ relptr(LWLock) lock; /* lock protecting this manager, or null */
+ bool lock_address_is_fixed; /* selects the base used by fpm_lock */
+ relptr(FreePageBtree) root; /* root of btree, or null if empty */
+ relptr(FreePageSpanLeader) freelist[FPM_NUM_FREELISTS]; /* list heads */
+};
+
+/*
+ * Macros to convert between page numbers (expressed as Size) and pointers.
+ *
+ * Page N starts FPM_PAGE_SIZE * N bytes past 'base'.  Converting a pointer
+ * back to a page number divides with truncation, so any address inside a
+ * page maps to that page's number.
+ */
+#define fpm_page_to_pointer(base, page) \
+ (AssertVariableIsOfTypeMacro(page, Size), \
+ (base) + FPM_PAGE_SIZE * (page))
+#define fpm_pointer_to_page(base, ptr) \
+ (((Size) (((char *) (ptr)) - (base))) / FPM_PAGE_SIZE)
+
+/*
+ * Macros to check alignment of absolute and relative pointers.
+ *
+ * Alignment is measured relative to 'base', not as an absolute address.
+ * The relptr variant inspects only the stored offset; its 'base' argument
+ * is unused.
+ */
+#define fpm_pointer_is_page_aligned(base, ptr) \
+ (((Size) (((char *) (ptr)) - (base))) % FPM_PAGE_SIZE == 0)
+#define fpm_relptr_is_page_aligned(base, relptr) \
+ ((relptr).relptr_off % FPM_PAGE_SIZE == 0)
+
+/*
+ * Macro to find base address of the segment containing a FreePageManager.
+ *
+ * This subtracts the manager's own stored offset (fpm->self) from its
+ * address.  The argument is parenthesized so that any pointer-valued
+ * expression may be passed; note that it is evaluated twice.
+ */
+#define fpm_segment_base(fpm) \
+ (((char *) (fpm)) - (fpm)->self.relptr_off)
+
+/*
+ * Macro to find the lwlock for the FreePageManager.
+ *
+ * The stored lock pointer is resolved against NULL when
+ * lock_address_is_fixed is set, and against the segment base otherwise.
+ */
+#define fpm_lock(fpm) \
+ (relptr_access((fpm)->lock_address_is_fixed ? NULL : \
+ fpm_segment_base(fpm), (fpm)->lock))
+
+/* Functions to manipulate the free page map. */
+extern void FreePageManagerInitialize(FreePageManager *fpm, char *base,
+ LWLock *lock, bool lock_address_is_fixed);
+extern bool FreePageManagerGet(FreePageManager *fpm, Size npages,
+ Size *first_page);
+
+#endif /* FREEPAGE_H */