From: Robert Haas Date: Mon, 17 Feb 2014 05:47:47 +0000 (-0500) Subject: More hacking. X-Git-Url: http://git.postgresql.org/gitweb/static/close/reject?a=commitdiff_plain;h=a59db665d4696ef40381190ab48302245a9d0500;p=users%2Frhaas%2Fpostgres.git More hacking. --- diff --git a/src/backend/utils/mmgr/freepage.c b/src/backend/utils/mmgr/freepage.c index cf845f3302..58cabfc66d 100644 --- a/src/backend/utils/mmgr/freepage.c +++ b/src/backend/utils/mmgr/freepage.c @@ -79,6 +79,7 @@ typedef struct FreePageBtreeSearchResult Size index_next; FreePageBtree *page_prev; Size index_prev; + unsigned split_depth; } FreePageBtreeSearchResult; /* Helper functions */ @@ -217,6 +218,9 @@ FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page) FreePageBtreeSearch(fpm, victim_page, &result); Assert(result.page_exact != NULL); FreePageBtreeRemove(fpm, result.page_exact, result.index_exact); + + /* XXX. But the span we found might have been oversized ... we + * need to put the rest back! */ } /* Release lock (if there is one). */ @@ -375,8 +379,10 @@ FreePageBtreeRemoveLeaf(FreePageBtree *btp, Size index) * currently in the tree, in which case result->page_next will be NULL); and * result->page_prev and result->index_prev will indicate the preceding * key (unless the proposed first_page would precede everything currently - * in the tree, in which case result->page_prev will be NULL). Except - * as described above, the contents of fields in the result object are + * in the tree, in which case result->page_prev will be NULL). + * result->split_depth will contain the number of tree levels that will need + * to be split to insert a key into result->page_exact or result->page_next. + * Except as described above, the contents of fields in the result object are * undefined on return. 
*/ static void @@ -387,6 +393,8 @@ FreePageBtreeSearch(FreePageManager *fpm, Size first_page, FreePageBtree *btp = relptr_access(base, fpm->btree_root); Size index; + result->split_depth = 0; + /* If the btree is empty, there's nothing to find. */ if (btp == NULL) { @@ -400,15 +408,35 @@ FreePageBtreeSearch(FreePageManager *fpm, Size first_page, while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC) { index = FreePageBtreeSearchInternal(btp, first_page); + /* * If the index is 0, we're not going to find it, but we keep * descending anyway so that we can find the element that follows it. */ if (index > 0) --index; + + /* Track required split depth for leaf insert. */ + if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE) + { + Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE); + result->split_depth++; + } + else + result->split_depth = 0; + btp = relptr_access(base, btp->u.internal_key[index].child); } + /* Track required split depth for leaf insert. */ + if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE) + { + Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE); + result->split_depth++; + } + else + result->split_depth = 0; + /* Search leaf page. */ index = FreePageBtreeSearchLeaf(btp, first_page); if (index >= btp->hdr.nused) @@ -595,6 +623,8 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages, FreePageBtreeSearchResult result; FreePageBtreeLeafKey *prevkey = NULL; FreePageBtreeLeafKey *nextkey = NULL; + FreePageBtree *btp; + Size index; /* Search the btree. */ FreePageBtreeSearch(fpm, first_page, &result); @@ -666,32 +696,50 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages, return true; } - /* - * At this point, we know that the item can't be consolidated with either - * the preceding or following span, so we need to insert it. If there's - * space on the page that contains the following key, then we can just - * insert it there. 
- * - * Note that it's not so easy to insert on the page that contains the - * preceding key, because the new key we're inserting is greater than - * anything that's on that page right now and might also be greater than - * the upper bound for that page. - */ - if (result.page_next->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE) + /* Split leaf page and as many of its ancestors as necessary. */ + if (result.split_depth > 0) { - FreePageBtree *btp = result.page_next; - Size index = result.index_next; + /* + * XXX. Try any coping strategies we want to use to avoid a split, + * such as inserting on page_prev instead of page_next, or shuffling + * keys between siblings. + */ - memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index], - sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index)); - btp->u.leaf_key[index].first_page = first_page; - btp->u.leaf_key[index].npages = npages; - ++btp->hdr.nused; + /* + * XXX. Check whether the btree_recycle list contains at least as many + * pages as result->split_depth. If it does not, fix that by getting + * enough pages to make up the difference and then recycling them. + */ - /* If new first key on page, ancestors might need adjustment. */ - if (index == 0) - FreePageBtreeReduceAncestorKeys(fpm, result.page_next); + /* + * XXX. Re-search the btree, as contents may have shifted during + * flight. There's no point in rechecking for consolidation + * possibilities, and rechecking split-avoidance strategies doesn't + * seem particularly worthwhile, so just repeat the search. + * Note that we must be sure that the action of populating the + * btree_recycle list can't make any page more full, so it had better + * not try to do anything fancy like merge siblings. It's OK if + * it makes a page less full, though. + */ - return true; + /* + * XXX. Split the page and as many of its ancestors as needed. Make + * sure the search object remains up to date, or re-do the search after + * splitting. 
+ */ } + + /* Physically add the key to the page. */ + Assert(result.page_next->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE); + btp = result.page_next; + index = result.index_next; + memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index], + sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index)); + btp->u.leaf_key[index].first_page = first_page; + btp->u.leaf_key[index].npages = npages; + ++btp->hdr.nused; + + /* If new first key on page, ancestors might need adjustment. */ + if (index == 0) + FreePageBtreeReduceAncestorKeys(fpm, result.page_next); }