6969 * using Algorithm D.
7070 *
7171 * When merging runs, we use a heap containing just the frontmost tuple from
72- * each source run; we repeatedly output the smallest tuple and insert the
73- * next tuple from its source tape (if any). When the heap empties, the merge
74- * is complete. The basic merge algorithm thus needs very little memory ---
75- * only M tuples for an M-way merge, and M is constrained to a small number.
76- * However, we can still make good use of our full workMem allocation by
77- * pre-reading additional tuples from each source tape. Without prereading,
78- * our access pattern to the temporary file would be very erratic; on average
79- * we'd read one block from each of M source tapes during the same time that
80- * we're writing M blocks to the output tape, so there is no sequentiality of
81- * access at all, defeating the read-ahead methods used by most Unix kernels.
82- * Worse, the output tape gets written into a very random sequence of blocks
83- * of the temp file, ensuring that things will be even worse when it comes
84- * time to read that tape. A straightforward merge pass thus ends up doing a
85- * lot of waiting for disk seeks. We can improve matters by prereading from
86- * each source tape sequentially, loading about workMem/M bytes from each tape
87- * in turn. Then we run the merge algorithm, writing but not reading until
88- * one of the preloaded tuple series runs out. Then we switch back to preread
89- * mode, fill memory again, and repeat. This approach helps to localize both
90- * read and write accesses.
72+ * each source run; we repeatedly output the smallest tuple and replace it
73+ * with the next tuple from its source tape (if any). When the heap empties,
74+ * the merge is complete. The basic merge algorithm thus needs very little
75+ * memory --- only M tuples for an M-way merge, and M is constrained to a
76+ * small number. However, we can still make good use of our full workMem
77+ * allocation by pre-reading additional tuples from each source tape. Without
78+ * prereading, our access pattern to the temporary file would be very erratic;
79+ * on average we'd read one block from each of M source tapes during the same
80+ * time that we're writing M blocks to the output tape, so there is no
81+ * sequentiality of access at all, defeating the read-ahead methods used by
82+ * most Unix kernels. Worse, the output tape gets written into a very random
83+ * sequence of blocks of the temp file, ensuring that things will be even
84+ * worse when it comes time to read that tape. A straightforward merge pass
85+ * thus ends up doing a lot of waiting for disk seeks. We can improve matters
86+ * by prereading from each source tape sequentially, loading about workMem/M
87+ * bytes from each tape in turn. Then we run the merge algorithm, writing but
88+ * not reading until one of the preloaded tuple series runs out. Then we
89+ * switch back to preread mode, fill memory again, and repeat. This approach
90+ * helps to localize both read and write accesses.
9191 *
9292 * When the caller requests random access to the sort result, we form
9393 * the final sorted run on a logical tape which is then "frozen", so
@@ -569,8 +569,10 @@ static void make_bounded_heap(Tuplesortstate *state);
569569static void sort_bounded_heap (Tuplesortstate * state );
570570static void tuplesort_sort_memtuples (Tuplesortstate * state );
571571static void tuplesort_heap_insert (Tuplesortstate * state , SortTuple * tuple ,
572- int tupleindex , bool checkIndex );
573- static void tuplesort_heap_siftup (Tuplesortstate * state , bool checkIndex );
572+ bool checkIndex );
573+ static void tuplesort_heap_replace_top (Tuplesortstate * state , SortTuple * tuple ,
574+ bool checkIndex );
575+ static void tuplesort_heap_delete_top (Tuplesortstate * state , bool checkIndex );
574576static void reversedirection (Tuplesortstate * state );
575577static unsigned int getlen (Tuplesortstate * state , int tapenum , bool eofOK );
576578static void markrunend (Tuplesortstate * state , int tapenum );
@@ -1617,10 +1619,10 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
16171619 }
16181620 else
16191621 {
1620- /* discard top of heap, sift up, insert new tuple */
1622+ /* discard top of heap, replacing it with the new tuple */
16211623 free_sort_tuple (state , & state -> memtuples [0 ]);
1622- tuplesort_heap_siftup ( state , false);
1623- tuplesort_heap_insert (state , tuple , 0 , false);
1624+ tuple -> tupindex = 0 ; /* not used */
1625+ tuplesort_heap_replace_top (state , tuple , false);
16241626 }
16251627 break ;
16261628
@@ -1665,7 +1667,8 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
16651667 * initial COMPARETUP() call is required for the tuple, to
16661668 * determine that the tuple does not belong in RUN_FIRST).
16671669 */
1668- tuplesort_heap_insert (state , tuple , state -> currentRun , true);
1670+ tuple -> tupindex = state -> currentRun ;
1671+ tuplesort_heap_insert (state , tuple , true);
16691672 }
16701673 else
16711674 {
@@ -1987,7 +1990,6 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
19871990 * more generally.
19881991 */
19891992 * stup = state -> memtuples [0 ];
1990- tuplesort_heap_siftup (state , false);
19911993 if ((tupIndex = state -> mergenext [srcTape ]) == 0 )
19921994 {
19931995 /*
@@ -2008,18 +2010,25 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
20082010 */
20092011 if ((tupIndex = state -> mergenext [srcTape ]) == 0 )
20102012 {
2013+ /* Remove the top node from the heap */
2014+ tuplesort_heap_delete_top (state , false);
20112015 /* Free tape's buffer, avoiding dangling pointer */
20122016 if (state -> batchUsed )
20132017 mergebatchfreetape (state , srcTape , stup , should_free );
20142018 return true;
20152019 }
20162020 }
2017- /* pull next preread tuple from list, insert in heap */
2021+
2022+ /*
2023+ * pull next preread tuple from list, and replace the returned
2024+ * tuple at top of the heap with it.
2025+ */
20182026 newtup = & state -> memtuples [tupIndex ];
20192027 state -> mergenext [srcTape ] = newtup -> tupindex ;
20202028 if (state -> mergenext [srcTape ] == 0 )
20212029 state -> mergelast [srcTape ] = 0 ;
2022- tuplesort_heap_insert (state , newtup , srcTape , false);
2030+ newtup -> tupindex = srcTape ;
2031+ tuplesort_heap_replace_top (state , newtup , false);
20232032 /* put the now-unused memtuples entry on the freelist */
20242033 newtup -> tupindex = state -> mergefreelist ;
20252034 state -> mergefreelist = tupIndex ;
@@ -2394,7 +2403,8 @@ inittapes(Tuplesortstate *state)
23942403 /* Must copy source tuple to avoid possible overwrite */
23952404 SortTuple stup = state -> memtuples [j ];
23962405
2397- tuplesort_heap_insert (state , & stup , 0 , false);
2406+ stup .tupindex = RUN_FIRST ;
2407+ tuplesort_heap_insert (state , & stup , false);
23982408 }
23992409 Assert (state -> memtupcount == ntuples );
24002410 }
@@ -2642,22 +2652,29 @@ mergeonerun(Tuplesortstate *state)
26422652 /* writetup adjusted total free space, now fix per-tape space */
26432653 spaceFreed = state -> availMem - priorAvail ;
26442654 state -> mergeavailmem [srcTape ] += spaceFreed ;
2645- /* compact the heap */
2646- tuplesort_heap_siftup (state , false);
26472655 if ((tupIndex = state -> mergenext [srcTape ]) == 0 )
26482656 {
26492657 /* out of preloaded data on this tape, try to read more */
26502658 mergepreread (state );
26512659 /* if still no data, we've reached end of run on this tape */
26522660 if ((tupIndex = state -> mergenext [srcTape ]) == 0 )
2661+ {
2662+ /* remove the written-out tuple from the heap */
2663+ tuplesort_heap_delete_top (state , false);
26532664 continue ;
2665+ }
26542666 }
2655- /* pull next preread tuple from list, insert in heap */
2667+
2668+ /*
2669+ * pull next preread tuple from list, and replace the written-out
2670+ * tuple in the heap with it.
2671+ */
26562672 tup = & state -> memtuples [tupIndex ];
26572673 state -> mergenext [srcTape ] = tup -> tupindex ;
26582674 if (state -> mergenext [srcTape ] == 0 )
26592675 state -> mergelast [srcTape ] = 0 ;
2660- tuplesort_heap_insert (state , tup , srcTape , false);
2676+ tup -> tupindex = srcTape ;
2677+ tuplesort_heap_replace_top (state , tup , false);
26612678 /* put the now-unused memtuples entry on the freelist */
26622679 tup -> tupindex = state -> mergefreelist ;
26632680 state -> mergefreelist = tupIndex ;
@@ -2793,7 +2810,8 @@ beginmerge(Tuplesortstate *state, bool finalMergeBatch)
27932810 state -> mergenext [srcTape ] = tup -> tupindex ;
27942811 if (state -> mergenext [srcTape ] == 0 )
27952812 state -> mergelast [srcTape ] = 0 ;
2796- tuplesort_heap_insert (state , tup , srcTape , false);
2813+ tup -> tupindex = srcTape ;
2814+ tuplesort_heap_insert (state , tup , false);
27972815 /* put the now-unused memtuples entry on the freelist */
27982816 tup -> tupindex = state -> mergefreelist ;
27992817 state -> mergefreelist = tupIndex ;
@@ -3275,13 +3293,12 @@ dumptuples(Tuplesortstate *state, bool alltuples)
32753293 * Still holding out for a case favorable to replacement
32763294 * selection. Still incrementally spilling using heap.
32773295 *
3278- * Dump the heap's frontmost entry, and sift up to remove it from
3279- * the heap.
3296+ * Dump the heap's frontmost entry, and remove it from the heap.
32803297 */
32813298 Assert (state -> memtupcount > 0 );
32823299 WRITETUP (state , state -> tp_tapenum [state -> destTape ],
32833300 & state -> memtuples [0 ]);
3284- tuplesort_heap_siftup (state , true);
3301+ tuplesort_heap_delete_top (state , true);
32853302 }
32863303 else
32873304 {
@@ -3644,27 +3661,29 @@ make_bounded_heap(Tuplesortstate *state)
36443661 state -> memtupcount = 0 ; /* make the heap empty */
36453662 for (i = 0 ; i < tupcount ; i ++ )
36463663 {
3647- if (state -> memtupcount >= state -> bound &&
3648- COMPARETUP (state , & state -> memtuples [i ], & state -> memtuples [0 ]) <= 0 )
3649- {
3650- /* New tuple would just get thrown out, so skip it */
3651- free_sort_tuple (state , & state -> memtuples [i ]);
3652- CHECK_FOR_INTERRUPTS ();
3653- }
3654- else
3664+ if (state -> memtupcount < state -> bound )
36553665 {
36563666 /* Insert next tuple into heap */
36573667 /* Must copy source tuple to avoid possible overwrite */
36583668 SortTuple stup = state -> memtuples [i ];
36593669
3660- tuplesort_heap_insert (state , & stup , 0 , false);
3661-
3662- /* If heap too full, discard largest entry */
3663- if (state -> memtupcount > state -> bound )
3670+ stup .tupindex = 0 ; /* not used */
3671+ tuplesort_heap_insert (state , & stup , false);
3672+ }
3673+ else
3674+ {
3675+ /*
3676+ * The heap is full. Replace the largest entry with the new
3677+ * tuple, or just discard it, if it's larger than anything already
3678+ * in the heap.
3679+ */
3680+ if (COMPARETUP (state , & state -> memtuples [i ], & state -> memtuples [0 ]) <= 0 )
36643681 {
3665- free_sort_tuple (state , & state -> memtuples [0 ]);
3666- tuplesort_heap_siftup ( state , false );
3682+ free_sort_tuple (state , & state -> memtuples [i ]);
3683+ CHECK_FOR_INTERRUPTS ( );
36673684 }
3685+ else
3686+ tuplesort_heap_replace_top (state , & state -> memtuples [i ], false);
36683687 }
36693688 }
36703689
@@ -3685,16 +3704,16 @@ sort_bounded_heap(Tuplesortstate *state)
36853704 Assert (tupcount == state -> bound );
36863705
36873706 /*
3688- * We can unheapify in place because each sift-up will remove the largest
3689- * entry, which we can promptly store in the newly freed slot at the end.
3690- * Once we're down to a single-entry heap, we're done.
3707+ * We can unheapify in place because each delete-top call will remove the
3708+ * largest entry, which we can promptly store in the newly freed slot at
3709+ * the end. Once we're down to a single-entry heap, we're done.
36913710 */
36923711 while (state -> memtupcount > 1 )
36933712 {
36943713 SortTuple stup = state -> memtuples [0 ];
36953714
36963715 /* this sifts-up the next-largest entry and decreases memtupcount */
3697- tuplesort_heap_siftup (state , false);
3716+ tuplesort_heap_delete_top (state , false);
36983717 state -> memtuples [state -> memtupcount ] = stup ;
36993718 }
37003719 state -> memtupcount = tupcount ;
@@ -3738,30 +3757,21 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
37383757 * Insert a new tuple into an empty or existing heap, maintaining the
37393758 * heap invariant. Caller is responsible for ensuring there's room.
37403759 *
3741- * Note: we assume *tuple is a temporary variable that can be scribbled on.
3742- * For some callers, tuple actually points to a memtuples[] entry above the
3760+ * Note: For some callers, tuple points to a memtuples[] entry above the
37433761 * end of the heap. This is safe as long as it's not immediately adjacent
37443762 * to the end of the heap (ie, in the [memtupcount] array entry) --- if it
37453763 * is, it might get overwritten before being moved into the heap!
37463764 */
37473765static void
37483766tuplesort_heap_insert (Tuplesortstate * state , SortTuple * tuple ,
3749- int tupleindex , bool checkIndex )
3767+ bool checkIndex )
37503768{
37513769 SortTuple * memtuples ;
37523770 int j ;
37533771
3754- /*
3755- * Save the tupleindex --- see notes above about writing on *tuple. It's a
3756- * historical artifact that tupleindex is passed as a separate argument
3757- * and not in *tuple, but it's notationally convenient so let's leave it
3758- * that way.
3759- */
3760- tuple -> tupindex = tupleindex ;
3761-
37623772 memtuples = state -> memtuples ;
37633773 Assert (state -> memtupcount < state -> memtupsize );
3764- Assert (!checkIndex || tupleindex == RUN_FIRST );
3774+ Assert (!checkIndex || tuple -> tupindex == RUN_FIRST );
37653775
37663776 CHECK_FOR_INTERRUPTS ();
37673777
@@ -3783,25 +3793,51 @@ tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
37833793}
37843794
37853795/*
3786- * The tuple at state->memtuples[0] has been removed from the heap.
3787- * Decrement memtupcount, and sift up to maintain the heap invariant.
3796+ * Remove the tuple at state->memtuples[0] from the heap. Decrement
3797+ * memtupcount, and sift up to maintain the heap invariant.
3798+ *
3799+ * The caller has already free'd the tuple the top node points to,
3800+ * if necessary.
37883801 */
37893802static void
3790- tuplesort_heap_siftup (Tuplesortstate * state , bool checkIndex )
3803+ tuplesort_heap_delete_top (Tuplesortstate * state , bool checkIndex )
37913804{
37923805 SortTuple * memtuples = state -> memtuples ;
37933806 SortTuple * tuple ;
3794- int i ,
3795- n ;
37963807
37973808 Assert (!checkIndex || state -> currentRun == RUN_FIRST );
37983809 if (-- state -> memtupcount <= 0 )
37993810 return ;
38003811
3812+ /*
3813+ * Remove the last tuple in the heap, and re-insert it, by replacing the
3814+ * current top node with it.
3815+ */
3816+ tuple = & memtuples [state -> memtupcount ];
3817+ tuplesort_heap_replace_top (state , tuple , checkIndex );
3818+ }
3819+
3820+ /*
3821+ * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to
3822+ * maintain the heap invariant.
3823+ *
3824+ * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H,
3825+ * Heapsort, steps H3-H8).
3826+ */
3827+ static void
3828+ tuplesort_heap_replace_top (Tuplesortstate * state , SortTuple * tuple ,
3829+ bool checkIndex )
3830+ {
3831+ SortTuple * memtuples = state -> memtuples ;
3832+ int i ,
3833+ n ;
3834+
3835+ Assert (!checkIndex || state -> currentRun == RUN_FIRST );
3836+ Assert (state -> memtupcount >= 1 );
3837+
38013838 CHECK_FOR_INTERRUPTS ();
38023839
38033840 n = state -> memtupcount ;
3804- tuple = & memtuples [n ]; /* tuple that must be reinserted */
38053841 i = 0 ; /* i is where the "hole" is */
38063842 for (;;)
38073843 {
0 commit comments