88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.17 2010/01/02 16:57:33 momjian Exp $
11+ * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.18 2010/02/11 14:29:50 teodor Exp $
1212 *-------------------------------------------------------------------------
1313 */
1414
2222#define DEF_NENTRY 2048
2323#define DEF_NPTR 4
2424
25- void
26- ginInitBA ( BuildAccumulator * accum )
25+ static void *
26+ ginAppendData ( void * old , void * new , void * arg )
2727{
28- accum -> maxdepth = 1 ;
29- accum -> stackpos = 0 ;
30- accum -> entries = NULL ;
31- accum -> stack = NULL ;
32- accum -> allocatedMemory = 0 ;
33- accum -> entryallocator = NULL ;
34- }
28+ EntryAccumulator * eo = (EntryAccumulator * )old ,
29+ * en = (EntryAccumulator * )new ;
3530
36- static EntryAccumulator *
37- EAAllocate (BuildAccumulator * accum )
38- {
39- if (accum -> entryallocator == NULL || accum -> length >= DEF_NENTRY )
40- {
41- accum -> entryallocator = palloc (sizeof (EntryAccumulator ) * DEF_NENTRY );
42- accum -> allocatedMemory += GetMemoryChunkSpace (accum -> entryallocator );
43- accum -> length = 0 ;
44- }
45-
46- accum -> length ++ ;
47- return accum -> entryallocator + accum -> length - 1 ;
48- }
31+ BuildAccumulator * accum = (BuildAccumulator * )arg ;
4932
50- /*
51- * Stores heap item pointer. For robust, it checks that
52- * item pointer are ordered
53- */
54- static void
55- ginInsertData (BuildAccumulator * accum , EntryAccumulator * entry , ItemPointer heapptr )
56- {
57- if (entry -> number >= entry -> length )
33+ if (eo -> number >= eo -> length )
5834 {
59- accum -> allocatedMemory -= GetMemoryChunkSpace (entry -> list );
60- entry -> length *= 2 ;
61- entry -> list = (ItemPointerData * ) repalloc (entry -> list ,
62- sizeof (ItemPointerData ) * entry -> length );
63- accum -> allocatedMemory += GetMemoryChunkSpace (entry -> list );
35+ accum -> allocatedMemory -= GetMemoryChunkSpace (eo -> list );
36+ eo -> length *= 2 ;
37+ eo -> list = (ItemPointerData * ) repalloc (eo -> list ,
38+ sizeof (ItemPointerData ) * eo -> length );
39+ accum -> allocatedMemory += GetMemoryChunkSpace (eo -> list );
6440 }
6541
66- if (entry -> shouldSort == FALSE)
42+ /* If item pointers are not ordered, they will need to be sorted. */
43+ if (eo -> shouldSort == FALSE)
6744 {
68- int res = compareItemPointers ( entry -> list + entry -> number - 1 , heapptr ) ;
45+ int res ;
6946
47+ res = compareItemPointers (eo -> list + eo -> number - 1 , en -> list );
7048 Assert (res != 0 );
7149
7250 if (res > 0 )
73- entry -> shouldSort = TRUE;
51+ eo -> shouldSort = TRUE;
7452 }
7553
76- entry -> list [entry -> number ] = * heapptr ;
77- entry -> number ++ ;
54+ eo -> list [eo -> number ] = en -> list [0 ];
55+ eo -> number ++ ;
56+
57+ return old ;
58+ }
59+
60+ static int
61+ cmpEntryAccumulator (const void * a , const void * b , void * arg )
62+ {
63+ EntryAccumulator * ea = (EntryAccumulator * )a ;
64+ EntryAccumulator * eb = (EntryAccumulator * )b ;
65+ BuildAccumulator * accum = (BuildAccumulator * )arg ;
66+
67+ return compareAttEntries (accum -> ginstate , ea -> attnum , ea -> value ,
68+ eb -> attnum , eb -> value );
69+ }
70+
71+ void
72+ ginInitBA (BuildAccumulator * accum )
73+ {
74+ accum -> allocatedMemory = 0 ;
75+ accum -> entryallocator = NULL ;
76+ accum -> tree = rb_create (cmpEntryAccumulator , ginAppendData , NULL , accum );
77+ accum -> iterator = NULL ;
78+ accum -> tmpList = NULL ;
7879}
7980
8081/*
@@ -103,111 +104,104 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
103104static void
104105ginInsertEntry (BuildAccumulator * accum , ItemPointer heapptr , OffsetNumber attnum , Datum entry )
105106{
106- EntryAccumulator * ea = accum -> entries ,
107- * pea = NULL ;
108- int res = 0 ;
109- uint32 depth = 1 ;
110-
111- while (ea )
107+ EntryAccumulator * key ,
108+ * ea ;
109+
110+ /*
111+ * Allocate memory by rather big chunk to decrease overhead, we don't
112+ * keep pointer to previously allocated chunks because they will free
113+ * by MemoryContextReset() call.
114+ */
115+ if (accum -> entryallocator == NULL || accum -> length >= DEF_NENTRY )
112116 {
113- res = compareAttEntries (accum -> ginstate , attnum , entry , ea -> attnum , ea -> value );
114- if (res == 0 )
115- break ; /* found */
116- else
117- {
118- pea = ea ;
119- if (res < 0 )
120- ea = ea -> left ;
121- else
122- ea = ea -> right ;
123- }
124- depth ++ ;
117+ accum -> entryallocator = palloc (sizeof (EntryAccumulator ) * DEF_NENTRY );
118+ accum -> allocatedMemory += GetMemoryChunkSpace (accum -> entryallocator );
119+ accum -> length = 0 ;
125120 }
126121
127- if (depth > accum -> maxdepth )
128- accum -> maxdepth = depth ;
122+ /* "Allocate" new key in chunk */
123+ key = accum -> entryallocator + accum -> length ;
124+ accum -> length ++ ;
125+
126+ key -> attnum = attnum ;
127+ key -> value = entry ;
128+ /* To prevent multiple palloc/pfree cycles, we reuse array */
129+ if (accum -> tmpList == NULL )
130+ accum -> tmpList =
131+ (ItemPointerData * ) palloc (sizeof (ItemPointerData ) * DEF_NPTR );
132+ key -> list = accum -> tmpList ;
133+ key -> list [0 ] = * heapptr ;
134+
135+ ea = rb_insert (accum -> tree , key );
129136
130137 if (ea == NULL )
131138 {
132- ea = EAAllocate (accum );
133-
134- ea -> left = ea -> right = NULL ;
135- ea -> attnum = attnum ;
136- ea -> value = getDatumCopy (accum , attnum , entry );
137- ea -> length = DEF_NPTR ;
138- ea -> number = 1 ;
139- ea -> shouldSort = FALSE;
140- ea -> list = (ItemPointerData * ) palloc (sizeof (ItemPointerData ) * DEF_NPTR );
141- accum -> allocatedMemory += GetMemoryChunkSpace (ea -> list );
142- ea -> list [0 ] = * heapptr ;
143-
144- if (pea == NULL )
145- accum -> entries = ea ;
146- else
147- {
148- Assert (res != 0 );
149- if (res < 0 )
150- pea -> left = ea ;
151- else
152- pea -> right = ea ;
153- }
139+ /*
140+ * The key has been inserted, so continue initialization.
141+ */
142+ key -> value = getDatumCopy (accum , attnum , entry );
143+ key -> length = DEF_NPTR ;
144+ key -> number = 1 ;
145+ key -> shouldSort = FALSE;
146+ accum -> allocatedMemory += GetMemoryChunkSpace (key -> list );
147+ accum -> tmpList = NULL ;
154148 }
155149 else
156- ginInsertData (accum , ea , heapptr );
157- }
158-
159- /*
160- * insert middle of left part the middle of right one,
161- * then calls itself for each parts
162- */
163- static void
164- ginChooseElem (BuildAccumulator * accum , ItemPointer heapptr , OffsetNumber attnum ,
165- Datum * entries , uint32 nentry ,
166- uint32 low , uint32 high , uint32 offset )
167- {
168- uint32 pos ;
169- uint32 middle = (low + high ) >> 1 ;
170-
171- pos = (low + middle ) >> 1 ;
172- if (low != middle && pos >= offset && pos - offset < nentry )
173- ginInsertEntry (accum , heapptr , attnum , entries [pos - offset ]);
174- pos = (high + middle + 1 ) >> 1 ;
175- if (middle + 1 != high && pos >= offset && pos - offset < nentry )
176- ginInsertEntry (accum , heapptr , attnum , entries [pos - offset ]);
177-
178- if (low != middle )
179- ginChooseElem (accum , heapptr , attnum , entries , nentry , low , middle , offset );
180- if (high != middle + 1 )
181- ginChooseElem (accum , heapptr , attnum , entries , nentry , middle + 1 , high , offset );
150+ {
151+ /*
152+ * The key has been appended, so "free" allocated
153+ * key by decrementing chunk's counter.
154+ */
155+ accum -> length -- ;
156+ }
182157}
183158
184159/*
185- * Insert one heap pointer. Suppose entries is sorted.
186- * Insertion order tries to get binary tree balanced: first insert middle value,
187- * next middle on left part and middle of right part.
160+ * Insert one heap pointer.
161+ *
162+ * Since the entries are being inserted into a balanced binary tree, you
163+ * might think that the order of insertion wouldn't be critical, but it turns
164+ * out that inserting the entries in sorted order results in a lot of
165+ * rebalancing operations and is slow. To prevent this, we attempt to insert
166+ * the nodes in an order that will produce a nearly-balanced tree if the input
167+ * is in fact sorted.
168+ *
169+ * We do this as follows. First, we imagine that we have an array whose size
170+ * is the smallest power of two greater than or equal to the actual array
171+ * size. Second, we insert the middle entry of our virtual array into the
172+ * tree; then, we insert the middles of each half of out virtual array, then
173+ * middles of quarters, etc.
188174 */
189- void
175+ void
190176ginInsertRecordBA (BuildAccumulator * accum , ItemPointer heapptr , OffsetNumber attnum ,
191177 Datum * entries , int32 nentry )
192178{
193- uint32 i ,
194- nbit = 0 ,
195- offset ;
179+ uint32 step = nentry ;
196180
197181 if (nentry <= 0 )
198182 return ;
199183
200184 Assert (ItemPointerIsValid (heapptr ) && attnum >= FirstOffsetNumber );
201185
202- i = nentry - 1 ;
203- for (; i > 0 ; i >>= 1 )
204- nbit ++ ;
186+ /*
187+ * step will contain largest power of 2 and <= nentry
188+ */
189+ step |= (step >> 1 );
190+ step |= (step >> 2 );
191+ step |= (step >> 4 );
192+ step |= (step >> 8 );
193+ step |= (step >> 16 );
194+ step >>= 1 ;
195+ step ++ ;
205196
206- nbit = 1 << nbit ;
207- offset = ( nbit - nentry ) / 2 ;
197+ while ( step > 0 ) {
198+ int i ;
208199
209- ginInsertEntry (accum , heapptr , attnum , entries [(nbit >> 1 ) - offset ]);
210- ginChooseElem (accum , heapptr , attnum , entries , nentry , 0 , nbit , offset );
200+ for (i = step - 1 ; i < nentry && i >= 0 ; i += step << 1 /* *2 */ )
201+ ginInsertEntry (accum , heapptr , attnum , entries [i ]);
202+
203+ step >>= 1 ; /* /2 */
204+ }
211205}
212206
213207static int
@@ -219,86 +213,16 @@ qsortCompareItemPointers(const void *a, const void *b)
219213 return res ;
220214}
221215
222- /*
223- * walk on binary tree and returns ordered nodes
224- */
225- static EntryAccumulator *
226- walkTree (BuildAccumulator * accum )
227- {
228- EntryAccumulator * entry = accum -> stack [accum -> stackpos ];
229-
230- if (entry -> list != NULL )
231- {
232- /* return entry itself: we already was at left sublink */
233- return entry ;
234- }
235- else if (entry -> right && entry -> right != accum -> stack [accum -> stackpos + 1 ])
236- {
237- /* go on right sublink */
238- accum -> stackpos ++ ;
239- entry = entry -> right ;
240-
241- /* find most-left value */
242- for (;;)
243- {
244- accum -> stack [accum -> stackpos ] = entry ;
245- if (entry -> left )
246- {
247- accum -> stackpos ++ ;
248- entry = entry -> left ;
249- }
250- else
251- break ;
252- }
253- }
254- else
255- {
256- /* we already return all left subtree, itself and right subtree */
257- if (accum -> stackpos == 0 )
258- return 0 ;
259- accum -> stackpos -- ;
260- return walkTree (accum );
261- }
262-
263- return entry ;
264- }
265-
266216ItemPointerData *
267217ginGetEntry (BuildAccumulator * accum , OffsetNumber * attnum , Datum * value , uint32 * n )
268218{
269219 EntryAccumulator * entry ;
270220 ItemPointerData * list ;
271221
272- if (accum -> stack == NULL )
273- {
274- /* first call */
275- accum -> stack = palloc0 (sizeof (EntryAccumulator * ) * (accum -> maxdepth + 1 ));
276- accum -> allocatedMemory += GetMemoryChunkSpace (accum -> stack );
277- entry = accum -> entries ;
278-
279- if (entry == NULL )
280- return NULL ;
281-
282- /* find most-left value */
283- for (;;)
284- {
285- accum -> stack [accum -> stackpos ] = entry ;
286- if (entry -> left )
287- {
288- accum -> stackpos ++ ;
289- entry = entry -> left ;
290- }
291- else
292- break ;
293- }
294- }
295- else
296- {
297- accum -> allocatedMemory -= GetMemoryChunkSpace (accum -> stack [accum -> stackpos ]-> list );
298- pfree (accum -> stack [accum -> stackpos ]-> list );
299- accum -> stack [accum -> stackpos ]-> list = NULL ;
300- entry = walkTree (accum );
301- }
222+ if (accum -> iterator == NULL )
223+ accum -> iterator = rb_begin_iterate (accum -> tree , LeftRightWalk );
224+
225+ entry = rb_iterate (accum -> iterator );
302226
303227 if (entry == NULL )
304228 return NULL ;
0 commit comments