160160#include "executor/executor.h"
161161#include "executor/nodeAgg.h"
162162#include "miscadmin.h"
163+ #include "nodes/makefuncs.h"
163164#include "nodes/nodeFuncs.h"
164165#include "optimizer/clauses.h"
165166#include "optimizer/tlist.h"
@@ -213,6 +214,9 @@ typedef struct AggStatePerTransData
213214 */
214215 int numInputs ;
215216
217+ /* offset of input columns in AggState->evalslot */
218+ int inputoff ;
219+
216220 /*
217221 * Number of aggregated input columns to pass to the transfn. This
218222 * includes the ORDER BY columns for ordered-set aggs, but not for plain
@@ -234,7 +238,6 @@ typedef struct AggStatePerTransData
234238
235239 /* ExprStates of the FILTER and argument expressions. */
236240 ExprState * aggfilter ; /* state of FILTER expression, if any */
237- List * args ; /* states of aggregated-argument expressions */
238241 List * aggdirectargs ; /* states of direct-argument expressions */
239242
240243 /*
@@ -291,19 +294,19 @@ typedef struct AggStatePerTransData
291294 transtypeByVal ;
292295
293296 /*
294- * Stuff for evaluation of inputs. We used to just use ExecEvalExpr, but
295- * with the addition of ORDER BY we now need at least a slot for passing
296- * data to the sort object, which requires a tupledesc, so we might as
297- * well go whole hog and use ExecProject too .
297+ * Stuff for evaluation of aggregate inputs in cases where the aggregate
298+ * requires sorted input. The arguments themselves will be evaluated via
299+ * AggState->evalslot/evalproj for all aggregates at once, but we only
300+ * want to sort the relevant columns for individual aggregates .
298301 */
299- TupleDesc evaldesc ; /* descriptor of input tuples */
300- ProjectionInfo * evalproj ; /* projection machinery */
302+ TupleDesc sortdesc ; /* descriptor of input tuples */
301303
302304 /*
303305 * Slots for holding the evaluated input arguments. These are set up
304- * during ExecInitAgg() and then used for each input row.
306+ * during ExecInitAgg() and then used for each input row requiring
307+ * processing besides what's done in AggState->evalproj.
305308 */
306- TupleTableSlot * evalslot ; /* current input tuple */
309+ TupleTableSlot * sortslot ; /* current input tuple */
307310 TupleTableSlot * uniqslot ; /* used for multi-column DISTINCT */
308311
309312 /*
@@ -621,14 +624,14 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
621624 */
622625 if (pertrans -> numInputs == 1 )
623626 pertrans -> sortstates [aggstate -> current_set ] =
624- tuplesort_begin_datum (pertrans -> evaldesc -> attrs [0 ]-> atttypid ,
627+ tuplesort_begin_datum (pertrans -> sortdesc -> attrs [0 ]-> atttypid ,
625628 pertrans -> sortOperators [0 ],
626629 pertrans -> sortCollations [0 ],
627630 pertrans -> sortNullsFirst [0 ],
628631 work_mem , false);
629632 else
630633 pertrans -> sortstates [aggstate -> current_set ] =
631- tuplesort_begin_heap (pertrans -> evaldesc ,
634+ tuplesort_begin_heap (pertrans -> sortdesc ,
632635 pertrans -> numSortCols ,
633636 pertrans -> sortColIdx ,
634637 pertrans -> sortOperators ,
@@ -847,14 +850,19 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
847850 int setno = 0 ;
848851 int numGroupingSets = Max (aggstate -> phase -> numsets , 1 );
849852 int numTrans = aggstate -> numtrans ;
853+ TupleTableSlot * slot = aggstate -> evalslot ;
854+
855+ /* compute input for all aggregates */
856+ if (aggstate -> evalproj )
857+ aggstate -> evalslot = ExecProject (aggstate -> evalproj , NULL );
850858
851859 for (transno = 0 ; transno < numTrans ; transno ++ )
852860 {
853861 AggStatePerTrans pertrans = & aggstate -> pertrans [transno ];
854862 ExprState * filter = pertrans -> aggfilter ;
855863 int numTransInputs = pertrans -> numTransInputs ;
856864 int i ;
857- TupleTableSlot * slot ;
865+ int inputoff = pertrans -> inputoff ;
858866
859867 /* Skip anything FILTERed out */
860868 if (filter )
@@ -868,13 +876,10 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
868876 continue ;
869877 }
870878
871- /* Evaluate the current input expressions for this aggregate */
872- slot = ExecProject (pertrans -> evalproj , NULL );
873-
874879 if (pertrans -> numSortCols > 0 )
875880 {
876881 /* DISTINCT and/or ORDER BY case */
877- Assert (slot -> tts_nvalid == pertrans -> numInputs );
882+ Assert (slot -> tts_nvalid >= ( pertrans -> numInputs + inputoff ) );
878883
879884 /*
880885 * If the transfn is strict, we want to check for nullity before
@@ -887,7 +892,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
887892 {
888893 for (i = 0 ; i < numTransInputs ; i ++ )
889894 {
890- if (slot -> tts_isnull [i ])
895+ if (slot -> tts_isnull [i + inputoff ])
891896 break ;
892897 }
893898 if (i < numTransInputs )
@@ -899,10 +904,25 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
899904 /* OK, put the tuple into the tuplesort object */
900905 if (pertrans -> numInputs == 1 )
901906 tuplesort_putdatum (pertrans -> sortstates [setno ],
902- slot -> tts_values [0 ],
903- slot -> tts_isnull [0 ]);
907+ slot -> tts_values [inputoff ],
908+ slot -> tts_isnull [inputoff ]);
904909 else
905- tuplesort_puttupleslot (pertrans -> sortstates [setno ], slot );
910+ {
911+ /*
912+ * Copy slot contents, starting from inputoff, into sort
913+ * slot.
914+ */
915+ ExecClearTuple (pertrans -> sortslot );
916+ memcpy (pertrans -> sortslot -> tts_values ,
917+ & slot -> tts_values [inputoff ],
918+ pertrans -> numInputs * sizeof (Datum ));
919+ memcpy (pertrans -> sortslot -> tts_isnull ,
920+ & slot -> tts_isnull [inputoff ],
921+ pertrans -> numInputs * sizeof (bool ));
922+ pertrans -> sortslot -> tts_nvalid = pertrans -> numInputs ;
923+ ExecStoreVirtualTuple (pertrans -> sortslot );
924+ tuplesort_puttupleslot (pertrans -> sortstates [setno ], pertrans -> sortslot );
925+ }
906926 }
907927 }
908928 else
@@ -915,8 +935,8 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
915935 Assert (slot -> tts_nvalid >= numTransInputs );
916936 for (i = 0 ; i < numTransInputs ; i ++ )
917937 {
918- fcinfo -> arg [i + 1 ] = slot -> tts_values [i ];
919- fcinfo -> argnull [i + 1 ] = slot -> tts_isnull [i ];
938+ fcinfo -> arg [i + 1 ] = slot -> tts_values [i + inputoff ];
939+ fcinfo -> argnull [i + 1 ] = slot -> tts_isnull [i + inputoff ];
920940 }
921941
922942 for (setno = 0 ; setno < numGroupingSets ; setno ++ )
@@ -943,20 +963,24 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
943963{
944964 int transno ;
945965 int numTrans = aggstate -> numtrans ;
966+ TupleTableSlot * slot = NULL ;
946967
947968 /* combine not supported with grouping sets */
948969 Assert (aggstate -> phase -> numsets == 0 );
949970
971+ /* compute input for all aggregates */
972+ if (aggstate -> evalproj )
973+ slot = ExecProject (aggstate -> evalproj , NULL );
974+
950975 for (transno = 0 ; transno < numTrans ; transno ++ )
951976 {
952977 AggStatePerTrans pertrans = & aggstate -> pertrans [transno ];
953978 AggStatePerGroup pergroupstate = & pergroup [transno ];
954- TupleTableSlot * slot ;
955979 FunctionCallInfo fcinfo = & pertrans -> transfn_fcinfo ;
980+ int inputoff = pertrans -> inputoff ;
956981
957- /* Evaluate the current input expressions for this aggregate */
958- slot = ExecProject (pertrans -> evalproj , NULL );
959982 Assert (slot -> tts_nvalid >= 1 );
983+ Assert (slot -> tts_nvalid + inputoff >= 1 );
960984
961985 /*
962986 * deserialfn_oid will be set if we must deserialize the input state
@@ -965,18 +989,18 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
965989 if (OidIsValid (pertrans -> deserialfn_oid ))
966990 {
967991 /* Don't call a strict deserialization function with NULL input */
968- if (pertrans -> deserialfn .fn_strict && slot -> tts_isnull [0 ])
992+ if (pertrans -> deserialfn .fn_strict && slot -> tts_isnull [inputoff ])
969993 {
970- fcinfo -> arg [1 ] = slot -> tts_values [0 ];
971- fcinfo -> argnull [1 ] = slot -> tts_isnull [0 ];
994+ fcinfo -> arg [1 ] = slot -> tts_values [inputoff ];
995+ fcinfo -> argnull [1 ] = slot -> tts_isnull [inputoff ];
972996 }
973997 else
974998 {
975999 FunctionCallInfo dsinfo = & pertrans -> deserialfn_fcinfo ;
9761000 MemoryContext oldContext ;
9771001
978- dsinfo -> arg [0 ] = slot -> tts_values [0 ];
979- dsinfo -> argnull [0 ] = slot -> tts_isnull [0 ];
1002+ dsinfo -> arg [0 ] = slot -> tts_values [inputoff ];
1003+ dsinfo -> argnull [0 ] = slot -> tts_isnull [inputoff ];
9801004 /* Dummy second argument for type-safety reasons */
9811005 dsinfo -> arg [1 ] = PointerGetDatum (NULL );
9821006 dsinfo -> argnull [1 ] = false;
@@ -995,8 +1019,8 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
9951019 }
9961020 else
9971021 {
998- fcinfo -> arg [1 ] = slot -> tts_values [0 ];
999- fcinfo -> argnull [1 ] = slot -> tts_isnull [0 ];
1022+ fcinfo -> arg [1 ] = slot -> tts_values [inputoff ];
1023+ fcinfo -> argnull [1 ] = slot -> tts_isnull [inputoff ];
10001024 }
10011025
10021026 advance_combine_function (aggstate , pertrans , pergroupstate );
@@ -1233,7 +1257,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
12331257{
12341258 MemoryContext workcontext = aggstate -> tmpcontext -> ecxt_per_tuple_memory ;
12351259 FunctionCallInfo fcinfo = & pertrans -> transfn_fcinfo ;
1236- TupleTableSlot * slot1 = pertrans -> evalslot ;
1260+ TupleTableSlot * slot1 = pertrans -> sortslot ;
12371261 TupleTableSlot * slot2 = pertrans -> uniqslot ;
12381262 int numTransInputs = pertrans -> numTransInputs ;
12391263 int numDistinctCols = pertrans -> numDistinctCols ;
@@ -2343,10 +2367,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
23432367 transno ,
23442368 aggno ;
23452369 int phase ;
2370+ List * combined_inputeval ;
23462371 ListCell * l ;
23472372 Bitmapset * all_grouped_cols = NULL ;
23482373 int numGroupingSets = 1 ;
23492374 int numPhases ;
2375+ int column_offset ;
23502376 int i = 0 ;
23512377 int j = 0 ;
23522378
@@ -2928,6 +2954,53 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
29282954 aggstate -> numaggs = aggno + 1 ;
29292955 aggstate -> numtrans = transno + 1 ;
29302956
2957+ /*
2958+ * Build a single projection computing the aggregate arguments for all
2959+ * aggregates at once; that's considerably faster than doing it separately
2960+ * for each.
2961+ *
2962+ * First create a targetlist combining the targetlist of all the
2963+ * transitions.
2964+ */
2965+ combined_inputeval = NIL ;
2966+ column_offset = 0 ;
2967+ for (transno = 0 ; transno < aggstate -> numtrans ; transno ++ )
2968+ {
2969+ AggStatePerTrans pertrans = & pertransstates [transno ];
2970+ ListCell * arg ;
2971+
2972+ pertrans -> inputoff = column_offset ;
2973+
2974+ /*
2975+ * Adjust resno in the copied target entries, to point into the combined
2976+ * slot.
2977+ */
2978+ foreach (arg , pertrans -> aggref -> args )
2979+ {
2980+ TargetEntry * source_tle = (TargetEntry * ) lfirst (arg );
2981+ TargetEntry * tle ;
2982+
2983+ Assert (IsA (source_tle , TargetEntry ));
2984+ tle = flatCopyTargetEntry (source_tle );
2985+ tle -> resno += column_offset ;
2986+
2987+ combined_inputeval = lappend (combined_inputeval , tle );
2988+ }
2989+
2990+ column_offset += list_length (pertrans -> aggref -> args );
2991+ }
2992+
2993+ /* and then create a projection for that targetlist */
2994+ aggstate -> evaldesc = ExecTypeFromTL (combined_inputeval , false);
2995+ aggstate -> evalslot = ExecInitExtraTupleSlot (estate );
2996+ combined_inputeval = (List * ) ExecInitExpr ((Expr * ) combined_inputeval ,
2997+ (PlanState * ) aggstate );
2998+ aggstate -> evalproj = ExecBuildProjectionInfo (combined_inputeval ,
2999+ aggstate -> tmpcontext ,
3000+ aggstate -> evalslot ,
3001+ NULL );
3002+ ExecSetSlotDescriptor (aggstate -> evalslot , aggstate -> evaldesc );
3003+
29313004 return aggstate ;
29323005}
29333006
@@ -3098,24 +3171,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
30983171
30993172 }
31003173
3101- /*
3102- * Get a tupledesc corresponding to the aggregated inputs (including sort
3103- * expressions) of the agg.
3104- */
3105- pertrans -> evaldesc = ExecTypeFromTL (aggref -> args , false);
3106-
3107- /* Create slot we're going to do argument evaluation in */
3108- pertrans -> evalslot = ExecInitExtraTupleSlot (estate );
3109- ExecSetSlotDescriptor (pertrans -> evalslot , pertrans -> evaldesc );
3110-
31113174 /* Initialize the input and FILTER expressions */
31123175 naggs = aggstate -> numaggs ;
31133176 pertrans -> aggfilter = ExecInitExpr (aggref -> aggfilter ,
31143177 (PlanState * ) aggstate );
31153178 pertrans -> aggdirectargs = (List * ) ExecInitExpr ((Expr * ) aggref -> aggdirectargs ,
31163179 (PlanState * ) aggstate );
3117- pertrans -> args = (List * ) ExecInitExpr ((Expr * ) aggref -> args ,
3118- (PlanState * ) aggstate );
31193180
31203181 /*
31213182 * Complain if the aggregate's arguments contain any aggregates; nested
@@ -3127,12 +3188,6 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31273188 (errcode (ERRCODE_GROUPING_ERROR ),
31283189 errmsg ("aggregate function calls cannot be nested" )));
31293190
3130- /* Set up projection info for evaluation */
3131- pertrans -> evalproj = ExecBuildProjectionInfo (pertrans -> args ,
3132- aggstate -> tmpcontext ,
3133- pertrans -> evalslot ,
3134- NULL );
3135-
31363191 /*
31373192 * If we're doing either DISTINCT or ORDER BY for a plain agg, then we
31383193 * have a list of SortGroupClause nodes; fish out the data in them and
@@ -3165,6 +3220,14 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31653220
31663221 if (numSortCols > 0 )
31673222 {
3223+ /*
3224+ * Get a tupledesc and slot corresponding to the aggregated inputs
3225+ * (including sort expressions) of the agg.
3226+ */
3227+ pertrans -> sortdesc = ExecTypeFromTL (aggref -> args , false);
3228+ pertrans -> sortslot = ExecInitExtraTupleSlot (estate );
3229+ ExecSetSlotDescriptor (pertrans -> sortslot , pertrans -> sortdesc );
3230+
31683231 /*
31693232 * We don't implement DISTINCT or ORDER BY aggs in the HASHED case
31703233 * (yet)
@@ -3183,7 +3246,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31833246 /* we will need an extra slot to store prior values */
31843247 pertrans -> uniqslot = ExecInitExtraTupleSlot (estate );
31853248 ExecSetSlotDescriptor (pertrans -> uniqslot ,
3186- pertrans -> evaldesc );
3249+ pertrans -> sortdesc );
31873250 }
31883251
31893252 /* Extract the sort information for use later */
0 commit comments