11-- Generic extended statistics support
2+ -- We will be checking execution plans without/with statistics, so
3+ -- let's make sure we get simple non-parallel plans. Also set the
4+ -- work_mem low so that we can use small amounts of data.
5+ SET max_parallel_workers = 0;
6+ SET max_parallel_workers_per_gather = 0;
7+ SET work_mem = '128kB';
28-- Ensure stats are dropped sanely
39CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
410CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
@@ -49,6 +55,67 @@ CREATE TABLE ndistinct (
4955 c INT,
5056 d INT
5157);
58+ -- over-estimates when using only per-column statistics
59+ INSERT INTO ndistinct (a, b, c, filler1)
60+ SELECT i/100, i/100, i/100, cash_words((i/100)::money)
61+ FROM generate_series(1,30000) s(i);
62+ ANALYZE ndistinct;
63+ -- Group Aggregate, due to over-estimate of the number of groups
64+ EXPLAIN (COSTS off)
65+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
66+ QUERY PLAN
67+ -----------------------------------
68+ GroupAggregate
69+ Group Key: a, b
70+ -> Sort
71+ Sort Key: a, b
72+ -> Seq Scan on ndistinct
73+ (5 rows)
74+
75+ EXPLAIN (COSTS off)
76+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
77+ QUERY PLAN
78+ -----------------------------------
79+ GroupAggregate
80+ Group Key: b, c
81+ -> Sort
82+ Sort Key: b, c
83+ -> Seq Scan on ndistinct
84+ (5 rows)
85+
86+ EXPLAIN (COSTS off)
87+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
88+ QUERY PLAN
89+ -----------------------------------
90+ GroupAggregate
91+ Group Key: a, b, c
92+ -> Sort
93+ Sort Key: a, b, c
94+ -> Seq Scan on ndistinct
95+ (5 rows)
96+
97+ EXPLAIN (COSTS off)
98+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
99+ QUERY PLAN
100+ -----------------------------------
101+ GroupAggregate
102+ Group Key: a, b, c, d
103+ -> Sort
104+ Sort Key: a, b, c, d
105+ -> Seq Scan on ndistinct
106+ (5 rows)
107+
108+ EXPLAIN (COSTS off)
109+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
110+ QUERY PLAN
111+ -----------------------------------
112+ GroupAggregate
113+ Group Key: b, c, d
114+ -> Sort
115+ Sort Key: b, c, d
116+ -> Seq Scan on ndistinct
117+ (5 rows)
118+
52119-- unknown column
53120CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
54121ERROR: column "unknown_column" referenced in statistics does not exist
@@ -63,18 +130,15 @@ CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
63130ERROR: duplicate column name in statistics definition
64131-- correct command
65132CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
66- -- perfectly correlated groups
67- INSERT INTO ndistinct (a, b, c, filler1)
68- SELECT i/100, i/100, i/100, cash_words(i::money)
69- FROM generate_series(1,10000) s(i);
70133ANALYZE ndistinct;
71134SELECT staenabled, standistinct
72135 FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
73136 staenabled | standistinct
74137------------+------------------------------------------------------------------------------------------------
75- {d} | [{(b 3 4), 101 .000000}, {(b 3 6), 101 .000000}, {(b 4 6), 101 .000000}, {(b 3 4 6), 101 .000000}]
138+ {d} | [{(b 3 4), 301 .000000}, {(b 3 6), 301 .000000}, {(b 4 6), 301 .000000}, {(b 3 4 6), 301 .000000}]
76139(1 row)
77140
141+ -- Hash Aggregate, thanks to estimates improved by the statistic
78142EXPLAIN (COSTS off)
79143 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
80144 QUERY PLAN
@@ -85,78 +149,165 @@ EXPLAIN (COSTS off)
85149(3 rows)
86150
87151EXPLAIN (COSTS off)
88- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
152+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
89153 QUERY PLAN
90154-----------------------------
91155 HashAggregate
92- Group Key: a, b, c
156+ Group Key: b, c
93157 -> Seq Scan on ndistinct
94158(3 rows)
95159
96160EXPLAIN (COSTS off)
97- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d ;
161+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
98162 QUERY PLAN
99163-----------------------------
100164 HashAggregate
101- Group Key: a, b, c, d
165+ Group Key: a, b, c
102166 -> Seq Scan on ndistinct
103167(3 rows)
104168
169+ -- last two plans keep using Group Aggregate, because 'd' is not covered
170+ -- by the statistic and while it's NULL-only we assume 200 values for it
171+ EXPLAIN (COSTS off)
172+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
173+ QUERY PLAN
174+ -----------------------------------
175+ GroupAggregate
176+ Group Key: a, b, c, d
177+ -> Sort
178+ Sort Key: a, b, c, d
179+ -> Seq Scan on ndistinct
180+ (5 rows)
181+
182+ EXPLAIN (COSTS off)
183+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
184+ QUERY PLAN
185+ -----------------------------------
186+ GroupAggregate
187+ Group Key: b, c, d
188+ -> Sort
189+ Sort Key: b, c, d
190+ -> Seq Scan on ndistinct
191+ (5 rows)
192+
105193TRUNCATE TABLE ndistinct;
106- -- partially correlated groups
107- INSERT INTO ndistinct (a, b, c)
108- SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
194+ -- under-estimates when using only per-column statistics
195+ INSERT INTO ndistinct (a, b, c, filler1)
196+ SELECT mod(i,50), mod(i,51), mod(i,32),
197+ cash_words(mod(i,33)::int::money)
198+ FROM generate_series(1,10000) s(i);
109199ANALYZE ndistinct;
110200SELECT staenabled, standistinct
111201 FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
112- staenabled | standistinct
113- ------------+------------------------------------------------------------------------------------------------
114- {d} | [{(b 3 4), 201 .000000}, {(b 3 6), 201 .000000}, {(b 4 6), 101 .000000}, {(b 3 4 6), 201 .000000}]
202+ staenabled | standistinct
203+ ------------+----------------------------------------------------------------------------------------------------
204+ {d} | [{(b 3 4), 2550 .000000}, {(b 3 6), 800 .000000}, {(b 4 6), 1632 .000000}, {(b 3 4 6), 10000 .000000}]
115205(1 row)
116206
117- EXPLAIN
207+ -- plans using Group Aggregate, thanks to using correct esimates
208+ EXPLAIN (COSTS off)
118209 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
119- QUERY PLAN
120- ---------------------------------------------------------------------
121- HashAggregate (cost=230.00..232.01 rows=201 width=16)
210+ QUERY PLAN
211+ -----------------------------------
212+ GroupAggregate
213+ Group Key: a, b
214+ -> Sort
215+ Sort Key: a, b
216+ -> Seq Scan on ndistinct
217+ (5 rows)
218+
219+ EXPLAIN (COSTS off)
220+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
221+ QUERY PLAN
222+ -----------------------------------
223+ GroupAggregate
224+ Group Key: a, b, c
225+ -> Sort
226+ Sort Key: a, b, c
227+ -> Seq Scan on ndistinct
228+ (5 rows)
229+
230+ EXPLAIN (COSTS off)
231+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
232+ QUERY PLAN
233+ -----------------------------------
234+ GroupAggregate
235+ Group Key: a, b, c, d
236+ -> Sort
237+ Sort Key: a, b, c, d
238+ -> Seq Scan on ndistinct
239+ (5 rows)
240+
241+ EXPLAIN (COSTS off)
242+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
243+ QUERY PLAN
244+ -----------------------------
245+ HashAggregate
246+ Group Key: b, c, d
247+ -> Seq Scan on ndistinct
248+ (3 rows)
249+
250+ EXPLAIN (COSTS off)
251+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
252+ QUERY PLAN
253+ -----------------------------
254+ HashAggregate
255+ Group Key: a, d
256+ -> Seq Scan on ndistinct
257+ (3 rows)
258+
259+ DROP STATISTICS s10;
260+ SELECT staenabled, standistinct
261+ FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
262+ staenabled | standistinct
263+ ------------+--------------
264+ (0 rows)
265+
266+ -- dropping the statistics switches the plans to Hash Aggregate,
267+ -- due to under-estimates
268+ EXPLAIN (COSTS off)
269+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
270+ QUERY PLAN
271+ -----------------------------
272+ HashAggregate
122273 Group Key: a, b
123- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
274+ -> Seq Scan on ndistinct
124275(3 rows)
125276
126- EXPLAIN
277+ EXPLAIN (COSTS off)
127278 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
128- QUERY PLAN
129- ----------------------------------------------------------------------
130- HashAggregate (cost=255.00..257.01 rows=201 width=20)
279+ QUERY PLAN
280+ -----------------------------
281+ HashAggregate
131282 Group Key: a, b, c
132- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
283+ -> Seq Scan on ndistinct
133284(3 rows)
134285
135- EXPLAIN
286+ EXPLAIN (COSTS off)
136287 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
137- QUERY PLAN
138- ----------------------------------------------------------------------
139- HashAggregate (cost=280.00..290.00 rows=1000 width=24)
288+ QUERY PLAN
289+ -----------------------------
290+ HashAggregate
140291 Group Key: a, b, c, d
141- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
292+ -> Seq Scan on ndistinct
142293(3 rows)
143294
144- EXPLAIN
295+ EXPLAIN (COSTS off)
145296 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
146- QUERY PLAN
147- ----------------------------------------------------------------------
148- HashAggregate (cost=255.00..265.00 rows=1000 width=20)
297+ QUERY PLAN
298+ -----------------------------
299+ HashAggregate
149300 Group Key: b, c, d
150- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
301+ -> Seq Scan on ndistinct
151302(3 rows)
152303
153- EXPLAIN
304+ EXPLAIN (COSTS off)
154305 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
155- QUERY PLAN
156- ---------------------------------------------------------------------
157- HashAggregate (cost=230.00..240.00 rows=1000 width=16)
306+ QUERY PLAN
307+ -----------------------------
308+ HashAggregate
158309 Group Key: a, d
159- -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
310+ -> Seq Scan on ndistinct
160311(3 rows)
161312
162313DROP TABLE ndistinct;
0 commit comments