|
110 | 110 | #include "optimizer/predtest.h" |
111 | 111 | #include "optimizer/restrictinfo.h" |
112 | 112 | #include "optimizer/var.h" |
| 113 | +#include "parser/parse_clause.h" |
113 | 114 | #include "parser/parse_coerce.h" |
114 | 115 | #include "parser/parsetree.h" |
115 | 116 | #include "utils/builtins.h" |
@@ -255,10 +256,11 @@ var_eq_const(VariableStatData *vardata, Oid operator, |
255 | 256 | return 0.0; |
256 | 257 |
|
257 | 258 | /* |
258 | | - * If we matched the var to a unique index, assume there is exactly one |
259 | | - * match regardless of anything else. (This is slightly bogus, since the |
260 | | - * index's equality operator might be different from ours, but it's more |
261 | | - * likely to be right than ignoring the information.) |
| 259 | + * If we matched the var to a unique index or DISTINCT clause, assume |
| 260 | + * there is exactly one match regardless of anything else. (This is |
| 261 | + * slightly bogus, since the index's or clause's equality operator might be |
| 262 | + * different from ours, but it's much more likely to be right than |
| 263 | + * ignoring the information.) |
262 | 264 | */ |
263 | 265 | if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) |
264 | 266 | return 1.0 / vardata->rel->tuples; |
@@ -389,10 +391,11 @@ var_eq_non_const(VariableStatData *vardata, Oid operator, |
389 | 391 | bool isdefault; |
390 | 392 |
|
391 | 393 | /* |
392 | | - * If we matched the var to a unique index, assume there is exactly one |
393 | | - * match regardless of anything else. (This is slightly bogus, since the |
394 | | - * index's equality operator might be different from ours, but it's more |
395 | | - * likely to be right than ignoring the information.) |
| 394 | + * If we matched the var to a unique index or DISTINCT clause, assume |
| 395 | + * there is exactly one match regardless of anything else. (This is |
| 396 | + * slightly bogus, since the index's or clause's equality operator might be |
| 397 | + * different from ours, but it's much more likely to be right than |
| 398 | + * ignoring the information.) |
396 | 399 | */ |
397 | 400 | if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) |
398 | 401 | return 1.0 / vardata->rel->tuples; |
@@ -4128,10 +4131,11 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, |
4128 | 4131 | * atttype, atttypmod: type data to pass to get_attstatsslot(). This is |
4129 | 4132 | * commonly the same as the exposed type of the variable argument, |
4130 | 4133 | * but can be different in binary-compatible-type cases. |
4131 | | - * isunique: TRUE if we were able to match the var to a unique index, |
4132 | | - * implying its values are unique for this query. (Caution: this |
4133 | | - * should be trusted for statistical purposes only, since we do not |
4134 | | - * check indimmediate.) |
| 4134 | + * isunique: TRUE if we were able to match the var to a unique index or a |
| 4135 | + * single-column DISTINCT clause, implying its values are unique for |
| 4136 | + * this query. (Caution: this should be trusted for statistical |
| 4137 | + * purposes only, since we neither check indimmediate nor verify that |
| 4138 | + * the exact same definition of equality applies.) |
4135 | 4139 | * |
4136 | 4140 | * Caller is responsible for doing ReleaseVariableStats() before exiting. |
4137 | 4141 | */ |
@@ -4357,32 +4361,21 @@ examine_simple_variable(PlannerInfo *root, Var *var, |
4357 | 4361 | { |
4358 | 4362 | /* |
4359 | 4363 | * Plain subquery (not one that was converted to an appendrel). |
4360 | | - * |
4361 | | - * Punt if subquery uses set operations, GROUP BY, or DISTINCT --- any |
4362 | | - * of these will mash underlying columns' stats beyond recognition. |
4363 | | - * (Set ops are particularly nasty; if we forged ahead, we would |
4364 | | - * return stats relevant to only the leftmost subselect...) |
4365 | 4364 | */ |
4366 | 4365 | Query *subquery = rte->subquery; |
4367 | 4366 | RelOptInfo *rel; |
4368 | 4367 | TargetEntry *ste; |
4369 | 4368 |
|
4370 | | - if (subquery->setOperations || |
4371 | | - subquery->groupClause || |
4372 | | - subquery->distinctClause) |
4373 | | - return; |
4374 | | - |
4375 | 4369 | /* |
4376 | | - * If the sub-query originated from a view with the security_barrier |
4377 | | - * attribute, we treat it as a black-box from outside of the view. |
4378 | | - * This is probably a harsher restriction than necessary; it's |
4379 | | - * certainly OK for the selectivity estimator (which is a C function, |
4380 | | - * and therefore omnipotent anyway) to look at the statistics. But |
4381 | | - * many selectivity estimators will happily *invoke the operator |
4382 | | - * function* to try to work out a good estimate - and that's not OK. |
4383 | | - * So for now, we do this. |
| 4370 | + * Punt if subquery uses set operations or GROUP BY, as these will |
| 4371 | + * mash underlying columns' stats beyond recognition. (Set ops are |
| 4372 | + * particularly nasty; if we forged ahead, we would return stats |
| 4373 | + * relevant to only the leftmost subselect...) DISTINCT is also |
| 4374 | + * problematic, but we check that later because there is a possibility |
| 4375 | + * of learning something even with it. |
4384 | 4376 | */ |
4385 | | - if (rte->security_barrier) |
| 4377 | + if (subquery->setOperations || |
| 4378 | + subquery->groupClause) |
4386 | 4379 | return; |
4387 | 4380 |
|
4388 | 4381 | /* |
@@ -4415,6 +4408,37 @@ examine_simple_variable(PlannerInfo *root, Var *var, |
4415 | 4408 | rte->eref->aliasname, var->varattno); |
4416 | 4409 | var = (Var *) ste->expr; |
4417 | 4410 |
|
| 4411 | + /* |
| 4412 | + * If subquery uses DISTINCT, we can't make use of any stats for the |
| 4413 | + * variable ... but, if it's the only DISTINCT column, we are entitled |
| 4414 | + * to consider it unique. We do the test this way so that it works |
| 4415 | + * for cases involving DISTINCT ON. |
| 4416 | + */ |
| 4417 | + if (subquery->distinctClause) |
| 4418 | + { |
| 4419 | + if (list_length(subquery->distinctClause) == 1 && |
| 4420 | + targetIsInSortList(ste, InvalidOid, subquery->distinctClause)) |
| 4421 | + vardata->isunique = true; |
| 4422 | + /* cannot go further */ |
| 4423 | + return; |
| 4424 | + } |
| 4425 | + |
| 4426 | + /* |
| 4427 | + * If the sub-query originated from a view with the security_barrier |
| 4428 | + * attribute, we must not look at the variable's statistics, though |
| 4429 | + * it seems all right to notice the existence of a DISTINCT clause. |
| 4430 | + * So stop here. |
| 4431 | + * |
| 4432 | + * This is probably a harsher restriction than necessary; it's |
| 4433 | + * certainly OK for the selectivity estimator (which is a C function, |
| 4434 | + * and therefore omnipotent anyway) to look at the statistics. But |
| 4435 | + * many selectivity estimators will happily *invoke the operator |
| 4436 | + * function* to try to work out a good estimate - and that's not OK. |
| 4437 | + * So for now, don't dig down for stats. |
| 4438 | + */ |
| 4439 | + if (rte->security_barrier) |
| 4440 | + return; |
| 4441 | + |
4418 | 4442 | /* Can only handle a simple Var of subquery's query level */ |
4419 | 4443 | if (var && IsA(var, Var) && |
4420 | 4444 | var->varlevelsup == 0) |
@@ -4513,10 +4537,10 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault) |
4513 | 4537 | } |
4514 | 4538 |
|
4515 | 4539 | /* |
4516 | | - * If there is a unique index for the variable, assume it is unique no |
4517 | | - * matter what pg_statistic says; the statistics could be out of date, or |
4518 | | - * we might have found a partial unique index that proves the var is |
4519 | | - * unique for this query. |
| 4540 | + * If there is a unique index or DISTINCT clause for the variable, assume |
| 4541 | + * it is unique no matter what pg_statistic says; the statistics could be |
| 4542 | + * out of date, or we might have found a partial unique index that proves |
| 4543 | + * the var is unique for this query. |
4520 | 4544 | */ |
4521 | 4545 | if (vardata->isunique) |
4522 | 4546 | stadistinct = -1.0; |
|
0 commit comments