diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f6f77b8fe193..bdd9534de97a 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2008,13 +2008,12 @@ cost_incremental_sort(Path *path, run_cost, input_run_cost = input_total_cost - input_startup_cost; double group_tuples, - input_groups; + input_groups, + estimated_groups; Cost group_startup_cost, group_run_cost, group_input_run_cost; List *presortedExprs = NIL; - ListCell *l; - bool unknown_varno = false; Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys)); @@ -2025,9 +2024,6 @@ cost_incremental_sort(Path *path, if (input_tuples < 2.0) input_tuples = 2.0; - /* Default estimate of number of groups, capped to one group per row. */ - input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT); - /* * Extract presorted keys as list of expressions. * @@ -2050,33 +2046,15 @@ cost_incremental_sort(Path *path, * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive * while maintaining lower startup cost. */ - foreach(l, pathkeys) - { - PathKey *key = (PathKey *) lfirst(l); - EquivalenceMember *member = (EquivalenceMember *) - linitial(key->pk_eclass->ec_members); - - /* - * Check if the expression contains Var with "varno 0" so that we - * don't call estimate_num_groups in that case. - */ - if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr))) - { - unknown_varno = true; - break; - } + presortedExprs = list_copy_head(pathkeys, presorted_keys); - /* expression not containing any Vars with "varno 0" */ - presortedExprs = lappend(presortedExprs, member->em_expr); - - if (foreach_current_index(l) + 1 >= presorted_keys) - break; - } - - /* Estimate the number of groups with equal presorted keys. */ - if (!unknown_varno) - input_groups = estimate_num_groups(root, presortedExprs, input_tuples, + estimated_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL, NULL); + if (estimated_groups > 0.0) + input_groups = estimated_groups; + else + /* Default estimate of number of groups, capped to one group per row. */ + input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT); group_tuples = input_tuples / input_groups; group_input_run_cost = input_run_cost / input_groups; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 0f9ecf5ee8b2..71678fa87a3f 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -525,6 +525,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, em->em_datatype = datatype; em->em_jdomain = jdomain; em->em_parent = parent; + em->em_ndistinct = -1.0; if (bms_is_empty(relids)) { diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 5b35debc8ffd..0ecadb24c3ee 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -214,6 +214,8 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); +static EquivalenceMember *identify_sort_ecmember(PlannerInfo *root, + EquivalenceClass *ec); /* @@ -3468,12 +3470,34 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, i = 0; foreach(l, groupExprs) { - Node *groupexpr = (Node *) lfirst(l); + Node *node = (Node *) lfirst(l); + Node *groupexpr; double this_srf_multiplier; VariableStatData vardata; List *varshere; ListCell *l2; + /* Find an expression beforehand */ + if (IsA(node, PathKey)) + { + PathKey *key = (PathKey *) node; + EquivalenceMember *em = identify_sort_ecmember(root, key->pk_eclass); + + /* + * Check if the expression contains Var with "varno 0" so that we + * don't call estimate_num_groups in that case. + */ + if (bms_is_member(0, pull_varnos(root, (Node *) em->em_expr))) + { + /* Return 'unknown' value */ + return 0.0; + } + + groupexpr = (Node *) em->em_expr; + } + else + groupexpr = node; + /* is expression in this grouping set? */ if (pgset && !list_member_int(*pgset, i++)) continue; @@ -8404,3 +8428,64 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexPages = index->pages; } + +/* + * Find suitable member of the equivalence class. + * Passing through the list of EC members find the member with minimum of + * distinct values. Cache estimated number of distincts in the em_ndistinct + * field of each member. + */ +static EquivalenceMember * +identify_sort_ecmember(PlannerInfo *root, EquivalenceClass *ec) +{ + EquivalenceMember *candidate = linitial(ec->ec_members); + + if (root == NULL) + /* Fast path */ + return candidate; + + foreach_node(EquivalenceMember, em, ec->ec_members) + { + VariableStatData vardata; + + if (em->em_ndistinct == 0.) + /* Nothing helpful */ + continue; + + if (em->em_is_child || em->em_is_const || bms_is_empty(em->em_relids) || + bms_is_member(0, em->em_relids)) + { + em->em_ndistinct = 0.; + continue; + } + + if (em->em_ndistinct < 0.) + { + bool isdefault = true; + double ndist = 0.; + + /* Let's check candidate's ndistinct value */ + examine_variable(root, (Node *) em->em_expr, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + ndist = get_variable_numdistinct(&vardata, &isdefault); + ReleaseVariableStats(vardata); + + if (isdefault) + { + em->em_ndistinct = 0.; + continue; + } + + em->em_ndistinct = ndist; + } + + Assert(em->em_ndistinct > 0.); + + if (candidate->em_ndistinct == 0. || + em->em_ndistinct < candidate->em_ndistinct) + candidate = em; + } + + Assert(candidate != NULL); + return candidate; +} diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index c24a1fc8514c..54eae83177b8 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1477,6 +1477,8 @@ typedef struct EquivalenceMember JoinDomain *em_jdomain; /* join domain containing the source clause */ /* if em_is_child is true, this links to corresponding EM for top parent */ struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore); + + double em_ndistinct; /* cached value of ndistinct: 0- default value or 'unknown'; -1 - not defined yet */ } EquivalenceMember; /* diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index b00219643b9a..e3be6c8e2781 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1722,3 +1722,54 @@ order by t1.four, t1.two limit 1; -> Seq Scan on tenk1 t2 (12 rows) +-- Check: +-- commuting of sides in an expression doesn't influence cost estimation +CREATE TABLE sort_ndist_t1 (x numeric, y numeric); +CREATE TABLE sort_ndist_t2 (x numeric, y numeric); +INSERT INTO sort_ndist_t1 (x,y) + SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs; +INSERT INTO sort_ndist_t2 (x,y) + SELECT gs, gs FROM generate_series(1,1E4) AS gs; +CREATE INDEX t1_idx ON sort_ndist_t1 (x); +CREATE INDEX t2_idx ON sort_ndist_t2 (x); +VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2; +SET enable_hashjoin = 'off'; +-- Having lots of duplicates after the join it is more effective to use plain +-- Sort instead of incremental sort: with small number of groups we do the same +-- stuff like Sort but with extra penalty. +EXPLAIN (COSTS OFF) +SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2 +WHERE t1.x=t2.x +ORDER BY t1.x,t1.y; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: t1.x, t1.y + -> Nested Loop + -> Seq Scan on sort_ndist_t1 t1 + -> Memoize + Cache Key: t1.x + Cache Mode: logical + -> Index Only Scan using t2_idx on sort_ndist_t2 t2 + Index Cond: (x = t1.x) +(9 rows) + +EXPLAIN (COSTS OFF) -- the plan must be the same as above +SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2 +WHERE t2.x=t1.x +ORDER BY t1.x,t1.y; + QUERY PLAN +-------------------------------------------------------------------- + Sort + Sort Key: t1.x, t1.y + -> Nested Loop + -> Seq Scan on sort_ndist_t1 t1 + -> Memoize + Cache Key: t1.x + Cache Mode: logical + -> Index Only Scan using t2_idx on sort_ndist_t2 t2 + Index Cond: (x = t1.x) +(9 rows) + +RESET enable_hashjoin; +DROP TABLE sort_ndist_t1, sort_ndist_t2; diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index f1f8fae56549..54f91bc5f990 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -298,3 +298,34 @@ explain (costs off) select * from (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two order by t1.four, t1.two limit 1; + +-- Check: +-- commuting of sides in an expression doesn't influence cost estimation +CREATE TABLE sort_ndist_t1 (x numeric, y numeric); +CREATE TABLE sort_ndist_t2 (x numeric, y numeric); + +INSERT INTO sort_ndist_t1 (x,y) + SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs; +INSERT INTO sort_ndist_t2 (x,y) + SELECT gs, gs FROM generate_series(1,1E4) AS gs; +CREATE INDEX t1_idx ON sort_ndist_t1 (x); +CREATE INDEX t2_idx ON sort_ndist_t2 (x); +VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2; + +SET enable_hashjoin = 'off'; + +-- Having lots of duplicates after the join it is more effective to use plain +-- Sort instead of incremental sort: with small number of groups we do the same +-- stuff like Sort but with extra penalty. +EXPLAIN (COSTS OFF) +SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2 +WHERE t1.x=t2.x +ORDER BY t1.x,t1.y; + +EXPLAIN (COSTS OFF) -- the plan must be the same as above +SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2 +WHERE t2.x=t1.x +ORDER BY t1.x,t1.y; + +RESET enable_hashjoin; +DROP TABLE sort_ndist_t1, sort_ndist_t2;