From 096a0636307b6b5e363d480c450e02ed5a6b7762 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepihov@gmail.com>
Date: Tue, 29 Oct 2024 08:49:33 +0700
Subject: [PATCH] Improve group number estimation.

Estimating GROUP BY x optimisation employs a distinct statistic for column x.
But if we have an expression like 'x=y' somewhere down the query tree, the
number of different values can't be more than the smaller distinct value on
columns 'x' and 'y'. That means it is possible to correct the estimation with
knowledge provided by the equivalence class.

In this commit, the estimate_num_groups routine is changed to include PathKey
nodes in the presortedExprs list. With the PathKey node, we can pass through
its equivalence class members and correct the distinct estimation.

To avoid multiple calls on statistic tuples, the em_ndistinct cache field
is introduced.
---
 src/backend/optimizer/path/costsize.c         | 40 ++-------
 src/backend/optimizer/path/equivclass.c       |  1 +
 src/backend/utils/adt/selfuncs.c              | 87 ++++++++++++++++++-
 src/include/nodes/pathnodes.h                 |  2 +
 .../regress/expected/incremental_sort.out     | 51 +++++++++++
 src/test/regress/sql/incremental_sort.sql     | 31 +++++++
 6 files changed, 180 insertions(+), 32 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index f6f77b8fe193..bdd9534de97a 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2008,13 +2008,12 @@ cost_incremental_sort(Path *path,
 				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
-				input_groups;
+				input_groups,
+				estimated_groups;
 	Cost		group_startup_cost,
 				group_run_cost,
 				group_input_run_cost;
 	List	   *presortedExprs = NIL;
-	ListCell   *l;
-	bool		unknown_varno = false;
 
 	Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys));
 
@@ -2025,9 +2024,6 @@ cost_incremental_sort(Path *path,
 	if (input_tuples < 2.0)
 		input_tuples = 2.0;
 
-	/* Default estimate of number of groups, capped to one group per row. */
-	input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
-
 	/*
 	 * Extract presorted keys as list of expressions.
 	 *
@@ -2050,33 +2046,15 @@ cost_incremental_sort(Path *path,
 	 * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
 	 * while maintaining lower startup cost.
 	 */
-	foreach(l, pathkeys)
-	{
-		PathKey    *key = (PathKey *) lfirst(l);
-		EquivalenceMember *member = (EquivalenceMember *)
-			linitial(key->pk_eclass->ec_members);
-
-		/*
-		 * Check if the expression contains Var with "varno 0" so that we
-		 * don't call estimate_num_groups in that case.
-		 */
-		if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
-		{
-			unknown_varno = true;
-			break;
-		}
+	presortedExprs = list_copy_head(pathkeys, presorted_keys);
 
-		/* expression not containing any Vars with "varno 0" */
-		presortedExprs = lappend(presortedExprs, member->em_expr);
-
-		if (foreach_current_index(l) + 1 >= presorted_keys)
-			break;
-	}
-
-	/* Estimate the number of groups with equal presorted keys. */
-	if (!unknown_varno)
-		input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
+	estimated_groups = estimate_num_groups(root, presortedExprs, input_tuples,
 										   NULL, NULL);
+	if (estimated_groups > 0.0)
+		input_groups = estimated_groups;
+	else
+		/* Default estimate of number of groups, capped to one group per row. */
+		input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
 
 	group_tuples = input_tuples / input_groups;
 	group_input_run_cost = input_run_cost / input_groups;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 0f9ecf5ee8b2..71678fa87a3f 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -525,6 +525,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 	em->em_datatype = datatype;
 	em->em_jdomain = jdomain;
 	em->em_parent = parent;
+	em->em_ndistinct = -1.0;
 
 	if (bms_is_empty(relids))
 	{
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5b35debc8ffd..0ecadb24c3ee 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -214,6 +214,8 @@ static bool get_actual_variable_endpoint(Relation heapRel,
 										 MemoryContext outercontext,
 										 Datum *endpointDatum);
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+static EquivalenceMember *identify_sort_ecmember(PlannerInfo *root,
+												 EquivalenceClass *ec);
 
 
 /*
@@ -3468,12 +3470,34 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	i = 0;
 	foreach(l, groupExprs)
 	{
-		Node	   *groupexpr = (Node *) lfirst(l);
+		Node	   *node = (Node *) lfirst(l);
+		Node	   *groupexpr;
 		double		this_srf_multiplier;
 		VariableStatData vardata;
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* Find an expression beforehand */
+		if (IsA(node, PathKey))
+		{
+			PathKey			   *key = (PathKey *) node;
+			EquivalenceMember  *em = identify_sort_ecmember(root, key->pk_eclass);
+
+			/*
+			 * Check if the expression contains Var with "varno 0" so that we
+			 * don't call estimate_num_groups in that case.
+			 */
+			if (bms_is_member(0, pull_varnos(root, (Node *) em->em_expr)))
+			{
+				/* Return 'unknown' value */
+				return 0.0;
+			}
+
+			groupexpr = (Node *) em->em_expr;
+		}
+		else
+			groupexpr = node;
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -8404,3 +8428,64 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 
 	*indexPages = index->pages;
 }
+
+/*
+ * Find suitable member of the equivalence class.
+ * Passing through the list of EC members find the member with minimum of
+ * distinct values. Cache estimated number of distincts in the em_ndistinct
+ * field of each member.
+ */
+static EquivalenceMember *
+identify_sort_ecmember(PlannerInfo *root, EquivalenceClass *ec)
+{
+	EquivalenceMember  *candidate = linitial(ec->ec_members);
+
+	if (root == NULL)
+		/* Fast path */
+		return candidate;
+
+	foreach_node(EquivalenceMember, em, ec->ec_members)
+	{
+		VariableStatData	vardata;
+
+		if (em->em_ndistinct == 0.)
+			/* Nothing helpful */
+			continue;
+
+		if (em->em_is_child || em->em_is_const || bms_is_empty(em->em_relids) ||
+			bms_is_member(0, em->em_relids))
+		{
+			em->em_ndistinct = 0.;
+			continue;
+		}
+
+		if (em->em_ndistinct < 0.)
+		{
+			bool	isdefault = true;
+			double	ndist = 0.;
+
+			/* Let's check candidate's ndistinct value */
+			examine_variable(root, (Node *) em->em_expr, 0, &vardata);
+			if (HeapTupleIsValid(vardata.statsTuple))
+				ndist = get_variable_numdistinct(&vardata, &isdefault);
+			ReleaseVariableStats(vardata);
+
+			if (isdefault)
+			{
+				em->em_ndistinct = 0.;
+				continue;
+			}
+
+			em->em_ndistinct = ndist;
+		}
+
+		Assert(em->em_ndistinct > 0.);
+
+		if (candidate->em_ndistinct == 0. ||
+			em->em_ndistinct < candidate->em_ndistinct)
+			candidate = em;
+	}
+
+	Assert(candidate != NULL);
+	return candidate;
+}
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c24a1fc8514c..54eae83177b8 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1477,6 +1477,8 @@ typedef struct EquivalenceMember
 	JoinDomain *em_jdomain;		/* join domain containing the source clause */
 	/* if em_is_child is true, this links to corresponding EM for top parent */
 	struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore);
+
+	double em_ndistinct; /* cached value of ndistinct: 0- default value or 'unknown'; -1 - not defined yet */
 } EquivalenceMember;
 
 /*
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index b00219643b9a..e3be6c8e2781 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,54 @@ order by t1.four, t1.two limit 1;
                ->  Seq Scan on tenk1 t2
 (12 rows)
 
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+SET enable_hashjoin = 'off';
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index f1f8fae56549..54f91bc5f990 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,34 @@ explain (costs off)
 select * from
   (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
 order by t1.four, t1.two limit 1;
+
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+
+SET enable_hashjoin = 'off';
+
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;