Remove redundant grouping and DISTINCT columns.

Avoid explicitly grouping by columns that we know are redundant for sorting, for example we need group by only one of x and y in SELECT ... WHERE x = y GROUP BY x, y This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those not the originals where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/[email protected]
author: Tom Lane 2023-01-18 17:37:57 +0000
committer: Tom Lane 2023-01-18 17:37:57 +0000
commit: 8d83a5d0a2673174dc478e707de1f502935391a5 (patch)
tree: abfdce0820345a412b8a046ed8832b915094f031 /src/test
parent: d540a02a724b9643205abce8c5644a0f0908f6e3 (diff)
5 files changed, 11 insertions, 13 deletions
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index ae3b9053318..75c98f8d66b 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1289,7 +1289,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
                       QUERY PLAN                      
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.y
+   Group Key: t1.a, t1.b
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
@@ -1304,7 +1304,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
                       QUERY PLAN                      
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.z
+   Group Key: t1.a, t1.b, t2.z
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index fcad5c4093b..292196b1699 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -2135,7 +2135,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
         QUERY PLAN         
 ---------------------------
  GroupAggregate
-   Group Key: $2
    InitPlan 1 (returns $1)
      ->  Result
    InitPlan 3 (returns $2)
@@ -2143,7 +2142,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
    ->  Result
    SubPlan 2
      ->  Result
-(9 rows)
+(8 rows)
 
 select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
  grouping 
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a82b8fb8fb7..1b900fddf8e 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -164,10 +164,9 @@ SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
            QUERY PLAN           
 --------------------------------
  GroupAggregate
-   Group Key: c
    ->  Result
          One-Time Filter: false
-(4 rows)
+(3 rows)
 
 SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
  c | sum 
@@ -805,10 +804,10 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOI
 -- Empty join relation because of empty outer side, no partitionwise agg plan
 EXPLAIN (COSTS OFF)
 SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2;
-              QUERY PLAN               
----------------------------------------
+              QUERY PLAN              
+--------------------------------------
  GroupAggregate
-   Group Key: pagg_tab1.x, pagg_tab1.y
+   Group Key: pagg_tab1.y
    ->  Sort
          Sort Key: pagg_tab1.y
          ->  Result
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index c649c4aeaae..c0ff13fb828 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1340,7 +1340,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1484,7 +1484,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1576,7 +1576,7 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c
  Sort
    Sort Key: t1.c
    ->  HashAggregate
-         Group Key: t1.c, t2.c
+         Group Key: t1.c
          ->  Append
                ->  Hash Join
                      Hash Cond: (t2_1.c = t1_1.c)
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 1fc07f220fb..9d44ea8056d 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -136,7 +136,7 @@ SELECT count(*) FROM
    Output: count(*)
    ->  HashAggregate
          Output: tenk1.two, tenk1.four, tenk1.two
-         Group Key: tenk1.two, tenk1.four, tenk1.two
+         Group Key: tenk1.two, tenk1.four
          ->  Seq Scan on public.tenk1
                Output: tenk1.two, tenk1.four, tenk1.two
 (7 rows)
author	Tom Lane	2023-01-18 17:37:57 +0000
committer	Tom Lane	2023-01-18 17:37:57 +0000
commit	8d83a5d0a2673174dc478e707de1f502935391a5 (patch)
tree	abfdce0820345a412b8a046ed8832b915094f031 /src/test
parent	d540a02a724b9643205abce8c5644a0f0908f6e3 (diff)