diff options
author | Robert Haas | 2018-03-22 16:49:48 +0000 |
---|---|---|
committer | Robert Haas | 2018-03-22 16:49:48 +0000 |
commit | e2f1eb0ee30d144628ab523432320f174a2c8966 (patch) | |
tree | c966ef6a6de9c5e3e4a8c6a0b310cb61306535f5 /src/test/regress/expected/partition_aggregate.out | |
parent | 2058d6a22b43a97d1069a51bd95ad56759b3c7bc (diff) |
Implement partition-wise grouping/aggregation.
If the partition keys of input relation are part of the GROUP BY
clause, all the rows belonging to a given group come from a single
partition. This allows aggregation/grouping over a partitioned
relation to be broken down * into aggregation/grouping on each
partition. This should be no worse, and often better, than the normal
approach.
If the GROUP BY clause does not contain all the partition keys, we can
still perform partial aggregation for each partition and then finalize
aggregation after appending the partial results. This is less certain
to be a win, but it's still useful.
Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series
of which this patch is a part was also reviewed and tested by Antonin
Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin
Knizhnik, Pascal Legrand, and Rafia Sabih.
Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com
Diffstat (limited to 'src/test/regress/expected/partition_aggregate.out')
-rw-r--r-- | src/test/regress/expected/partition_aggregate.out | 1393 |
1 files changed, 1393 insertions, 0 deletions
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out new file mode 100644 index 00000000000..bf8272e433c --- /dev/null +++ b/src/test/regress/expected/partition_aggregate.out @@ -0,0 +1,1393 @@ +-- +-- PARTITION_AGGREGATE +-- Test partitionwise aggregation on partitioned tables +-- +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; +-- +-- Tests for list partitioned tables. +-- +CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); +CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003'); +CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0004', '0005', '0006', '0007'); +CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0008', '0009', '0010', '0011'); +INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) + -> Append + -> HashAggregate + Group Key: pagg_tab_p1.c + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.c + Filter: (avg(pagg_tab_p2.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.c + Filter: (avg(pagg_tab_p3.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p3 +(15 rows) + +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count | min | max +------+------+---------------------+-------+-----+----- + 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 + 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 + 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 + 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 + 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 + 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) + -> Finalize HashAggregate + Group Key: pagg_tab_p1.a + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> Partial HashAggregate + Group Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> Partial HashAggregate + Group Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(15 rows) + +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count | min | max +----+------+---------------------+-------+-----+----- + 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 + 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 + 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 + 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 + 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 + 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 + 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 + 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 + 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 + 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 +(10 rows) + +-- Check with multiple columns in GROUP BY +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.a, pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.a, pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.a, pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in GROUP BY is reversed +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.c, pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.c, pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.c, pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in target-list is reversed +EXPLAIN (COSTS OFF) +SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.a, pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.a, pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.a, pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Test when input relation for grouping is dummy +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + QUERY PLAN +-------------------------------- + HashAggregate + Group Key: pagg_tab.c + -> Result + One-Time Filter: false +(4 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + c | sum +---+----- +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + QUERY PLAN +-------------------------------- + GroupAggregate + Group Key: pagg_tab.c + -> Result + One-Time Filter: false +(4 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + c | sum +---+----- +(0 rows) + +-- Test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; +-- When GROUP BY clause matches full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) + -> Append + -> GroupAggregate + Group Key: pagg_tab_p1.c + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> GroupAggregate + Group Key: pagg_tab_p2.c + Filter: (avg(pagg_tab_p2.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> GroupAggregate + Group Key: pagg_tab_p3.c + Filter: (avg(pagg_tab_p3.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(21 rows) + +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count +------+------+---------------------+------- + 0000 | 2000 | 12.0000000000000000 | 250 + 0001 | 2250 | 13.0000000000000000 | 250 + 0002 | 2500 | 14.0000000000000000 | 250 + 0006 | 2500 | 12.0000000000000000 | 250 + 0007 | 2750 | 13.0000000000000000 | 250 + 0008 | 2000 | 14.0000000000000000 | 250 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) + -> Finalize GroupAggregate + Group Key: pagg_tab_p1.a + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Merge Append + Sort Key: pagg_tab_p1.a + -> Partial GroupAggregate + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> Partial GroupAggregate + Group Key: pagg_tab_p2.a + -> Sort + Sort Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> Partial GroupAggregate + Group Key: pagg_tab_p3.a + -> Sort + Sort Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(22 rows) + +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 10.0000000000000000 | 150 + 1 | 1650 | 11.0000000000000000 | 150 + 2 | 1800 | 12.0000000000000000 | 150 + 3 | 1950 | 13.0000000000000000 | 150 + 4 | 2100 | 14.0000000000000000 | 150 + 10 | 1500 | 10.0000000000000000 | 150 + 11 | 1650 | 11.0000000000000000 | 150 + 12 | 1800 | 12.0000000000000000 | 150 + 13 | 1950 | 13.0000000000000000 | 150 + 14 | 2100 | 14.0000000000000000 | 150 +(10 rows) + +-- Test partitionwise grouping without any aggregates +EXPLAIN (COSTS OFF) +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + QUERY PLAN +------------------------------------------- + Merge Append + Sort Key: pagg_tab_p1.c + -> Group + Group Key: pagg_tab_p1.c + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> Group + Group Key: pagg_tab_p2.c + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> Group + Group Key: pagg_tab_p3.c + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(17 rows) + +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + c +------ + 0000 + 0001 + 0002 + 0003 + 0004 + 0005 + 0006 + 0007 + 0008 + 0009 + 0010 + 0011 +(12 rows) + +EXPLAIN (COSTS OFF) +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + QUERY PLAN +------------------------------------------------- + Group + Group Key: pagg_tab_p1.a + -> Merge Append + Sort Key: pagg_tab_p1.a + -> Group + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + Filter: (a < 3) + -> Group + Group Key: pagg_tab_p2.a + -> Sort + Sort Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + Filter: (a < 3) + -> Group + Group Key: pagg_tab_p3.a + -> Sort + Sort Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 + Filter: (a < 3) +(22 rows) + +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + a +--- + 0 + 1 + 2 +(3 rows) + +RESET enable_hashagg; +-- ROLLUP, partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)) + -> MixedAggregate + Hash Key: pagg_tab_p1.c + Group Key: () + -> Append + -> Seq Scan on pagg_tab_p1 + -> Seq Scan on pagg_tab_p2 + -> Seq Scan on pagg_tab_p3 +(9 rows) + +-- ORDERED SET within the aggregate. +-- Full aggregation; since all the rows that belong to the same group come +-- from the same partition, having an ORDER BY within the aggregate doesn't +-- make any difference. +EXPLAIN (COSTS OFF) +SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) + -> Append + -> GroupAggregate + Group Key: pagg_tab_p1.c + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> GroupAggregate + Group Key: pagg_tab_p2.c + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> GroupAggregate + Group Key: pagg_tab_p3.c + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(18 rows) + +-- Since GROUP BY clause does not match with PARTITION KEY; we need to do +-- partial aggregation. However, ORDERED SET are not partial safe and thus +-- partitionwise aggregation plan is not generated. +EXPLAIN (COSTS OFF) +SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) + -> GroupAggregate + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Append + -> Seq Scan on pagg_tab_p1 + -> Seq Scan on pagg_tab_p2 + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- JOIN query +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.x, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t1_1.x + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t1_2.x + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + x | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- GROUP BY having other matching key +EXPLAIN (COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t2.y, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t2.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t2_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t2_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +-- Also test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------- + Sort + Sort Key: t1.y, (sum(t1.x)), (count(*)) + -> Finalize GroupAggregate + Group Key: t1.y + Filter: (avg(t1.x) > '10'::numeric) + -> Merge Append + Sort Key: t1.y + -> Partial GroupAggregate + Group Key: t1.y + -> Sort + Sort Key: t1.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> Partial GroupAggregate + Group Key: t1_1.y + -> Sort + Sort Key: t1_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> Partial GroupAggregate + Group Key: t1_2.y + -> Sort + Sort Key: t1_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(34 rows) + +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + y | sum | count +----+------+------- + 2 | 600 | 50 + 4 | 1200 | 50 + 8 | 900 | 50 + 12 | 600 | 50 + 14 | 1200 | 50 + 18 | 900 | 50 +(6 rows) + +RESET enable_hashagg; +-- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for +-- aggregation +-- LEFT JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: b.y + -> Sort + Sort Key: b.y + -> Append + -> Partial HashAggregate + Group Key: b.y + -> Hash Left Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: b_1.y + -> Hash Left Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: b_2.y + -> Hash Right Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | sum +----+------ + 0 | 500 + 6 | 1100 + 12 | 700 + 18 | 1300 + 24 | 900 + | 900 +(6 rows) + +-- RIGHT JOIN, should produce full partitionwise aggregation plan as +-- GROUP BY is on non-nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------ + Sort + Sort Key: b.y + -> Append + -> HashAggregate + Group Key: b.y + -> Hash Right Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> HashAggregate + Group Key: b_1.y + -> Hash Right Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> HashAggregate + Group Key: b_2.y + -> Hash Left Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(24 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | sum +----+------ + 0 | 500 + 3 | + 6 | 1100 + 9 | + 12 | 700 + 15 | + 18 | 1300 + 21 | + 24 | 900 + 27 | +(10 rows) + +-- FULL JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: a.x + -> Sort + Sort Key: a.x + -> Append + -> Partial HashAggregate + Group Key: a.x + -> Hash Full Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: a_1.x + -> Hash Full Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: a_2.x + -> Hash Full Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + x | sum +----+------ + 0 | 500 + 2 | + 4 | + 6 | 1100 + 8 | + 10 | + 12 | 700 + 14 | + 16 | + 18 | 1300 + 20 | + 22 | + 24 | 900 + 26 | + 28 | + | 500 +(16 rows) + +-- LEFT JOIN, with dummy relation on right side, +-- should produce full partitionwise aggregation plan as GROUP BY is on +-- non-nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +----------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab1_p1.x, y + -> Append + -> HashAggregate + Group Key: pagg_tab1_p1.x, y + -> Hash Left Join + Hash Cond: (pagg_tab1_p1.x = y) + Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) + -> Seq Scan on pagg_tab1_p1 + Filter: (x < 20) + -> Hash + -> Result + One-Time Filter: false + -> HashAggregate + Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y + -> Hash Left Join + Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) + Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) + -> Seq Scan on pagg_tab1_p2 + Filter: (x < 20) + -> Hash + -> Seq Scan on pagg_tab2_p2 + Filter: (y > 10) +(23 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 +(7 rows) + +-- FULL JOIN, with dummy relations on both sides, +-- should produce partial partitionwise aggregation plan as GROUP BY is on +-- nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize GroupAggregate + Group Key: pagg_tab1_p1.x, y + -> Sort + Sort Key: pagg_tab1_p1.x, y + -> Append + -> Partial HashAggregate + Group Key: pagg_tab1_p1.x, y + -> Hash Full Join + Hash Cond: (pagg_tab1_p1.x = y) + Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) + -> Seq Scan on pagg_tab1_p1 + Filter: (x < 20) + -> Hash + -> Result + One-Time Filter: false + -> Partial HashAggregate + Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y + -> Hash Full Join + Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) + Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) + -> Seq Scan on pagg_tab1_p2 + Filter: (x < 20) + -> Hash + -> Seq Scan on pagg_tab2_p2 + Filter: (y > 10) + -> Partial HashAggregate + Group Key: x, pagg_tab2_p3.y + -> Hash Full Join + Hash Cond: (pagg_tab2_p3.y = x) + Filter: ((x > 5) OR (pagg_tab2_p3.y < 20)) + -> Seq Scan on pagg_tab2_p3 + Filter: (y > 10) + -> Hash + -> Result + One-Time Filter: false +(35 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 + | 15 | 10 +(8 rows) + +-- Empty join relation because of empty outer side, no partitionwise agg plan +EXPLAIN (COSTS OFF) +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + QUERY PLAN +--------------------------------------- + GroupAggregate + Group Key: pagg_tab1.x, pagg_tab1.y + -> Sort + Sort Key: pagg_tab1.y + -> Result + One-Time Filter: false +(6 rows) + +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + x | y | count +---+---+------- +(0 rows) + +-- Partition by multiple columns +CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); +CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (10, 10); +CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (10, 10) TO (20, 20); +CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (20, 20) TO (30, 30); +INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab_m; +-- Partial aggregation as GROUP BY clause does not match with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m_p1.a, (sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) + -> Finalize HashAggregate + Group Key: pagg_tab_m_p1.a + Filter: (avg(pagg_tab_m_p1.c) < '22'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_m_p1.a + -> Seq Scan on pagg_tab_m_p1 + -> Partial HashAggregate + Group Key: pagg_tab_m_p2.a + -> Seq Scan on pagg_tab_m_p2 + -> Partial HashAggregate + Group Key: pagg_tab_m_p3.a + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 20.0000000000000000 | 100 + 1 | 1600 | 21.0000000000000000 | 100 + 10 | 1500 | 20.0000000000000000 | 100 + 11 | 1600 | 21.0000000000000000 | 100 + 20 | 1500 | 20.0000000000000000 | 100 + 21 | 1600 | 21.0000000000000000 | 100 +(6 rows) + +-- Full aggregation as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m_p1.a, (sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) + -> Append + -> HashAggregate + Group Key: pagg_tab_m_p1.a, ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2) + Filter: (sum(pagg_tab_m_p1.b) < 50) + -> Seq Scan on pagg_tab_m_p1 + -> HashAggregate + Group Key: pagg_tab_m_p2.a, ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2) + Filter: (sum(pagg_tab_m_p2.b) < 50) + -> Seq Scan on pagg_tab_m_p2 + -> HashAggregate + Group Key: pagg_tab_m_p3.a, ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2) + Filter: (sum(pagg_tab_m_p3.b) < 50) + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + a | sum | avg | count +----+-----+---------------------+------- + 0 | 0 | 20.0000000000000000 | 25 + 1 | 25 | 21.0000000000000000 | 25 + 10 | 0 | 20.0000000000000000 | 25 + 11 | 25 | 21.0000000000000000 | 25 + 20 | 0 | 20.0000000000000000 | 25 + 21 | 25 | 21.0000000000000000 | 25 +(6 rows) + +-- Full aggregation as PARTITION KEY is part of GROUP BY clause +EXPLAIN (COSTS OFF) +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_m_p1.a, pagg_tab_m_p1.c, (sum(pagg_tab_m_p1.b)) + -> Append + -> HashAggregate + Group Key: ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2), pagg_tab_m_p1.c, pagg_tab_m_p1.a + Filter: ((sum(pagg_tab_m_p1.b) = 50) AND (avg(pagg_tab_m_p1.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p1 + -> HashAggregate + Group Key: ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2), pagg_tab_m_p2.c, pagg_tab_m_p2.a + Filter: ((sum(pagg_tab_m_p2.b) = 50) AND (avg(pagg_tab_m_p2.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p2 + -> HashAggregate + Group Key: ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2), pagg_tab_m_p3.c, pagg_tab_m_p3.a + Filter: ((sum(pagg_tab_m_p3.b) = 50) AND (avg(pagg_tab_m_p3.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + a | c | sum | avg | count +----+----+-----+---------------------+------- + 0 | 30 | 50 | 30.0000000000000000 | 5 + 0 | 40 | 50 | 40.0000000000000000 | 5 + 10 | 30 | 50 | 30.0000000000000000 | 5 + 10 | 40 | 50 | 40.0000000000000000 | 5 + 20 | 30 | 50 | 30.0000000000000000 | 5 + 20 | 40 | 50 | 40.0000000000000000 | 5 +(6 rows) + +-- Test with multi-level partitioning scheme +CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); +CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY LIST (c); +CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001'); +CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0002', '0003'); +-- This level of partitioning has different column positions than the parent +CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); +CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); +CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (5) TO (10); +ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (5); +ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_ml; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, but still we do not see a partial aggregation as array_agg() +-- is not partial agg safe. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p2_s1.a, (sum(pagg_tab_ml_p2_s1.b)), (array_agg(DISTINCT pagg_tab_ml_p2_s1.c)) + -> Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Seq Scan on pagg_tab_ml_p2_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Seq Scan on pagg_tab_ml_p3_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Seq Scan on pagg_tab_ml_p1 +(27 rows) + +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | array_agg | count +----+------+-------------+------- + 0 | 0 | {0000,0002} | 1000 + 1 | 1000 | {0001,0003} | 1000 + 2 | 2000 | {0000,0002} | 1000 + 10 | 0 | {0000,0002} | 1000 + 11 | 1000 | {0001,0003} | 1000 + 12 | 2000 | {0000,0002} | 1000 + 20 | 0 | {0000,0002} | 1000 + 21 | 1000 | {0001,0003} | 1000 + 22 | 2000 | {0000,0002} | 1000 +(9 rows) + +-- Without ORDER BY clause, to test Gather at top-most path +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; + QUERY PLAN +----------------------------------------------------------------- + Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Seq Scan on pagg_tab_ml_p2_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Seq Scan on pagg_tab_ml_p3_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Seq Scan on pagg_tab_ml_p1 +(25 rows) + +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.a + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.a + -> Seq Scan on pagg_tab_ml_p2_s2 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.a + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.a + -> Seq Scan on pagg_tab_ml_p3_s2 +(31 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.b + -> Sort + Sort Key: pagg_tab_ml_p1.b + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.b + -> Seq Scan on pagg_tab_ml_p1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.b + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.b + -> Seq Scan on pagg_tab_ml_p2_s2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.b + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.b + -> Seq Scan on pagg_tab_ml_p3_s2 +(22 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c + Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c + Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c + Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c + Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c + Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 +(23 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates +SET max_parallel_workers_per_gather TO 2; +SET min_parallel_table_scan_size TO '8kB'; +SET parallel_setup_cost TO 0; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.a + -> Parallel Seq Scan on pagg_tab_ml_p1 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 +(41 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.b + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.b + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.b + -> Parallel Seq Scan on pagg_tab_ml_p1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.b + -> Parallel Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.b + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 +(24 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Parallel Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c + Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c + Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c + Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c + Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c + Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 +(25 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates (single level) +-- Add few parallel setup cost, so that we will see a plan which gathers +-- partially created paths even for full aggregation and sticks a single Gather +-- followed by finalization step. +-- Without this, the cost of doing partial aggregation + Gather + finalization +-- for each partition and then Append over it turns out to be same and this +-- wins as we add it first. This parallel_setup_cost plays a vital role in +-- costing such plans. +SET parallel_setup_cost TO 10; +CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_para; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.x + Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.x + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.x + -> Parallel Seq Scan on pagg_tab_para_p1 + -> Partial HashAggregate + Group Key: pagg_tab_para_p2.x + -> Parallel Seq Scan on pagg_tab_para_p2 + -> Partial HashAggregate + Group Key: pagg_tab_para_p3.x + -> Parallel Seq Scan on pagg_tab_para_p3 +(19 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.y, (sum(pagg_tab_para_p1.x)), (avg(pagg_tab_para_p1.x)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.y + Filter: (avg(pagg_tab_para_p1.x) < '12'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.y + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.y + -> Parallel Seq Scan on pagg_tab_para_p1 + -> Partial HashAggregate + Group Key: pagg_tab_para_p2.y + -> Parallel Seq Scan on pagg_tab_para_p2 + -> Partial HashAggregate + Group Key: pagg_tab_para_p3.y + -> Parallel Seq Scan on pagg_tab_para_p3 +(19 rows) + +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + y | sum | avg | count +----+-------+---------------------+------- + 0 | 15000 | 10.0000000000000000 | 1500 + 1 | 16500 | 11.0000000000000000 | 1500 + 10 | 15000 | 10.0000000000000000 | 1500 + 11 | 16500 | 11.0000000000000000 | 1500 +(4 rows) + |