summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlexander Korotkov2024-02-15 10:05:52 +0000
committerAlexander Korotkov2024-02-15 10:06:12 +0000
commit9f133763961e280d8ba692bcad0b061b861e9138 (patch)
treeaf76ca54ac251b87aefae347d586e6b300df3c8e /src
parent995d400ceca3e552f84fe19f150fb03327bdc0c2 (diff)
Pull up ANY-SUBLINK with the necessary lateral support.
For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle different types of scenarios. In the first stage, the sublink is pulled up as a subquery. Because of this, when writing this code, we did not have the ability to perform lateral joins, and therefore, we were unable to pull up Var with varlevelsup=1. Now that we have the ability to use lateral joins, we can eliminate this limitation. Author: Andy Fan <[email protected]> Author: Tom Lane <[email protected]> Reviewed-by: Tom Lane <[email protected]> Reviewed-by: Richard Guo <[email protected]> Reviewed-by: Alena Rybakina <[email protected]> Reviewed-by: Andrey Lepikhov <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/subselect.c17
-rw-r--r--src/test/regress/expected/join.out14
-rw-r--r--src/test/regress/expected/subselect.out126
-rw-r--r--src/test/regress/sql/join.sql8
-rw-r--r--src/test/regress/sql/subselect.sql37
5 files changed, 187 insertions, 15 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 3115d79ad98..47e14723d2b 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1278,14 +1278,23 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
List *subquery_vars;
Node *quals;
ParseState *pstate;
+ Relids sub_ref_outer_relids;
+ bool use_lateral;
Assert(sublink->subLinkType == ANY_SUBLINK);
/*
- * The sub-select must not refer to any Vars of the parent query. (Vars of
- * higher levels should be okay, though.)
+ * If the sub-select refers to any Vars of the parent query, we so let's
+ * considering it as LATERAL. (Vars of higher levels don't matter here.)
*/
- if (contain_vars_of_level((Node *) subselect, 1))
+ sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
+ use_lateral = !bms_is_empty(sub_ref_outer_relids);
+
+ /*
+ * Check that sub-select refers nothing outside of available_rels of the
+ * parent query.
+ */
+ if (!bms_is_subset(sub_ref_outer_relids, available_rels))
return NULL;
/*
@@ -1323,7 +1332,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
nsitem = addRangeTableEntryForSubquery(pstate,
subselect,
makeAlias("ANY_subquery", NIL),
- false,
+ use_lateral,
false);
rte = nsitem->p_rte;
parse->rtable = lappend(parse->rtable, rte);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 9c08d0134ca..0c2cba89213 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5277,7 +5277,7 @@ reset enable_nestloop;
explain (costs off)
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
- where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+ where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
QUERY PLAN
----------------------------------------------------------
Hash Join
@@ -5293,7 +5293,7 @@ select a.unique1, b.unique2
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
- where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+ where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
unique1 | unique2
---------+---------
123 | 123
@@ -8210,12 +8210,12 @@ select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
- where q1 = any (select q2 from int8_tbl t3
+ where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
- QUERY PLAN
-----------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Nested Loop
Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
-> Seq Scan on public.int8_tbl t1
@@ -8232,7 +8232,7 @@ lateral (select * from int8_tbl t1,
Filter: (SubPlan 3)
SubPlan 3
-> Result
- Output: t3.q2
+ Output: t3.q2, (random() > '0'::double precision)
One-Time Filter: $4
InitPlan 1 (returns $2)
-> Result
@@ -8249,7 +8249,7 @@ select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
- where q1 = any (select q2 from int8_tbl t3
+ where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index a3a4d03d104..e41b728df83 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1926,3 +1926,129 @@ select * from x for update;
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
(2 rows)
+-- Pull-up the direct-correlated ANY_SUBLINK
+explain (costs off)
+select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
+ QUERY PLAN
+------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred))
+ -> Seq Scan on tenk1 a
+ -> Hash
+ -> HashAggregate
+ Group Key: b.odd, b.hundred
+ -> Seq Scan on tenk2 b
+(7 rows)
+
+explain (costs off)
+select * from tenk1 A where exists
+(select 1 from tenk2 B
+where A.hundred in (select C.hundred FROM tenk2 C
+WHERE c.odd = b.odd));
+ QUERY PLAN
+---------------------------------
+ Nested Loop Semi Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on tenk1 a
+ -> Materialize
+ -> Seq Scan on tenk2 b
+ SubPlan 1
+ -> Seq Scan on tenk2 c
+ Filter: (odd = b.odd)
+(8 rows)
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.hundred is not avaiable.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+ QUERY PLAN
+---------------------------------
+ Nested Loop Left Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on tenk1 a
+ -> Materialize
+ -> Seq Scan on tenk2 b
+ SubPlan 1
+ -> Seq Scan on tenk2 c
+ Filter: (odd = b.odd)
+(8 rows)
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.odd is not avaiable for this.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
+ QUERY PLAN
+---------------------------------
+ Nested Loop Left Join
+ Join Filter: (SubPlan 1)
+ -> Seq Scan on tenk1 a
+ -> Materialize
+ -> Seq Scan on tenk2 b
+ SubPlan 1
+ -> Seq Scan on tenk2 c
+ Filter: (odd = a.odd)
+(8 rows)
+
+-- should be able to pull up since all the references is available
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+ QUERY PLAN
+------------------------------------------------------------------------
+ Nested Loop Left Join
+ -> Seq Scan on tenk1 a
+ -> Materialize
+ -> Hash Join
+ Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred))
+ -> Seq Scan on tenk2 b
+ -> Hash
+ -> HashAggregate
+ Group Key: c.odd, c.hundred
+ -> Seq Scan on tenk2 c
+(10 rows)
+
+-- we can pull up the sublink into the inner JoinExpr.
+explain (costs off)
+SELECT * FROM tenk1 A INNER JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+ QUERY PLAN
+-------------------------------------------------
+ Hash Join
+ Hash Cond: (c.odd = b.odd)
+ -> Hash Join
+ Hash Cond: (a.hundred = c.hundred)
+ -> Seq Scan on tenk1 a
+ -> Hash
+ -> HashAggregate
+ Group Key: c.odd, c.hundred
+ -> Seq Scan on tenk2 c
+ -> Hash
+ -> Seq Scan on tenk2 b
+(11 rows)
+
+-- we can pull up the aggregate sublink into RHS of a left join.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Nested Loop Left Join
+ -> Seq Scan on tenk1 a
+ -> Materialize
+ -> Nested Loop
+ -> Seq Scan on tenk2 b
+ -> Memoize
+ Cache Key: b.hundred, b.odd
+ Cache Mode: binary
+ -> Subquery Scan on "ANY_subquery"
+ Filter: (b.hundred = "ANY_subquery".min)
+ -> Result
+ InitPlan 1 (returns $1)
+ -> Limit
+ -> Index Scan using tenk2_hundred on tenk2 c
+ Index Cond: (hundred IS NOT NULL)
+ Filter: (odd = b.odd)
+(16 rows)
+
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index e1db2025db6..257f727a2be 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1864,11 +1864,11 @@ reset enable_nestloop;
explain (costs off)
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
- where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+ where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
select a.unique1, b.unique2
from onek a left join onek b on a.unique1 = b.unique2
- where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
+ where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
--
-- test full-join strength reduction
@@ -3038,7 +3038,7 @@ select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
- where q1 = any (select q2 from int8_tbl t3
+ where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
@@ -3047,7 +3047,7 @@ select * from (values (0), (1)) v(id),
lateral (select * from int8_tbl t1,
lateral (select * from
(select * from int8_tbl t2
- where q1 = any (select q2 from int8_tbl t3
+ where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
where q2 = (select greatest(t1.q1,t2.q2))
and (select v.id=0)) offset 0) ss2) ss
where t1.q1 = ss.q2) ss0;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index bd2e39efd48..2f3601a0582 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -968,3 +968,40 @@ select * from (with x as (select 2 as y) select * from x) ss;
explain (verbose, costs off)
with x as (select * from subselect_tbl)
select * from x for update;
+
+-- Pull-up the direct-correlated ANY_SUBLINK
+explain (costs off)
+select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
+
+explain (costs off)
+select * from tenk1 A where exists
+(select 1 from tenk2 B
+where A.hundred in (select C.hundred FROM tenk2 C
+WHERE c.odd = b.odd));
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.hundred is not avaiable.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+
+-- we should only try to pull up the sublink into RHS of a left join
+-- but a.odd is not avaiable for this.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
+
+-- should be able to pull up since all the references is available
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+
+-- we can pull up the sublink into the inner JoinExpr.
+explain (costs off)
+SELECT * FROM tenk1 A INNER JOIN tenk2 B
+ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
+
+-- we can pull up the aggregate sublink into RHS of a left join.
+explain (costs off)
+SELECT * FROM tenk1 A LEFT JOIN tenk2 B
+ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); \ No newline at end of file