Skip to content

Commit 400cea5

Browse files
author
Commitfest Bot
committed
[CF 5553] starjoin join order planning
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5553 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/[email protected] Author(s): Tomas Vondra
2 parents 65db396 + 5c27223 commit 400cea5

File tree

4 files changed

+288
-2
lines changed

4 files changed

+288
-2
lines changed

src/backend/optimizer/path/allpaths.c

+117-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ typedef enum pushdown_safe_type
7777

7878
/* These parameters are set by GUC */
7979
bool enable_geqo = false; /* just in case GUC doesn't set it */
80+
bool enable_starjoin = false;
8081
int geqo_threshold;
8182
int min_parallel_table_scan_size;
8283
int min_parallel_index_scan_size;
@@ -3410,6 +3411,114 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
34103411
}
34113412
}
34123413

3414+
static int
3415+
starjoin_join_search(PlannerInfo *root, List *initial_rels, int level)
3416+
{
3417+
if (!enable_starjoin)
3418+
return level;
3419+
3420+
{
3421+
ListCell *lc;
3422+
List *rels = plan_star_join(root, initial_rels);
3423+
RelOptInfo *fact = NULL;
3424+
RelOptInfo *rel = NULL;
3425+
3426+
/*
3427+
* add the dimensions one by one, and adjust the start level
3428+
*
3429+
* XXX The first element is the fact table.
3430+
*/
3431+
foreach(lc, rels)
3432+
{
3433+
ListCell *lc2;
3434+
RelOptInfo *old_rel = NULL;
3435+
3436+
rel = lfirst(lc);
3437+
3438+
/* us the first element as fact table, jump to the next one,
3439+
* which is the first dimension */
3440+
if (fact == NULL)
3441+
{
3442+
fact = rel;
3443+
continue;
3444+
}
3445+
3446+
/* we're adding join for the first dimension, so set the level */
3447+
root->join_cur_level = level;
3448+
3449+
/*
3450+
* XXX Subset of join_search_one_level. The main difference is
3451+
* we don't need to walk any of the lower levels, because for
3452+
* level=2 we already have the fact table, and for higher
3453+
* levels there should be only a single joinrel.
3454+
*/
3455+
3456+
if (level == 2)
3457+
old_rel = fact;
3458+
else
3459+
old_rel = (RelOptInfo *) linitial(root->join_rel_level[level - 1]);
3460+
3461+
/* there should be no join relation yet */
3462+
Assert(root->join_rel_level[level] == NIL);
3463+
3464+
make_rel_by_clause_joins(root, old_rel, rel);
3465+
3466+
/*
3467+
* If everything went fine, we should have exactly one join relation
3468+
* for the current level.
3469+
*
3470+
* XXX This could happen if the current starjoin logic fails to
3471+
* consider something that prevents creating the join, e.g. some
3472+
* sort of join restriction. Not sure if that should be treated
3473+
* as a bug, or something expected (in which case we could just
3474+
* fallback to the regular planning).
3475+
*/
3476+
Assert(root->join_rel_level[startlev] != NIL);
3477+
Assert(list_length(root->join_rel_level[startlev]) == 1);
3478+
3479+
/* generate/set paths for the join relation we just created */
3480+
3481+
/*
3482+
* Run generate_partitionwise_join_paths() and
3483+
* generate_useful_gather_paths() for each just-processed joinrel. We
3484+
* could not do this earlier because both regular and partial paths
3485+
* can get added to a particular joinrel at multiple times within
3486+
* join_search_one_level.
3487+
*
3488+
* After that, we're done creating paths for the joinrel, so run
3489+
* set_cheapest().
3490+
*/
3491+
foreach(lc2, root->join_rel_level[level])
3492+
{
3493+
rel = (RelOptInfo *) lfirst(lc2);
3494+
3495+
/* Create paths for partitionwise joins. */
3496+
generate_partitionwise_join_paths(root, rel);
3497+
3498+
/*
3499+
* Except for the topmost scan/join rel, consider gathering
3500+
* partial paths. We'll do the same for the topmost scan/join rel
3501+
* once we know the final targetlist (see grouping_planner's and
3502+
* its call to apply_scanjoin_target_to_paths).
3503+
*/
3504+
if (!bms_equal(rel->relids, root->all_query_rels))
3505+
generate_useful_gather_paths(root, rel, false);
3506+
3507+
/* Find and save the cheapest paths for this rel */
3508+
set_cheapest(rel);
3509+
3510+
#ifdef OPTIMIZER_DEBUG
3511+
pprint(rel);
3512+
#endif
3513+
}
3514+
3515+
level++;
3516+
}
3517+
}
3518+
3519+
return level;
3520+
}
3521+
34133522
/*
34143523
* standard_join_search
34153524
* Find possible joinpaths for a query by successively finding ways
@@ -3443,6 +3552,7 @@ RelOptInfo *
34433552
standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
34443553
{
34453554
int lev;
3555+
int startlev = 2;
34463556
RelOptInfo *rel;
34473557

34483558
/*
@@ -3466,7 +3576,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
34663576

34673577
root->join_rel_level[1] = initial_rels;
34683578

3469-
for (lev = 2; lev <= levels_needed; lev++)
3579+
/*
3580+
* Try simplified planning for starjoin. If it succeeds, we should
3581+
* continue at level startlev.
3582+
*/
3583+
startlev = starjoin_join_search(root, initial_rels, 2);
3584+
3585+
for (lev = startlev; lev <= levels_needed; lev++)
34703586
{
34713587
ListCell *lc;
34723588

src/backend/optimizer/path/joinrels.c

+156-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,6 @@ join_search_one_level(PlannerInfo *root, int level)
225225
foreach(r, joinrels[level - 1])
226226
{
227227
RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
228-
229228
make_rels_by_clauseless_joins(root,
230229
old_rel,
231230
joinrels[1]);
@@ -256,6 +255,19 @@ join_search_one_level(PlannerInfo *root, int level)
256255
}
257256
}
258257

258+
void
259+
make_rel_by_clause_joins(PlannerInfo *root,
260+
RelOptInfo *old_rel,
261+
RelOptInfo *other_rel)
262+
{
263+
if (!bms_overlap(old_rel->relids, other_rel->relids) &&
264+
(have_relevant_joinclause(root, old_rel, other_rel) ||
265+
have_join_order_restriction(root, old_rel, other_rel)))
266+
{
267+
(void) make_join_rel(root, old_rel, other_rel);
268+
}
269+
}
270+
259271
/*
260272
* make_rels_by_clause_joins
261273
* Build joins between the given relation 'old_rel' and other relations
@@ -1979,3 +1991,146 @@ get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel,
19791991
*parts2 = lappend(*parts2, child_rel2);
19801992
}
19811993
}
1994+
1995+
/*
1996+
* Try to identify a starjoin in the list of relations. Pick the largest
1997+
* relation, and the smaller dimensions.
1998+
*
1999+
* Happens in two steps. First, we find the largest relation and consider
2000+
* it to be the "fact" of the star schema. Then we walk the rest of the
2001+
* relations and check which can be treated as dimensions for the fact.
2002+
* This is possible only if the relation has join clause only to the fact
2003+
* and no other relations.
2004+
*
2005+
* XXX It can happen the largest table is not the fact, in which case we
2006+
* should just try the second largest one, etc. Or maybe there are multiple
2007+
* facts, in which case we detect the should try to build a group for each
2008+
* fact (fact + dimensions).
2009+
*
2010+
* Returns the list of relations to join, in the join order, with the fact
2011+
* table as the first element, followed by the dimensions.
2012+
*/
2013+
List *
2014+
plan_star_join(PlannerInfo *root, List *rels)
2015+
{
2016+
ListCell *lc;
2017+
RelOptInfo *fact = NULL;
2018+
List *dimensions = NIL;
2019+
2020+
/*
2021+
* We need at least 3 relations for a star join, to have a chance to
2022+
* gain anything by simpler join order planning.
2023+
*/
2024+
if (list_length(rels) < 3)
2025+
return NIL;
2026+
2027+
/*
2028+
* Find the largest relation, we'll try to use it as "fact" table.
2029+
*/
2030+
foreach(lc, rels)
2031+
{
2032+
RelOptInfo *rel = (RelOptInfo *) lfirst(lc);
2033+
2034+
/* first relation */
2035+
if (fact == NULL)
2036+
{
2037+
fact = rel;
2038+
continue;
2039+
}
2040+
2041+
/*
2042+
* We look at total relation sizes, not the estimated cardinality
2043+
* with conditions applied.
2044+
*/
2045+
if (fact->tuples < rel->tuples)
2046+
{
2047+
fact = rel;
2048+
continue;
2049+
}
2050+
}
2051+
2052+
/*
2053+
* If the "fact" does not have any join clauses, we're done.
2054+
*
2055+
* XXX Seems has_join_restriction is not what we want to require for the
2056+
* fact table - it checks for restrictions on join order, but that's not
2057+
* what we want for the fact. Maybe we should do the exact opposite, i.e.
2058+
* require that a fact table does not have that? Although, if we want to
2059+
* support multiple "partial star joins" (query on multiple fact tables,
2060+
* each with it's own dimensions).
2061+
*/
2062+
//if (!has_join_restriction(root, fact))
2063+
// return NIL;
2064+
2065+
/* the fact must have no restrictions */
2066+
if (has_join_restriction(root, fact))
2067+
return NIL;
2068+
2069+
/*
2070+
* Now go and try to detect dimensions, i.e. relations that have a join
2071+
* with the fact table, and no other relations. We will order them by
2072+
* selectivity (rows / tuples), because we prefer to reduce the join
2073+
* size early.
2074+
*/
2075+
foreach(lc, rels)
2076+
{
2077+
RelOptInfo *rel = (RelOptInfo *) lfirst(lc);
2078+
2079+
/* ignore the fact table */
2080+
if (rel == fact)
2081+
continue;
2082+
2083+
// elog(WARNING, "> has_join_restriction %d", has_join_restriction(root, rel));
2084+
// elog(WARNING, "> has_legal_joinclause %d", has_legal_joinclause(root, rel));
2085+
2086+
/* ignore rels without any join clause */
2087+
// if (!has_join_restriction(root, rel))
2088+
// continue;
2089+
2090+
/*
2091+
* XXX Do not allow join restrictions for dimensions either, just like
2092+
* for fact, although for dims we should be able to allow this ...
2093+
*/
2094+
if (has_join_restriction(root, rel))
2095+
continue;
2096+
2097+
/*
2098+
* Must have join clause with the fact table. This is a subset of
2099+
* has_legal_joinclause for a single (fact) table. We always look
2100+
* at initial rels, so the relids overlap checks are not needed.
2101+
*/
2102+
if (have_relevant_joinclause(root, fact, rel))
2103+
{
2104+
Relids joinrelids;
2105+
SpecialJoinInfo *sjinfo;
2106+
bool reversed;
2107+
2108+
/* join_is_legal needs relids of the union */
2109+
joinrelids = bms_union(fact->relids, rel->relids);
2110+
2111+
if (join_is_legal(root, fact, rel, joinrelids,
2112+
&sjinfo, &reversed))
2113+
{
2114+
/* Yes, this will work */
2115+
// bms_free(joinrelids);
2116+
2117+
// FIXME this should also check the rel does not have join
2118+
// clauses to any other relation;
2119+
dimensions = lappend(dimensions, rel);
2120+
}
2121+
2122+
bms_free(joinrelids);
2123+
}
2124+
}
2125+
2126+
/*
2127+
* repeat the check we actually found a star join with at least 3 rels
2128+
* (so two dimensions)
2129+
*/
2130+
if (list_length(dimensions) < 2)
2131+
return NIL;
2132+
2133+
/* FIXME sort the dimensions by selectivity */
2134+
2135+
return list_concat(list_make1(fact), dimensions);
2136+
}

src/backend/utils/misc/guc_tables.c

+10
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,16 @@ struct config_bool ConfigureNamesBool[] =
10311031
true,
10321032
NULL, NULL, NULL
10331033
},
1034+
{
1035+
{"enable_starjoin", PGC_USERSET, QUERY_TUNING_GEQO,
1036+
gettext_noop("Enables starjoin optimization."),
1037+
gettext_noop("This algorithm attempts to do faster planning for star joins."),
1038+
GUC_EXPLAIN
1039+
},
1040+
&enable_starjoin,
1041+
false,
1042+
NULL, NULL, NULL
1043+
},
10341044
{
10351045
/*
10361046
* Not for general use --- used by SET SESSION AUTHORIZATION and SET

src/include/optimizer/paths.h

+5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
* allpaths.c
2222
*/
2323
extern PGDLLIMPORT bool enable_geqo;
24+
extern PGDLLIMPORT bool enable_starjoin;
2425
extern PGDLLIMPORT int geqo_threshold;
2526
extern PGDLLIMPORT int min_parallel_table_scan_size;
2627
extern PGDLLIMPORT int min_parallel_index_scan_size;
@@ -114,6 +115,10 @@ extern bool have_dangerous_phv(PlannerInfo *root,
114115
extern void mark_dummy_rel(RelOptInfo *rel);
115116
extern void init_dummy_sjinfo(SpecialJoinInfo *sjinfo, Relids left_relids,
116117
Relids right_relids);
118+
extern List *plan_star_join(PlannerInfo *root, List *rels);
119+
extern void make_rel_by_clause_joins(PlannerInfo *root,
120+
RelOptInfo *old_rel,
121+
RelOptInfo *other_rel);
117122

118123
/*
119124
* equivclass.c

0 commit comments

Comments
 (0)