diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index df3453f99f0f..cb16b90be7ee 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -77,6 +77,7 @@ typedef enum pushdown_safe_type /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ +bool enable_starjoin = false; int geqo_threshold; int min_parallel_table_scan_size; int min_parallel_index_scan_size; @@ -3410,6 +3411,114 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) } } +static int +starjoin_join_search(PlannerInfo *root, List *initial_rels, int level) +{ + if (!enable_starjoin) + return level; + + { + ListCell *lc; + List *rels = plan_star_join(root, initial_rels); + RelOptInfo *fact = NULL; + RelOptInfo *rel = NULL; + + /* + * add the dimensions one by one, and adjust the start level + * + * XXX The first element is the fact table. + */ + foreach(lc, rels) + { + ListCell *lc2; + RelOptInfo *old_rel = NULL; + + rel = lfirst(lc); + + /* us the first element as fact table, jump to the next one, + * which is the first dimension */ + if (fact == NULL) + { + fact = rel; + continue; + } + + /* we're adding join for the first dimension, so set the level */ + root->join_cur_level = level; + + /* + * XXX Subset of join_search_one_level. The main difference is + * we don't need to walk any of the lower levels, because for + * level=2 we already have the fact table, and for higher + * levels there should be only a single joinrel. + */ + + if (level == 2) + old_rel = fact; + else + old_rel = (RelOptInfo *) linitial(root->join_rel_level[level - 1]); + + /* there should be no join relation yet */ + Assert(root->join_rel_level[level] == NIL); + + make_rel_by_clause_joins(root, old_rel, rel); + + /* + * If everything went fine, we should have exactly one join relation + * for the current level. + * + * XXX This could happen if the current starjoin logic fails to + * consider something that prevents creating the join, e.g. some + * sort of join restriction. Not sure if that should be treated + * as a bug, or something expected (in which case we could just + * fallback to the regular planning). + */ + Assert(root->join_rel_level[startlev] != NIL); + Assert(list_length(root->join_rel_level[startlev]) == 1); + + /* generate/set paths for the join relation we just created */ + + /* + * Run generate_partitionwise_join_paths() and + * generate_useful_gather_paths() for each just-processed joinrel. We + * could not do this earlier because both regular and partial paths + * can get added to a particular joinrel at multiple times within + * join_search_one_level. + * + * After that, we're done creating paths for the joinrel, so run + * set_cheapest(). + */ + foreach(lc2, root->join_rel_level[level]) + { + rel = (RelOptInfo *) lfirst(lc2); + + /* Create paths for partitionwise joins. */ + generate_partitionwise_join_paths(root, rel); + + /* + * Except for the topmost scan/join rel, consider gathering + * partial paths. We'll do the same for the topmost scan/join rel + * once we know the final targetlist (see grouping_planner's and + * its call to apply_scanjoin_target_to_paths). + */ + if (!bms_equal(rel->relids, root->all_query_rels)) + generate_useful_gather_paths(root, rel, false); + + /* Find and save the cheapest paths for this rel */ + set_cheapest(rel); + + #ifdef OPTIMIZER_DEBUG + pprint(rel); + #endif + } + + level++; + } + } + + return level; +} + /* * standard_join_search * Find possible joinpaths for a query by successively finding ways @@ -3443,6 +3552,7 @@ RelOptInfo * standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) { int lev; + int startlev = 2; RelOptInfo *rel; /* @@ -3466,7 +3576,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) root->join_rel_level[1] = initial_rels; - for (lev = 2; lev <= levels_needed; lev++) + /* + * Try simplified planning for starjoin. If it succeeds, we should + * continue at level startlev. + */ + startlev = starjoin_join_search(root, initial_rels, 2); + + for (lev = startlev; lev <= levels_needed; lev++) { ListCell *lc; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 60d65762b5d5..4f43ae95e1cb 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -225,7 +225,6 @@ join_search_one_level(PlannerInfo *root, int level) foreach(r, joinrels[level - 1]) { RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - make_rels_by_clauseless_joins(root, old_rel, joinrels[1]); @@ -256,6 +255,19 @@ join_search_one_level(PlannerInfo *root, int level) } } +void +make_rel_by_clause_joins(PlannerInfo *root, + RelOptInfo *old_rel, + RelOptInfo *other_rel) +{ + if (!bms_overlap(old_rel->relids, other_rel->relids) && + (have_relevant_joinclause(root, old_rel, other_rel) || + have_join_order_restriction(root, old_rel, other_rel))) + { + (void) make_join_rel(root, old_rel, other_rel); + } +} + /* * make_rels_by_clause_joins * Build joins between the given relation 'old_rel' and other relations @@ -1979,3 +1991,146 @@ get_matching_part_pairs(PlannerInfo *root, RelOptInfo *joinrel, *parts2 = lappend(*parts2, child_rel2); } } + +/* + * Try to identify a starjoin in the list of relations. Pick the largest + * relation, and the smaller dimensions. + * + * Happens in two steps. First, we find the largest relation and consider + * it to be the "fact" of the star schema. Then we walk the rest of the + * relations and check which can be treated as dimensions for the fact. + * This is possible only if the relation has join clause only to the fact + * and no other relations. + * + * XXX It can happen the largest table is not the fact, in which case we + * should just try the second largest one, etc. Or maybe there are multiple + * facts, in which case we detect the should try to build a group for each + * fact (fact + dimensions). + * + * Returns the list of relations to join, in the join order, with the fact + * table as the first element, followed by the dimensions. + */ +List * +plan_star_join(PlannerInfo *root, List *rels) +{ + ListCell *lc; + RelOptInfo *fact = NULL; + List *dimensions = NIL; + + /* + * We need at least 3 relations for a star join, to have a chance to + * gain anything by simpler join order planning. + */ + if (list_length(rels) < 3) + return NIL; + + /* + * Find the largest relation, we'll try to use it as "fact" table. + */ + foreach(lc, rels) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(lc); + + /* first relation */ + if (fact == NULL) + { + fact = rel; + continue; + } + + /* + * We look at total relation sizes, not the estimated cardinality + * with conditions applied. + */ + if (fact->tuples < rel->tuples) + { + fact = rel; + continue; + } + } + + /* + * If the "fact" does not have any join clauses, we're done. + * + * XXX Seems has_join_restriction is not what we want to require for the + * fact table - it checks for restrictions on join order, but that's not + * what we want for the fact. Maybe we should do the exact opposite, i.e. + * require that a fact table does not have that? Although, if we want to + * support multiple "partial star joins" (query on multiple fact tables, + * each with it's own dimensions). + */ + //if (!has_join_restriction(root, fact)) + // return NIL; + + /* the fact must have no restrictions */ + if (has_join_restriction(root, fact)) + return NIL; + + /* + * Now go and try to detect dimensions, i.e. relations that have a join + * with the fact table, and no other relations. We will order them by + * selectivity (rows / tuples), because we prefer to reduce the join + * size early. + */ + foreach(lc, rels) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(lc); + + /* ignore the fact table */ + if (rel == fact) + continue; + + // elog(WARNING, "> has_join_restriction %d", has_join_restriction(root, rel)); + // elog(WARNING, "> has_legal_joinclause %d", has_legal_joinclause(root, rel)); + + /* ignore rels without any join clause */ + // if (!has_join_restriction(root, rel)) + // continue; + + /* + * XXX Do not allow join restrictions for dimensions either, just like + * for fact, although for dims we should be able to allow this ... + */ + if (has_join_restriction(root, rel)) + continue; + + /* + * Must have join clause with the fact table. This is a subset of + * has_legal_joinclause for a single (fact) table. We always look + * at initial rels, so the relids overlap checks are not needed. + */ + if (have_relevant_joinclause(root, fact, rel)) + { + Relids joinrelids; + SpecialJoinInfo *sjinfo; + bool reversed; + + /* join_is_legal needs relids of the union */ + joinrelids = bms_union(fact->relids, rel->relids); + + if (join_is_legal(root, fact, rel, joinrelids, + &sjinfo, &reversed)) + { + /* Yes, this will work */ + // bms_free(joinrelids); + + // FIXME this should also check the rel does not have join + // clauses to any other relation; + dimensions = lappend(dimensions, rel); + } + + bms_free(joinrelids); + } + } + + /* + * repeat the check we actually found a star join with at least 3 rels + * (so two dimensions) + */ + if (list_length(dimensions) < 2) + return NIL; + + /* FIXME sort the dimensions by selectivity */ + + return list_concat(list_make1(fact), dimensions); +} diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 9c0b10ad4dc2..705236f66728 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1031,6 +1031,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_starjoin", PGC_USERSET, QUERY_TUNING_GEQO, + gettext_noop("Enables starjoin optimization."), + gettext_noop("This algorithm attempts to do faster planning for star joins."), + GUC_EXPLAIN + }, + &enable_starjoin, + false, + NULL, NULL, NULL + }, { /* * Not for general use --- used by SET SESSION AUTHORIZATION and SET diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index bc5dfd7db417..12181a99f042 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -21,6 +21,7 @@ * allpaths.c */ extern PGDLLIMPORT bool enable_geqo; +extern PGDLLIMPORT bool enable_starjoin; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; @@ -114,6 +115,10 @@ extern bool have_dangerous_phv(PlannerInfo *root, extern void mark_dummy_rel(RelOptInfo *rel); extern void init_dummy_sjinfo(SpecialJoinInfo *sjinfo, Relids left_relids, Relids right_relids); +extern List *plan_star_join(PlannerInfo *root, List *rels); +extern void make_rel_by_clause_joins(PlannerInfo *root, + RelOptInfo *old_rel, + RelOptInfo *other_rel); /* * equivclass.c