summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRobert Haas2016-12-21 14:44:33 +0000
committerRobert Haas2016-12-21 14:45:50 +0000
commit59649c3f1cbd536314c0060dcabd234deab148b2 (patch)
treefffab67cf76e95f9cd8abeada5d6c00ffe99c942 /src
parentf3b421da5f4addc95812b9db05a24972b8fd9739 (diff)
Refactor merge path generation code.
This shouldn't change the set of paths that get generated in any way, but it is preparatory work for further changes to allow a partial path to be merge-joined witih a non-partial path to produce a partial join path. Dilip Kumar, with cosmetic adjustments by me.
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/joinpath.c461
1 files changed, 250 insertions, 211 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 96f00fca5bf..b5cbcf4d049 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys);
/*
@@ -777,6 +786,241 @@ sort_inner_and_outer(PlannerInfo *root,
}
/*
+ * generate_mergejoin_paths
+ * Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available. We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge. If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements). So, we consider truncations of the
+ * mergeclause list as well as the full list. (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *innerrel,
+ Path *outerpath,
+ JoinType jointype,
+ JoinPathExtraData *extra,
+ bool useallclauses,
+ Path *inner_cheapest_total,
+ List *merge_pathkeys)
+{
+ List *mergeclauses;
+ List *innersortkeys;
+ List *trialsortkeys;
+ Path *cheapest_startup_inner;
+ Path *cheapest_total_inner;
+ JoinType save_jointype = jointype;
+ int num_sortkeys;
+ int sortkeycnt;
+
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ jointype = JOIN_INNER;
+
+ /* Look for useful mergeclauses (if any) */
+ mergeclauses = find_mergeclauses_for_pathkeys(root,
+ outerpath->pathkeys,
+ true,
+ extra->mergeclause_list);
+
+ /*
+ * Done with this outer path if no chance for a mergejoin.
+ *
+ * Special corner case: for "x FULL JOIN y ON true", there will be no join
+ * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
+ * but since mergejoin is our only join type that supports FULL JOIN
+ * without any join clauses, it's necessary to generate a clauseless
+ * mergejoin path instead.
+ */
+ if (mergeclauses == NIL)
+ {
+ if (jointype == JOIN_FULL)
+ /* okay to try for mergejoin */ ;
+ else
+ return;
+ }
+ if (useallclauses &&
+ list_length(mergeclauses) != list_length(extra->mergeclause_list))
+ return;
+
+ /* Compute the required ordering of the inner path */
+ innersortkeys = make_inner_pathkeys_for_merge(root,
+ mergeclauses,
+ outerpath->pathkeys);
+
+ /*
+ * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+ * a sort will be needed, only cheapest total cost matters. (But
+ * try_mergejoin_path will do the right thing if inner_cheapest_total is
+ * already correctly sorted.)
+ */
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ inner_cheapest_total,
+ merge_pathkeys,
+ mergeclauses,
+ NIL,
+ innersortkeys,
+ jointype,
+ extra);
+
+ /* Can't do anything else if inner path needs to be unique'd */
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ return;
+
+ /*
+ * Look for presorted inner paths that satisfy the innersortkey list ---
+ * or any truncation thereof, if we are allowed to build a mergejoin using
+ * a subset of the merge clauses. Here, we consider both cheap startup
+ * cost and cheap total cost.
+ *
+ * Currently we do not consider parameterized inner paths here. This
+ * interacts with decisions elsewhere that also discriminate against
+ * mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ *
+ * As we shorten the sortkey list, we should consider only paths that are
+ * strictly cheaper than (in particular, not the same as) any path found
+ * in an earlier iteration. Otherwise we'd be intentionally using fewer
+ * merge keys than a given path allows (treating the rest as plain
+ * joinquals), which is unlikely to be a good idea. Also, eliminating
+ * paths here on the basis of compare_path_costs is a lot cheaper than
+ * building the mergejoin path only to throw it away.
+ *
+ * If inner_cheapest_total is well enough sorted to have not required a
+ * sort in the path made above, we shouldn't make a duplicate path with
+ * it, either. We handle that case with the same logic that handles the
+ * previous consideration, by initializing the variables that track
+ * cheapest-so-far properly. Note that we do NOT reject
+ * inner_cheapest_total if we find it matches some shorter set of
+ * pathkeys. That case corresponds to using fewer mergekeys to avoid
+ * sorting inner_cheapest_total, whereas we did sort it above, so the
+ * plans being considered are different.
+ */
+ if (pathkeys_contained_in(innersortkeys,
+ inner_cheapest_total->pathkeys))
+ {
+ /* inner_cheapest_total didn't require a sort */
+ cheapest_startup_inner = inner_cheapest_total;
+ cheapest_total_inner = inner_cheapest_total;
+ }
+ else
+ {
+ /* it did require a sort, at least for the full set of keys */
+ cheapest_startup_inner = NULL;
+ cheapest_total_inner = NULL;
+ }
+ num_sortkeys = list_length(innersortkeys);
+ if (num_sortkeys > 1 && !useallclauses)
+ trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
+ else
+ trialsortkeys = innersortkeys; /* won't really truncate */
+
+ for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+ {
+ Path *innerpath;
+ List *newclauses = NIL;
+
+ /*
+ * Look for an inner path ordered well enough for the first
+ * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
+ * destructively, which is why we made a copy...
+ */
+ trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ TOTAL_COST);
+ if (innerpath != NULL &&
+ (cheapest_total_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_total_inner,
+ TOTAL_COST) < 0))
+ {
+ /* Found a cheap (or even-cheaper) sorted path */
+ /* Select the right mergeclauses, if we didn't already */
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ false,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra);
+ cheapest_total_inner = innerpath;
+ }
+ /* Same on the basis of cheapest startup cost ... */
+ innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+ trialsortkeys,
+ NULL,
+ STARTUP_COST);
+ if (innerpath != NULL &&
+ (cheapest_startup_inner == NULL ||
+ compare_path_costs(innerpath, cheapest_startup_inner,
+ STARTUP_COST) < 0))
+ {
+ /* Found a cheap (or even-cheaper) sorted path */
+ if (innerpath != cheapest_total_inner)
+ {
+ /*
+ * Avoid rebuilding clause list if we already made one; saves
+ * memory in big join trees...
+ */
+ if (newclauses == NIL)
+ {
+ if (sortkeycnt < num_sortkeys)
+ {
+ newclauses =
+ find_mergeclauses_for_pathkeys(root,
+ trialsortkeys,
+ false,
+ mergeclauses);
+ Assert(newclauses != NIL);
+ }
+ else
+ newclauses = mergeclauses;
+ }
+ try_mergejoin_path(root,
+ joinrel,
+ outerpath,
+ innerpath,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL,
+ jointype,
+ extra);
+ }
+ cheapest_startup_inner = innerpath;
+ }
+
+ /*
+ * Don't consider truncated sortkeys if we need all clauses.
+ */
+ if (useallclauses)
+ break;
+ }
+}
+
+/*
* match_unsorted_outer
* Creates possible join paths for processing a single join relation
* 'joinrel' by employing either iterative substitution or
@@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
* cheapest-total inner-indexscan path (if any), and one on the
* cheapest-startup inner-indexscan path (if different).
*
- * We also consider mergejoins if mergejoin clauses are available. We have
- * two ways to generate the inner path for a mergejoin: sort the cheapest
- * inner path, or use an inner path that is already suitably ordered for the
- * merge. If we have several mergeclauses, it could be that there is no inner
- * path (or only a very expensive one) for the full list of mergeclauses, but
- * better paths exist if we truncate the mergeclause list (thereby discarding
- * some sort key requirements). So, we consider truncations of the
- * mergeclause list as well as the full list. (Ideally we'd consider all
- * subsets of the mergeclause list, but that seems way too expensive.)
+ * We also consider mergejoins if mergejoin clauses are available. See
+ * detailed comments in generate_mergejoin_paths.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
@@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
{
Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
- List *mergeclauses;
- List *innersortkeys;
- List *trialsortkeys;
- Path *cheapest_startup_inner;
- Path *cheapest_total_inner;
- int num_sortkeys;
- int sortkeycnt;
/*
* We cannot use an outer path that is parameterized by the inner rel.
@@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
if (inner_cheapest_total == NULL)
continue;
- /* Look for useful mergeclauses (if any) */
- mergeclauses = find_mergeclauses_for_pathkeys(root,
- outerpath->pathkeys,
- true,
- extra->mergeclause_list);
-
- /*
- * Done with this outer path if no chance for a mergejoin.
- *
- * Special corner case: for "x FULL JOIN y ON true", there will be no
- * join clauses at all. Ordinarily we'd generate a clauseless
- * nestloop path, but since mergejoin is our only join type that
- * supports FULL JOIN without any join clauses, it's necessary to
- * generate a clauseless mergejoin path instead.
- */
- if (mergeclauses == NIL)
- {
- if (jointype == JOIN_FULL)
- /* okay to try for mergejoin */ ;
- else
- continue;
- }
- if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
- continue;
-
- /* Compute the required ordering of the inner path */
- innersortkeys = make_inner_pathkeys_for_merge(root,
- mergeclauses,
- outerpath->pathkeys);
-
- /*
- * Generate a mergejoin on the basis of sorting the cheapest inner.
- * Since a sort will be needed, only cheapest total cost matters. (But
- * try_mergejoin_path will do the right thing if inner_cheapest_total
- * is already correctly sorted.)
- */
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- inner_cheapest_total,
- merge_pathkeys,
- mergeclauses,
- NIL,
- innersortkeys,
- jointype,
- extra);
-
- /* Can't do anything else if inner path needs to be unique'd */
- if (save_jointype == JOIN_UNIQUE_INNER)
- continue;
-
- /*
- * Look for presorted inner paths that satisfy the innersortkey list
- * --- or any truncation thereof, if we are allowed to build a
- * mergejoin using a subset of the merge clauses. Here, we consider
- * both cheap startup cost and cheap total cost.
- *
- * Currently we do not consider parameterized inner paths here. This
- * interacts with decisions elsewhere that also discriminate against
- * mergejoins with parameterized inputs; see comments in
- * src/backend/optimizer/README.
- *
- * As we shorten the sortkey list, we should consider only paths that
- * are strictly cheaper than (in particular, not the same as) any path
- * found in an earlier iteration. Otherwise we'd be intentionally
- * using fewer merge keys than a given path allows (treating the rest
- * as plain joinquals), which is unlikely to be a good idea. Also,
- * eliminating paths here on the basis of compare_path_costs is a lot
- * cheaper than building the mergejoin path only to throw it away.
- *
- * If inner_cheapest_total is well enough sorted to have not required
- * a sort in the path made above, we shouldn't make a duplicate path
- * with it, either. We handle that case with the same logic that
- * handles the previous consideration, by initializing the variables
- * that track cheapest-so-far properly. Note that we do NOT reject
- * inner_cheapest_total if we find it matches some shorter set of
- * pathkeys. That case corresponds to using fewer mergekeys to avoid
- * sorting inner_cheapest_total, whereas we did sort it above, so the
- * plans being considered are different.
- */
- if (pathkeys_contained_in(innersortkeys,
- inner_cheapest_total->pathkeys))
- {
- /* inner_cheapest_total didn't require a sort */
- cheapest_startup_inner = inner_cheapest_total;
- cheapest_total_inner = inner_cheapest_total;
- }
- else
- {
- /* it did require a sort, at least for the full set of keys */
- cheapest_startup_inner = NULL;
- cheapest_total_inner = NULL;
- }
- num_sortkeys = list_length(innersortkeys);
- if (num_sortkeys > 1 && !useallclauses)
- trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
- else
- trialsortkeys = innersortkeys; /* won't really truncate */
-
- for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
- {
- Path *innerpath;
- List *newclauses = NIL;
-
- /*
- * Look for an inner path ordered well enough for the first
- * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
- * destructively, which is why we made a copy...
- */
- trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
- innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
- trialsortkeys,
- NULL,
- TOTAL_COST);
- if (innerpath != NULL &&
- (cheapest_total_inner == NULL ||
- compare_path_costs(innerpath, cheapest_total_inner,
- TOTAL_COST) < 0))
- {
- /* Found a cheap (or even-cheaper) sorted path */
- /* Select the right mergeclauses, if we didn't already */
- if (sortkeycnt < num_sortkeys)
- {
- newclauses =
- find_mergeclauses_for_pathkeys(root,
- trialsortkeys,
- false,
- mergeclauses);
- Assert(newclauses != NIL);
- }
- else
- newclauses = mergeclauses;
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- innerpath,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL,
- jointype,
- extra);
- cheapest_total_inner = innerpath;
- }
- /* Same on the basis of cheapest startup cost ... */
- innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
- trialsortkeys,
- NULL,
- STARTUP_COST);
- if (innerpath != NULL &&
- (cheapest_startup_inner == NULL ||
- compare_path_costs(innerpath, cheapest_startup_inner,
- STARTUP_COST) < 0))
- {
- /* Found a cheap (or even-cheaper) sorted path */
- if (innerpath != cheapest_total_inner)
- {
- /*
- * Avoid rebuilding clause list if we already made one;
- * saves memory in big join trees...
- */
- if (newclauses == NIL)
- {
- if (sortkeycnt < num_sortkeys)
- {
- newclauses =
- find_mergeclauses_for_pathkeys(root,
- trialsortkeys,
- false,
- mergeclauses);
- Assert(newclauses != NIL);
- }
- else
- newclauses = mergeclauses;
- }
- try_mergejoin_path(root,
- joinrel,
- outerpath,
- innerpath,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL,
- jointype,
- extra);
- }
- cheapest_startup_inner = innerpath;
- }
-
- /*
- * Don't consider truncated sortkeys if we need all clauses.
- */
- if (useallclauses)
- break;
- }
+ /* Generate merge join paths */
+ generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+ save_jointype, extra, useallclauses,
+ inner_cheapest_total, merge_pathkeys);
}
/*