diff options
author | Tom Lane | 2013-12-23 21:11:35 +0000 |
---|---|---|
committer | Tom Lane | 2013-12-23 21:11:35 +0000 |
commit | 8d65da1f01c6a4c84fe9c59aeb6b7e3adf870145 (patch) | |
tree | 9ab9bf5fc1f7a128ff4638d1c7f36a83fc317ca2 /src/backend/parser | |
parent | 37484ad2aacef5ec794f4dd3d5cf814475180a78 (diff) |
Support ordered-set (WITHIN GROUP) aggregates.
This patch introduces generic support for ordered-set and hypothetical-set
aggregate functions, as well as implementations of the instances defined in
SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(),
percent_rank(), cume_dist()). It also adds mode(), though it is not in the
spec, as well as versions of percentile_cont() and percentile_disc() that
can compute multiple percentile values in one pass over the data.
Unlike the original submission, this patch puts full control of the sorting
process in the hands of the aggregate's support functions. To allow the
support functions to find out how they're supposed to sort, a new API
function AggGetAggref() is added to nodeAgg.c. This allows retrieval of
the aggregate call's Aggref node, which may have other uses beyond the
immediate need. There is also support for ordered-set aggregates to
install cleanup callback functions, so that they can be sure that
infrastructure such as tuplesort objects gets cleaned up.
In passing, make some fixes in the recently-added support for variadic
aggregates, and make some editorial adjustments in the recent FILTER
additions for aggregates. Also, simplify use of IsBinaryCoercible() by
allowing it to succeed whenever the target type is ANY or ANYELEMENT.
It was inconsistent that it dealt with other polymorphic target types
but not these.
Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing,
and rather heavily editorialized upon by Tom Lane
Diffstat (limited to 'src/backend/parser')
-rw-r--r-- | src/backend/parser/gram.y | 212 | ||||
-rw-r--r-- | src/backend/parser/parse_agg.c | 409 | ||||
-rw-r--r-- | src/backend/parser/parse_clause.c | 5 | ||||
-rw-r--r-- | src/backend/parser/parse_coerce.c | 4 | ||||
-rw-r--r-- | src/backend/parser/parse_collate.c | 297 | ||||
-rw-r--r-- | src/backend/parser/parse_expr.c | 49 | ||||
-rw-r--r-- | src/backend/parser/parse_func.c | 368 |
7 files changed, 1126 insertions, 218 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index b4e5552636e..0249f5cdf35 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -142,6 +142,9 @@ static void check_qualified_name(List *names, core_yyscan_t yyscanner); static List *check_func_name(List *names, core_yyscan_t yyscanner); static List *check_indirection(List *indirection, core_yyscan_t yyscanner); static List *extractArgTypes(List *parameters); +static List *extractAggrArgTypes(List *aggrargs); +static List *makeOrderedSetArgs(List *directargs, List *orderedargs, + core_yyscan_t yyscanner); static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, @@ -491,12 +494,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <with> with_clause opt_with_clause %type <list> cte_list +%type <list> within_group_clause +%type <node> filter_clause %type <list> window_clause window_definition_list opt_partition_clause %type <windef> window_definition over_clause window_specification opt_frame_clause frame_extent frame_bound %type <str> opt_existing_window_name %type <boolean> opt_if_not_exists -%type <node> filter_clause /* * Non-keyword token types. These are hard-wired into the "flex" lexer. 
@@ -599,7 +603,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); VACUUM VALID VALIDATE VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING VERBOSE VERSION_P VIEW VOLATILE - WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE + WHEN WHERE WHITESPACE_P WINDOW WITH WITHIN WITHOUT WORK WRAPPER WRITE XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLEXISTS XMLFOREST XMLPARSE XMLPI XMLROOT XMLSERIALIZE @@ -3715,7 +3719,7 @@ AlterExtensionContentsStmt: n->action = $4; n->objtype = OBJECT_AGGREGATE; n->objname = $6; - n->objargs = extractArgTypes($7); + n->objargs = extractAggrArgTypes($7); $$ = (Node *)n; } | ALTER EXTENSION name add_drop CAST '(' Typename AS Typename ')' @@ -5294,7 +5298,7 @@ CommentStmt: CommentStmt *n = makeNode(CommentStmt); n->objtype = OBJECT_AGGREGATE; n->objname = $4; - n->objargs = extractArgTypes($5); + n->objargs = extractAggrArgTypes($5); n->comment = $7; $$ = (Node *) n; } @@ -5460,7 +5464,7 @@ SecLabelStmt: n->provider = $3; n->objtype = OBJECT_AGGREGATE; n->objname = $6; - n->objargs = extractArgTypes($7); + n->objargs = extractAggrArgTypes($7); n->label = $9; $$ = (Node *) n; } @@ -6460,9 +6464,52 @@ aggr_arg: func_arg } ; -/* Zero-argument aggregates are named with * for consistency with COUNT(*) */ -aggr_args: '(' aggr_args_list ')' { $$ = $2; } - | '(' '*' ')' { $$ = NIL; } +/* + * The SQL standard offers no guidance on how to declare aggregate argument + * lists, since it doesn't have CREATE AGGREGATE etc. We accept these cases: + * + * (*) - normal agg with no args + * (aggr_arg,...) - normal agg with args + * (ORDER BY aggr_arg,...) - ordered-set agg with no direct args + * (aggr_arg,... ORDER BY aggr_arg,...) - ordered-set agg with direct args + * + * The zero-argument case is spelled with '*' for consistency with COUNT(*). 
+ * + * An additional restriction is that if the direct-args list ends in a + * VARIADIC item, the ordered-args list must contain exactly one item that + * is also VARIADIC with the same type. This allows us to collapse the two + * VARIADIC items into one, which is necessary to represent the aggregate in + * pg_proc. We check this at the grammar stage so that we can return a list + * in which the second VARIADIC item is already discarded, avoiding extra work + * in cases such as DROP AGGREGATE. + * + * The return value of this production is a two-element list, in which the + * first item is a sublist of FunctionParameter nodes (with any duplicate + * VARIADIC item already dropped, as per above) and the second is an integer + * Value node, containing -1 if there was no ORDER BY and otherwise the number + * of argument declarations before the ORDER BY. (If this number is equal + * to the first sublist's length, then we dropped a duplicate VARIADIC item.) + * This representation is passed as-is to CREATE AGGREGATE; for operations + * on existing aggregates, we can just apply extractArgTypes to the first + * sublist. 
+ */ +aggr_args: '(' '*' ')' + { + $$ = list_make2(NIL, makeInteger(-1)); + } + | '(' aggr_args_list ')' + { + $$ = list_make2($2, makeInteger(-1)); + } + | '(' ORDER BY aggr_args_list ')' + { + $$ = list_make2($4, makeInteger(0)); + } + | '(' aggr_args_list ORDER BY aggr_args_list ')' + { + /* this is the only case requiring consistency checking */ + $$ = makeOrderedSetArgs($2, $5, yyscanner); + } ; aggr_args_list: @@ -6668,7 +6715,7 @@ RemoveAggrStmt: DropStmt *n = makeNode(DropStmt); n->removeType = OBJECT_AGGREGATE; n->objects = list_make1($3); - n->arguments = list_make1(extractArgTypes($4)); + n->arguments = list_make1(extractAggrArgTypes($4)); n->behavior = $5; n->missing_ok = false; n->concurrent = false; @@ -6679,7 +6726,7 @@ RemoveAggrStmt: DropStmt *n = makeNode(DropStmt); n->removeType = OBJECT_AGGREGATE; n->objects = list_make1($5); - n->arguments = list_make1(extractArgTypes($6)); + n->arguments = list_make1(extractAggrArgTypes($6)); n->behavior = $7; n->missing_ok = true; n->concurrent = false; @@ -6895,7 +6942,7 @@ RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name RenameStmt *n = makeNode(RenameStmt); n->renameType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newname = $7; n->missing_ok = false; $$ = (Node *)n; @@ -7369,7 +7416,7 @@ AlterObjectSchemaStmt: AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); n->objectType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newschema = $7; n->missing_ok = false; $$ = (Node *)n; @@ -7598,7 +7645,7 @@ AlterOwnerStmt: ALTER AGGREGATE func_name aggr_args OWNER TO RoleId AlterOwnerStmt *n = makeNode(AlterOwnerStmt); n->objectType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newowner = $7; $$ = (Node *)n; } @@ -11165,26 +11212,24 @@ func_application: func_name '(' ')' { $$ = (Node *) 
makeFuncCall($1, NIL, @1); } - | func_name '(' func_arg_list ')' + | func_name '(' func_arg_list opt_sort_clause ')' { - $$ = (Node *) makeFuncCall($1, $3, @1); + FuncCall *n = makeFuncCall($1, $3, @1); + n->agg_order = $4; + $$ = (Node *)n; } - | func_name '(' VARIADIC func_arg_expr ')' + | func_name '(' VARIADIC func_arg_expr opt_sort_clause ')' { FuncCall *n = makeFuncCall($1, list_make1($4), @1); n->func_variadic = TRUE; + n->agg_order = $5; $$ = (Node *)n; } - | func_name '(' func_arg_list ',' VARIADIC func_arg_expr ')' + | func_name '(' func_arg_list ',' VARIADIC func_arg_expr opt_sort_clause ')' { FuncCall *n = makeFuncCall($1, lappend($3, $6), @1); n->func_variadic = TRUE; - $$ = (Node *)n; - } - | func_name '(' func_arg_list sort_clause ')' - { - FuncCall *n = makeFuncCall($1, $3, @1); - n->agg_order = $4; + n->agg_order = $7; $$ = (Node *)n; } | func_name '(' ALL func_arg_list opt_sort_clause ')' @@ -11232,12 +11277,40 @@ func_application: func_name '(' ')' * (Note that many of the special SQL functions wouldn't actually make any * sense as functional index entries, but we ignore that consideration here.) */ -func_expr: func_application filter_clause over_clause +func_expr: func_application within_group_clause filter_clause over_clause { - FuncCall *n = (FuncCall*)$1; - n->agg_filter = $2; - n->over = $3; - $$ = (Node*)n; + FuncCall *n = (FuncCall *) $1; + /* + * The order clause for WITHIN GROUP and the one for + * plain-aggregate ORDER BY share a field, so we have to + * check here that at most one is present. We also check + * for DISTINCT and VARIADIC here to give a better error + * location. Other consistency checks are deferred to + * parse analysis. 
+ */ + if ($2 != NIL) + { + if (n->agg_order != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use multiple ORDER BY clauses with WITHIN GROUP"), + parser_errposition(@2))); + if (n->agg_distinct) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use DISTINCT with WITHIN GROUP"), + parser_errposition(@2))); + if (n->func_variadic) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use VARIADIC with WITHIN GROUP"), + parser_errposition(@2))); + n->agg_order = $2; + n->agg_within_group = TRUE; + } + n->agg_filter = $3; + n->over = $4; + $$ = (Node *) n; } | func_expr_common_subexpr { $$ = $1; } @@ -11625,6 +11698,20 @@ xmlexists_argument: /* + * Aggregate decoration clauses + */ +within_group_clause: + WITHIN GROUP_P '(' sort_clause ')' { $$ = $4; } + | /*EMPTY*/ { $$ = NIL; } + ; + +filter_clause: + FILTER '(' WHERE a_expr ')' { $$ = $4; } + | /*EMPTY*/ { $$ = NULL; } + ; + + +/* * Window Definitions */ window_clause: @@ -11647,11 +11734,6 @@ window_definition: } ; -filter_clause: - FILTER '(' WHERE a_expr ')' { $$ = $4; } - | /*EMPTY*/ { $$ = NULL; } - ; - over_clause: OVER window_specification { $$ = $2; } | OVER ColId @@ -12416,16 +12498,17 @@ AexprConst: Iconst t->location = @1; $$ = makeStringConstCast($2, @2, t); } - | func_name '(' func_arg_list ')' Sconst + | func_name '(' func_arg_list opt_sort_clause ')' Sconst { /* generic syntax with a type modifier */ TypeName *t = makeTypeNameFromNameList($1); ListCell *lc; /* - * We must use func_arg_list in the production to avoid - * reduce/reduce conflicts, but we don't actually wish - * to allow NamedArgExpr in this context. + * We must use func_arg_list and opt_sort_clause in the + * production to avoid reduce/reduce conflicts, but we + * don't actually wish to allow NamedArgExpr in this + * context, nor ORDER BY. 
*/ foreach(lc, $3) { @@ -12437,9 +12520,15 @@ AexprConst: Iconst errmsg("type modifier cannot have parameter name"), parser_errposition(arg->location))); } + if ($4 != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type modifier cannot have ORDER BY"), + parser_errposition(@4))); + t->typmods = $3; t->location = @1; - $$ = makeStringConstCast($5, @5, t); + $$ = makeStringConstCast($6, @6, t); } | ConstTypename Sconst { @@ -12800,6 +12889,7 @@ unreserved_keyword: | VIEW | VOLATILE | WHITESPACE_P + | WITHIN | WITHOUT | WORK | WRAPPER @@ -13275,6 +13365,52 @@ extractArgTypes(List *parameters) return result; } +/* extractAggrArgTypes() + * As above, but work from the output of the aggr_args production. + */ +static List * +extractAggrArgTypes(List *aggrargs) +{ + Assert(list_length(aggrargs) == 2); + return extractArgTypes((List *) linitial(aggrargs)); +} + +/* makeOrderedSetArgs() + * Build the result of the aggr_args production (which see the comments for). + * This handles only the case where both given lists are nonempty, so that + * we have to deal with multiple VARIADIC arguments. + */ +static List * +makeOrderedSetArgs(List *directargs, List *orderedargs, + core_yyscan_t yyscanner) +{ + FunctionParameter *lastd = (FunctionParameter *) llast(directargs); + + /* No restriction unless last direct arg is VARIADIC */ + if (lastd->mode == FUNC_PARAM_VARIADIC) + { + FunctionParameter *firsto = (FunctionParameter *) linitial(orderedargs); + + /* + * We ignore the names, though the aggr_arg production allows them; + * it doesn't allow default values, so those need not be checked. 
+ */ + if (list_length(orderedargs) != 1 || + firsto->mode != FUNC_PARAM_VARIADIC || + !equal(lastd->argType, firsto->argType)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("an ordered-set aggregate with a VARIADIC direct argument must have one VARIADIC aggregated argument of the same data type"), + parser_errposition(exprLocation((Node *) firsto)))); + + /* OK, drop the duplicate VARIADIC argument from the internal form */ + orderedargs = NIL; + } + + return list_make2(list_concat(directargs, orderedargs), + makeInteger(list_length(directargs))); +} + /* insertSelectOptions() * Insert ORDER BY, etc into an already-constructed SelectStmt. * diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 98cb58a7cc0..04a20eefcd4 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -14,16 +14,20 @@ */ #include "postgres.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" #include "parser/parse_agg.h" #include "parser/parse_clause.h" +#include "parser/parse_coerce.h" #include "parser/parse_expr.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" typedef struct @@ -42,9 +46,13 @@ typedef struct bool have_non_var_grouping; List **func_grouped_rels; int sublevels_up; + bool in_agg_direct_args; } check_ungrouped_columns_context; -static int check_agg_arguments(ParseState *pstate, List *args, Expr *filter); +static int check_agg_arguments(ParseState *pstate, + List *directargs, + List *args, + Expr *filter); static bool check_agg_arguments_walker(Node *node, check_agg_arguments_context *context); static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, @@ -59,15 +67,21 @@ static bool check_ungrouped_columns_walker(Node *node, * Finish initial transformation of an 
aggregate call * * parse_func.c has recognized the function as an aggregate, and has set up - * all the fields of the Aggref except args, aggorder, aggdistinct and - * agglevelsup. The passed-in args list has been through standard expression - * transformation, while the passed-in aggorder list hasn't been transformed - * at all. + * all the fields of the Aggref except aggdirectargs, args, aggorder, + * aggdistinct and agglevelsup. The passed-in args list has been through + * standard expression transformation and type coercion to match the agg's + * declared arg types, while the passed-in aggorder list hasn't been + * transformed at all. * - * Here we convert the args list into a targetlist by inserting TargetEntry - * nodes, and then transform the aggorder and agg_distinct specifications to - * produce lists of SortGroupClause nodes. (That might also result in adding - * resjunk expressions to the targetlist.) + * Here we separate the args list into direct and aggregated args, storing the + * former in agg->aggdirectargs and the latter in agg->args. The regular + * args, but not the direct args, are converted into a targetlist by inserting + * TargetEntry nodes. We then transform the aggorder and agg_distinct + * specifications to produce lists of SortGroupClause nodes for agg->aggorder + * and agg->aggdistinct. (For a regular aggregate, this might result in + * adding resjunk expressions to the targetlist; but for ordered-set + * aggregates the aggorder list will always be one-to-one with the aggregated + * args.) 
* * We must also determine which query level the aggregate actually belongs to, * set agglevelsup accordingly, and mark p_hasAggs true in the corresponding @@ -77,76 +91,122 @@ void transformAggregateCall(ParseState *pstate, Aggref *agg, List *args, List *aggorder, bool agg_distinct) { - List *tlist; - List *torder; + List *tlist = NIL; + List *torder = NIL; List *tdistinct = NIL; - AttrNumber attno; + AttrNumber attno = 1; int save_next_resno; int min_varlevel; ListCell *lc; const char *err; bool errkind; - /* - * Transform the plain list of Exprs into a targetlist. We don't bother - * to assign column names to the entries. - */ - tlist = NIL; - attno = 1; - foreach(lc, args) + if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { - Expr *arg = (Expr *) lfirst(lc); - TargetEntry *tle = makeTargetEntry(arg, attno++, NULL, false); + /* + * For an ordered-set agg, the args list includes direct args and + * aggregated args; we must split them apart. + */ + int numDirectArgs = list_length(args) - list_length(aggorder); + List *aargs; + ListCell *lc2; - tlist = lappend(tlist, tle); - } + Assert(numDirectArgs >= 0); - /* - * If we have an ORDER BY, transform it. This will add columns to the - * tlist if they appear in ORDER BY but weren't already in the arg list. - * They will be marked resjunk = true so we can tell them apart from - * regular aggregate arguments later. - * - * We need to mess with p_next_resno since it will be used to number any - * new targetlist entries. - */ - save_next_resno = pstate->p_next_resno; - pstate->p_next_resno = attno; + aargs = list_copy_tail(args, numDirectArgs); + agg->aggdirectargs = list_truncate(args, numDirectArgs); - torder = transformSortClause(pstate, - aggorder, - &tlist, - EXPR_KIND_ORDER_BY, - true /* fix unknowns */ , - true /* force SQL99 rules */ ); + /* + * Build a tlist from the aggregated args, and make a sortlist entry + * for each one. 
Note that the expressions in the SortBy nodes are + * ignored (they are the raw versions of the transformed args); we are + * just looking at the sort information in the SortBy nodes. + */ + forboth(lc, aargs, lc2, aggorder) + { + Expr *arg = (Expr *) lfirst(lc); + SortBy *sortby = (SortBy *) lfirst(lc2); + TargetEntry *tle; - /* - * If we have DISTINCT, transform that to produce a distinctList. - */ - if (agg_distinct) + /* We don't bother to assign column names to the entries */ + tle = makeTargetEntry(arg, attno++, NULL, false); + tlist = lappend(tlist, tle); + + torder = addTargetToSortList(pstate, tle, + torder, tlist, sortby, + true /* fix unknowns */ ); + } + + /* Never any DISTINCT in an ordered-set agg */ + Assert(!agg_distinct); + } + else { - tdistinct = transformDistinctClause(pstate, &tlist, torder, true); + /* Regular aggregate, so it has no direct args */ + agg->aggdirectargs = NIL; + + /* + * Transform the plain list of Exprs into a targetlist. + */ + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + TargetEntry *tle; + + /* We don't bother to assign column names to the entries */ + tle = makeTargetEntry(arg, attno++, NULL, false); + tlist = lappend(tlist, tle); + } + + /* + * If we have an ORDER BY, transform it. This will add columns to the + * tlist if they appear in ORDER BY but weren't already in the arg + * list. They will be marked resjunk = true so we can tell them apart + * from regular aggregate arguments later. + * + * We need to mess with p_next_resno since it will be used to number + * any new targetlist entries. + */ + save_next_resno = pstate->p_next_resno; + pstate->p_next_resno = attno; + + torder = transformSortClause(pstate, + aggorder, + &tlist, + EXPR_KIND_ORDER_BY, + true /* fix unknowns */ , + true /* force SQL99 rules */ ); /* - * Remove this check if executor support for hashed distinct for - * aggregates is ever added. + * If we have DISTINCT, transform that to produce a distinctList. 
*/ - foreach(lc, tdistinct) + if (agg_distinct) { - SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + tdistinct = transformDistinctClause(pstate, &tlist, torder, true); - if (!OidIsValid(sortcl->sortop)) + /* + * Remove this check if executor support for hashed distinct for + * aggregates is ever added. + */ + foreach(lc, tdistinct) { - Node *expr = get_sortgroupclause_expr(sortcl, tlist); - - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an ordering operator for type %s", - format_type_be(exprType(expr))), - errdetail("Aggregates with DISTINCT must be able to sort their inputs."), - parser_errposition(pstate, exprLocation(expr)))); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + + if (!OidIsValid(sortcl->sortop)) + { + Node *expr = get_sortgroupclause_expr(sortcl, tlist); + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator for type %s", + format_type_be(exprType(expr))), + errdetail("Aggregates with DISTINCT must be able to sort their inputs."), + parser_errposition(pstate, exprLocation(expr)))); + } } } + + pstate->p_next_resno = save_next_resno; } /* Update the Aggref with the transformation results */ @@ -154,13 +214,14 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; - pstate->p_next_resno = save_next_resno; - /* * Check the arguments to compute the aggregate's level and detect * improper nesting. */ - min_varlevel = check_agg_arguments(pstate, agg->args, agg->aggfilter); + min_varlevel = check_agg_arguments(pstate, + agg->aggdirectargs, + agg->args, + agg->aggfilter); agg->agglevelsup = min_varlevel; /* Mark the correct pstate level as having aggregates */ @@ -302,8 +363,17 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * one is its parent, etc). 
* * The aggregate's level is the same as the level of the lowest-level variable - * or aggregate in its arguments or filter expression; or if it contains no - * variables at all, we presume it to be local. + * or aggregate in its aggregated arguments (including any ORDER BY columns) + * or filter expression; or if it contains no variables at all, we presume it + * to be local. + * + * Vars/Aggs in direct arguments are *not* counted towards determining the + * agg's level, as those arguments aren't evaluated per-row but only + * per-group, and so in some sense aren't really agg arguments. However, + * this can mean that we decide an agg is upper-level even when its direct + * args contain lower-level Vars/Aggs, and that case has to be disallowed. + * (This is a little strange, but the SQL standard seems pretty definite that + * direct args are not to be considered when setting the agg's level.) * * We also take this opportunity to detect any aggregates or window functions * nested within the arguments. We can throw error immediately if we find @@ -312,7 +382,10 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * which we can't know until we finish scanning the arguments. */ static int -check_agg_arguments(ParseState *pstate, List *args, Expr *filter) +check_agg_arguments(ParseState *pstate, + List *directargs, + List *args, + Expr *filter) { int agglevel; check_agg_arguments_context context; @@ -337,8 +410,9 @@ check_agg_arguments(ParseState *pstate, List *args, Expr *filter) if (context.min_varlevel < 0) { if (context.min_agglevel < 0) - return 0; - agglevel = context.min_agglevel; + agglevel = 0; + else + agglevel = context.min_agglevel; } else if (context.min_agglevel < 0) agglevel = context.min_varlevel; @@ -349,12 +423,49 @@ check_agg_arguments(ParseState *pstate, List *args, Expr *filter) * If there's a nested aggregate of the same semantic level, complain. 
*/ if (agglevel == context.min_agglevel) + { + int aggloc; + + aggloc = locate_agg_of_level((Node *) args, agglevel); + if (aggloc < 0) + aggloc = locate_agg_of_level((Node *) filter, agglevel); ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("aggregate function calls cannot be nested"), - parser_errposition(pstate, - locate_agg_of_level((Node *) args, - agglevel)))); + parser_errposition(pstate, aggloc))); + } + + /* + * Now check for vars/aggs in the direct arguments, and throw error if + * needed. Note that we allow a Var of the agg's semantic level, but not + * an Agg of that level. In principle such Aggs could probably be + * supported, but it would create an ordering dependency among the + * aggregates at execution time. Since the case appears neither to be + * required by spec nor particularly useful, we just treat it as a + * nested-aggregate situation. + */ + if (directargs) + { + context.min_varlevel = -1; + context.min_agglevel = -1; + (void) expression_tree_walker((Node *) directargs, + check_agg_arguments_walker, + (void *) &context); + if (context.min_varlevel >= 0 && context.min_varlevel < agglevel) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("outer-level aggregate cannot contain a lower-level variable in its direct arguments"), + parser_errposition(pstate, + locate_var_of_level((Node *) directargs, + context.min_varlevel)))); + if (context.min_agglevel >= 0 && context.min_agglevel <= agglevel) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("aggregate function calls cannot be nested"), + parser_errposition(pstate, + locate_agg_of_level((Node *) directargs, + context.min_agglevel)))); + } return agglevel; } @@ -442,6 +553,10 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, /* * A window function call can't contain another one (but aggs are OK). XXX * is this required by spec, or just an unimplemented feature? 
+ * + * Note: we don't need to check the filter expression here, because the + * context checks done below and in transformAggregateCall would have + * already rejected any window funcs or aggs within the filter. */ if (pstate->p_hasWindowFuncs && contain_windowfuncs((Node *) wfunc->args)) @@ -800,6 +915,7 @@ check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, context.have_non_var_grouping = have_non_var_grouping; context.func_grouped_rels = func_grouped_rels; context.sublevels_up = 0; + context.in_agg_direct_args = false; check_ungrouped_columns_walker(node, &context); } @@ -815,16 +931,39 @@ check_ungrouped_columns_walker(Node *node, IsA(node, Param)) return false; /* constants are always acceptable */ - /* - * If we find an aggregate call of the original level, do not recurse into - * its arguments or filter; ungrouped vars there are not an error. We can - * also skip looking at aggregates of higher levels, since they could not - * possibly contain Vars of concern to us (see transformAggregateCall). - * We do need to look at aggregates of lower levels, however. - */ - if (IsA(node, Aggref) && - (int) ((Aggref *) node)->agglevelsup >= context->sublevels_up) - return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + if ((int) agg->agglevelsup == context->sublevels_up) + { + /* + * If we find an aggregate call of the original level, do not + * recurse into its normal arguments, ORDER BY arguments, or + * filter; ungrouped vars there are not an error. But we should + * check direct arguments as though they weren't in an aggregate. + * We set a special flag in the context to help produce a useful + * error message for ungrouped vars in direct arguments. 
+ */ + bool result; + + Assert(!context->in_agg_direct_args); + context->in_agg_direct_args = true; + result = check_ungrouped_columns_walker((Node *) agg->aggdirectargs, + context); + context->in_agg_direct_args = false; + return result; + } + + /* + * We can skip recursing into aggregates of higher levels altogether, + * since they could not possibly contain Vars of concern to us (see + * transformAggregateCall). We do need to look at aggregates of lower + * levels, however. + */ + if ((int) agg->agglevelsup > context->sublevels_up) + return false; + } /* * If we have any GROUP BY items that are not simple Vars, check to see if @@ -917,6 +1056,8 @@ check_ungrouped_columns_walker(Node *node, (errcode(ERRCODE_GROUPING_ERROR), errmsg("column \"%s.%s\" must appear in the GROUP BY clause or be used in an aggregate function", rte->eref->aliasname, attname), + context->in_agg_direct_args ? + errdetail("Direct arguments of an ordered-set aggregate must use only grouped columns.") : 0, parser_errposition(context->pstate, var->location))); else ereport(ERROR, @@ -944,6 +1085,93 @@ check_ungrouped_columns_walker(Node *node, } /* + * get_aggregate_argtypes + * Identify the specific datatypes passed to an aggregate call. + * + * Given an Aggref, extract the actual datatypes of the input arguments. + * The input datatypes are reported in a way that matches up with the + * aggregate's declaration, ie, any ORDER BY columns attached to a plain + * aggregate are ignored, but we report both direct and aggregated args of + * an ordered-set aggregate. + * + * Datatypes are returned into inputTypes[], which must reference an array + * of length FUNC_MAX_ARGS. + * + * The function result is the number of actual arguments. 
+ */ +int +get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes) +{ + int numArguments = 0; + ListCell *lc; + + /* Any direct arguments of an ordered-set aggregate come first */ + foreach(lc, aggref->aggdirectargs) + { + Node *expr = (Node *) lfirst(lc); + + inputTypes[numArguments] = exprType(expr); + numArguments++; + } + + /* Now get the regular (aggregated) arguments */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + /* Ignore ordering columns of a plain aggregate */ + if (tle->resjunk) + continue; + + inputTypes[numArguments] = exprType((Node *) tle->expr); + numArguments++; + } + + return numArguments; +} + +/* + * resolve_aggregate_transtype + * Identify the transition state value's datatype for an aggregate call. + * + * This function resolves a polymorphic aggregate's state datatype. + * It must be passed the aggtranstype from the aggregate's catalog entry, + * as well as the actual argument types extracted by get_aggregate_argtypes. + * (We could fetch these values internally, but for all existing callers that + * would just duplicate work the caller has to do too, so we pass them in.) + */ +Oid +resolve_aggregate_transtype(Oid aggfuncid, + Oid aggtranstype, + Oid *inputTypes, + int numArguments) +{ + /* resolve actual type of transition state, if polymorphic */ + if (IsPolymorphicType(aggtranstype)) + { + /* have to fetch the agg's declared input types... */ + Oid *declaredArgTypes; + int agg_nargs; + + (void) get_func_signature(aggfuncid, &declaredArgTypes, &agg_nargs); + + /* + * VARIADIC ANY aggs could have more actual than declared args, but + * such extra args can't affect polymorphic type resolution. + */ + Assert(agg_nargs <= numArguments); + + aggtranstype = enforce_generic_type_consistency(inputTypes, + declaredArgTypes, + agg_nargs, + aggtranstype, + false); + pfree(declaredArgTypes); + } + return aggtranstype; +} + +/* * Create expression trees for the transition and final functions * of an aggregate. 
These are needed so that polymorphic functions * can be used within an aggregate --- without the expression trees, @@ -956,6 +1184,9 @@ check_ungrouped_columns_walker(Node *node, * resolved to actual types (ie, none should ever be ANYELEMENT etc). * agg_input_collation is the aggregate function's input collation. * + * For an ordered-set aggregate, remember that agg_input_types describes + * the direct arguments followed by the aggregated arguments. + * * transfn_oid and finalfn_oid identify the funcs to be called; the latter * may be InvalidOid. * @@ -965,6 +1196,8 @@ check_ungrouped_columns_walker(Node *node, void build_aggregate_fnexprs(Oid *agg_input_types, int agg_num_inputs, + int agg_num_direct_inputs, + bool agg_ordered_set, bool agg_variadic, Oid agg_state_type, Oid agg_result_type, @@ -995,7 +1228,7 @@ build_aggregate_fnexprs(Oid *agg_input_types, args = list_make1(argp); - for (i = 0; i < agg_num_inputs; i++) + for (i = agg_num_direct_inputs; i < agg_num_inputs; i++) { argp = makeNode(Param); argp->paramkind = PARAM_EXEC; @@ -1035,10 +1268,26 @@ build_aggregate_fnexprs(Oid *agg_input_types, argp->location = -1; args = list_make1(argp); + if (agg_ordered_set) + { + for (i = 0; i < agg_num_inputs; i++) + { + argp = makeNode(Param); + argp->paramkind = PARAM_EXEC; + argp->paramid = -1; + argp->paramtype = agg_input_types[i]; + argp->paramtypmod = -1; + argp->paramcollid = agg_input_collation; + argp->location = -1; + args = lappend(args, argp); + } + } + *finalfnexpr = (Expr *) makeFuncExpr(finalfn_oid, agg_result_type, args, InvalidOid, agg_input_collation, COERCE_EXPLICIT_CALL); + /* finalfn is currently never treated as variadic */ } diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 87b0c8fd418..05ddb8c3e74 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -75,9 +75,6 @@ static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind 
exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); -static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, - List *sortlist, List *targetlist, SortBy *sortby, - bool resolveUnknown); static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, int location, bool resolveUnknown); @@ -2177,7 +2174,7 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs) * * Returns the updated SortGroupClause list. */ -static List * +List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortBy *sortby, bool resolveUnknown) diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index b6df2c60b46..efd483d8139 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -2009,6 +2009,10 @@ IsBinaryCoercible(Oid srctype, Oid targettype) if (srctype == targettype) return true; + /* Anything is coercible to ANY or ANYELEMENT */ + if (targettype == ANYOID || targettype == ANYELEMENTOID) + return true; + /* If srctype is a domain, reduce to its base type */ if (OidIsValid(srctype)) srctype = getBaseType(srctype); diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index c02f98acc71..f33fe3e305a 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -40,7 +40,9 @@ */ #include "postgres.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_collation.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parse_collate.h" #include "utils/lsyscache.h" @@ -73,6 +75,18 @@ typedef struct static bool assign_query_collations_walker(Node *node, ParseState *pstate); static bool assign_collations_walker(Node *node, assign_collations_context *context); +static void merge_collation_state(Oid collation, + CollateStrength strength, + int location, + Oid collation2, + int location2, + 
assign_collations_context *context); +static void assign_aggregate_collations(Aggref *aggref, + assign_collations_context *loccontext); +static void assign_ordered_set_collations(Aggref *aggref, + assign_collations_context *loccontext); +static void assign_hypothetical_collations(Aggref *aggref, + assign_collations_context *loccontext); /* @@ -258,6 +272,9 @@ assign_collations_walker(Node *node, assign_collations_context *context) loccontext.collation = InvalidOid; loccontext.strength = COLLATE_NONE; loccontext.location = -1; + /* Set these fields just to suppress uninitialized-value warnings: */ + loccontext.collation2 = InvalidOid; + loccontext.location2 = -1; /* * Recurse if appropriate, then determine the collation for this node. @@ -570,40 +587,31 @@ assign_collations_walker(Node *node, assign_collations_context *context) case T_Aggref: { /* - * Aggref is a special case because expressions - * used only for ordering shouldn't be taken to - * conflict with each other or with regular args. - * So we apply assign_expr_collations() to them - * rather than passing down our loccontext. - * - * Note that we recurse to each TargetEntry, not - * directly to its contained expression, so that - * the case above for T_TargetEntry will apply - * appropriate checks to agg ORDER BY items. - * - * Likewise, we assign collations for the (bool) - * expression in aggfilter, independently of any - * other args. - * - * We need not recurse into the aggorder or - * aggdistinct lists, because those contain only - * SortGroupClause nodes which we need not - * process. + * Aggref is messy enough that we give it its own + * function, in fact three of them. The FILTER + * clause is independent of the rest of the + * aggregate, however, so it can be processed + * separately. 
*/ Aggref *aggref = (Aggref *) node; - ListCell *lc; - foreach(lc, aggref->args) + switch (aggref->aggkind) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - Assert(IsA(tle, TargetEntry)); - if (tle->resjunk) - assign_expr_collations(context->pstate, - (Node *) tle); - else - (void) assign_collations_walker((Node *) tle, + case AGGKIND_NORMAL: + assign_aggregate_collations(aggref, + &loccontext); + break; + case AGGKIND_ORDERED_SET: + assign_ordered_set_collations(aggref, &loccontext); + break; + case AGGKIND_HYPOTHETICAL: + assign_hypothetical_collations(aggref, + &loccontext); + break; + default: + elog(ERROR, "unrecognized aggkind: %d", + (int) aggref->aggkind); } assign_expr_collations(context->pstate, @@ -730,9 +738,33 @@ assign_collations_walker(Node *node, assign_collations_context *context) } /* - * Now, merge my information into my parent's state. If the collation - * strength for this node is different from what's already in *context, - * then this node either dominates or is dominated by earlier siblings. + * Now, merge my information into my parent's state. + */ + merge_collation_state(collation, + strength, + location, + loccontext.collation2, + loccontext.location2, + context); + + return false; +} + +/* + * Merge collation state of a subexpression into the context for its parent. + */ +static void +merge_collation_state(Oid collation, + CollateStrength strength, + int location, + Oid collation2, + int location2, + assign_collations_context *context) +{ + /* + * If the collation strength for this node is different from what's + * already in *context, then this node either dominates or is dominated by + * earlier siblings. 
*/ if (strength > context->strength) { @@ -743,8 +775,8 @@ assign_collations_walker(Node *node, assign_collations_context *context) /* Bubble up error info if applicable */ if (strength == COLLATE_CONFLICT) { - context->collation2 = loccontext.collation2; - context->location2 = loccontext.location2; + context->collation2 = collation2; + context->location2 = location2; } } else if (strength == context->strength) @@ -805,6 +837,201 @@ assign_collations_walker(Node *node, assign_collations_context *context) break; } } +} - return false; +/* + * Aggref is a special case because expressions used only for ordering + * shouldn't be taken to conflict with each other or with regular args, + * indeed shouldn't affect the aggregate's result collation at all. + * We handle this by applying assign_expr_collations() to them rather than + * passing down our loccontext. + * + * Note that we recurse to each TargetEntry, not directly to its contained + * expression, so that the case above for T_TargetEntry will complain if we + * can't resolve a collation for an ORDER BY item (whether or not it is also + * a normal aggregate arg). + * + * We need not recurse into the aggorder or aggdistinct lists, because those + * contain only SortGroupClause nodes which we need not process. + */ +static void +assign_aggregate_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + ListCell *lc; + + /* Plain aggregates have no direct args */ + Assert(aggref->aggdirectargs == NIL); + + /* Process aggregated args, holding resjunk ones at arm's length */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + Assert(IsA(tle, TargetEntry)); + if (tle->resjunk) + assign_expr_collations(loccontext->pstate, (Node *) tle); + else + (void) assign_collations_walker((Node *) tle, loccontext); + } +} + +/* + * For ordered-set aggregates, it's somewhat unclear how best to proceed. 
+ * The spec-defined inverse distribution functions have only one sort column + * and don't return collatable types, but this is clearly too restrictive in + * the general case. Our solution is to consider that the aggregate's direct + * arguments contribute normally to determination of the aggregate's own + * collation, while aggregated arguments contribute only when the aggregate + * is designed to have exactly one aggregated argument (i.e., it has a single + * aggregated argument and is non-variadic). If it can have more than one + * aggregated argument, we process the aggregated arguments as independent + * sort columns. This avoids throwing error for something like + * agg(...) within group (order by x collate "foo", y collate "bar") + * while also guaranteeing that variadic aggregates don't change in behavior + * depending on how many sort columns a particular call happens to have. + * + * Otherwise this is much like the plain-aggregate case. + */ +static void +assign_ordered_set_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + bool merge_sort_collations; + ListCell *lc; + + /* Merge sort collations to parent only if there can be only one */ + merge_sort_collations = (list_length(aggref->args) == 1 && + get_func_variadictype(aggref->aggfnoid) == InvalidOid); + + /* Direct args, if any, are normal children of the Aggref node */ + (void) assign_collations_walker((Node *) aggref->aggdirectargs, + loccontext); + + /* Process aggregated args appropriately */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + Assert(IsA(tle, TargetEntry)); + if (merge_sort_collations) + (void) assign_collations_walker((Node *) tle, loccontext); + else + assign_expr_collations(loccontext->pstate, (Node *) tle); + } +} + +/* + * Hypothetical-set aggregates are even more special: per spec, we need to + * unify the collations of each pair of hypothetical and aggregated args. 
+ * And we need to force the choice of collation down into the sort column + * to ensure that the sort happens with the chosen collation. Other than + * that, the behavior is like regular ordered-set aggregates. Note that + * hypothetical direct arguments contribute to the aggregate collation + * only when their partner aggregated arguments do. + */ +static void +assign_hypothetical_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + ListCell *h_cell = list_head(aggref->aggdirectargs); + ListCell *s_cell = list_head(aggref->args); + bool merge_sort_collations; + int extra_args; + + /* Merge sort collations to parent only if there can be only one */ + merge_sort_collations = (list_length(aggref->args) == 1 && + get_func_variadictype(aggref->aggfnoid) == InvalidOid); + + /* Process any non-hypothetical direct args */ + extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args); + Assert(extra_args >= 0); + while (extra_args-- > 0) + { + (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext); + h_cell = lnext(h_cell); + } + + /* Scan hypothetical args and aggregated args in parallel */ + while (h_cell && s_cell) + { + Node *h_arg = (Node *) lfirst(h_cell); + TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell); + assign_collations_context paircontext; + + /* + * Assign collations internally in this pair of expressions, then + * choose a common collation for them. This should match + * select_common_collation(), but we can't use that function as-is + * because we need access to the whole collation state so we can + * bubble it up to the aggregate function's level. 
+ */ + paircontext.pstate = loccontext->pstate; + paircontext.collation = InvalidOid; + paircontext.strength = COLLATE_NONE; + paircontext.location = -1; + /* Set these fields just to suppress uninitialized-value warnings: */ + paircontext.collation2 = InvalidOid; + paircontext.location2 = -1; + + (void) assign_collations_walker(h_arg, &paircontext); + (void) assign_collations_walker((Node *) s_tle->expr, &paircontext); + + /* deal with collation conflict */ + if (paircontext.strength == COLLATE_CONFLICT) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", + get_collation_name(paircontext.collation), + get_collation_name(paircontext.collation2)), + errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), + parser_errposition(paircontext.pstate, + paircontext.location2))); + + /* + * At this point paircontext.collation can be InvalidOid only if the + * type is not collatable; no need to do anything in that case. If we + * do have to change the sort column's collation, do it by inserting a + * RelabelType node into the sort column TLE. + * + * XXX This is pretty grotty for a couple of reasons: + * assign_collations_walker isn't supposed to be changing the + * expression structure like this, and a parse-time change of + * collation ought to be signaled by a CollateExpr not a RelabelType + * (the use of RelabelType for collation marking is supposed to be a + * planner/executor thing only). But we have no better alternative. + * In particular, injecting a CollateExpr could result in the + * expression being interpreted differently after dump/reload, since + * we might be effectively promoting an implicit collation to + * explicit. This kluge is relying on ruleutils.c not printing a + * COLLATE clause for a RelabelType, and probably on some other + * fragile behaviors. 
+ */ + if (OidIsValid(paircontext.collation) && + paircontext.collation != exprCollation((Node *) s_tle->expr)) + { + s_tle->expr = (Expr *) + makeRelabelType(s_tle->expr, + exprType((Node *) s_tle->expr), + exprTypmod((Node *) s_tle->expr), + paircontext.collation, + COERCE_IMPLICIT_CAST); + } + + /* + * If appropriate, merge this column's collation state up to the + * aggregate function. + */ + if (merge_sort_collations) + merge_collation_state(paircontext.collation, + paircontext.strength, + paircontext.location, + paircontext.collation2, + paircontext.location2, + loccontext); + + h_cell = lnext(h_cell); + s_cell = lnext(s_cell); + } + Assert(h_cell == NULL && s_cell == NULL); } diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 68b711dfd9a..3a3489fcb36 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -463,8 +463,8 @@ transformIndirection(ParseState *pstate, Node *basenode, List *indirection) newresult = ParseFuncOrColumn(pstate, list_make1(n), list_make1(result), - NIL, NULL, false, false, false, - NULL, true, location); + NULL, + location); if (newresult == NULL) unknown_attribute(pstate, result, strVal(n), location); result = newresult; @@ -631,8 +631,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + cref->location); } break; } @@ -676,8 +676,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + cref->location); } break; } @@ -734,8 +734,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + 
cref->location); } break; } @@ -1242,7 +1242,6 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) { List *targs; ListCell *args; - Expr *tagg_filter; /* Transform the list of arguments ... */ targs = NIL; @@ -1253,26 +1252,30 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) } /* - * Transform the aggregate filter using transformWhereClause(), to which - * FILTER is virtually identical... + * When WITHIN GROUP is used, we treat its ORDER BY expressions as + * additional arguments to the function, for purposes of function lookup + * and argument type coercion. So, transform each such expression and add + * them to the targs list. We don't explicitly mark where each argument + * came from, but ParseFuncOrColumn can tell what's what by reference to + * list_length(fn->agg_order). */ - tagg_filter = NULL; - if (fn->agg_filter != NULL) - tagg_filter = (Expr *) - transformWhereClause(pstate, (Node *) fn->agg_filter, - EXPR_KIND_FILTER, "FILTER"); + if (fn->agg_within_group) + { + Assert(fn->agg_order != NIL); + foreach(args, fn->agg_order) + { + SortBy *arg = (SortBy *) lfirst(args); + + targs = lappend(targs, transformExpr(pstate, arg->node, + EXPR_KIND_ORDER_BY)); + } + } /* ... 
and hand off to ParseFuncOrColumn */ return ParseFuncOrColumn(pstate, fn->funcname, targs, - fn->agg_order, - tagg_filter, - fn->agg_star, - fn->agg_distinct, - fn->func_variadic, - fn->over, - false, + fn, fn->location); } diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index ede36d159a3..6aaa73380e5 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -22,6 +23,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parse_agg.h" +#include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" #include "parser/parse_relation.h" @@ -32,6 +34,9 @@ #include "utils/syscache.h" +static void unify_hypothetical_args(ParseState *pstate, + List *fargs, int numAggregatedArgs, + Oid *actual_arg_types, Oid *declared_arg_types); static Oid FuncNameAsType(List *funcname); static Node *ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg, int location); @@ -47,24 +52,30 @@ static Node *ParseComplexProjection(ParseState *pstate, char *funcname, * a function of a single complex-type argument can be written like a * column reference, allowing functions to act like computed columns. * - * Hence, both cases come through here. The is_column parameter tells us - * which syntactic construct is actually being dealt with, but this is - * intended to be used only to deliver an appropriate error message, - * not to affect the semantics. When is_column is true, we should have - * a single argument (the putative table), unqualified function name - * equal to the column name, and no aggregate or variadic decoration. - * Also, when is_column is true, we return NULL on failure rather than + * Hence, both cases come through here. 
If fn is null, we're dealing with + * column syntax not function syntax, but in principle that should not + * affect the lookup behavior, only which error messages we deliver. + * The FuncCall struct is needed however to carry various decoration that + * applies to aggregate and window functions. + * + * Also, when fn is null, we return NULL on failure rather than * reporting a no-such-function error. * - * The argument expressions (in fargs) and filter must have been transformed - * already. But the agg_order expressions, if any, have not been. + * The argument expressions (in fargs) must have been transformed + * already. However, nothing in *fn has been transformed. */ Node * ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, - List *agg_order, Expr *agg_filter, - bool agg_star, bool agg_distinct, bool func_variadic, - WindowDef *over, bool is_column, int location) + FuncCall *fn, int location) { + bool is_column = (fn == NULL); + List *agg_order = (fn ? fn->agg_order : NIL); + Expr *agg_filter = NULL; + bool agg_within_group = (fn ? fn->agg_within_group : false); + bool agg_star = (fn ? fn->agg_star : false); + bool agg_distinct = (fn ? fn->agg_distinct : false); + bool func_variadic = (fn ? fn->func_variadic : false); + WindowDef *over = (fn ? fn->over : NULL); Oid rettype; Oid funcid; ListCell *l; @@ -81,6 +92,15 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, int nvargs; Oid vatype; FuncDetailCode fdresult; + char aggkind = 0; + + /* + * If there's an aggregate filter, transform it using transformWhereClause + */ + if (fn && fn->agg_filter != NULL) + agg_filter = (Expr *) transformWhereClause(pstate, fn->agg_filter, + EXPR_KIND_FILTER, + "FILTER"); /* * Most of the rest of the parser just assumes that functions do not have @@ -101,10 +121,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, * Extract arg type info in preparation for function lookup. 
* * If any arguments are Param markers of type VOID, we discard them from - * the parameter list. This is a hack to allow the JDBC driver to not - * have to distinguish "input" and "output" parameter symbols while - * parsing function-call constructs. We can't use foreach() because we - * may modify the list ... + * the parameter list. This is a hack to allow the JDBC driver to not have + * to distinguish "input" and "output" parameter symbols while parsing + * function-call constructs. Don't do this if dealing with column syntax, + * nor if we had WITHIN GROUP (because in that case it's critical to keep + * the argument count unchanged). We can't use foreach() because we may + * modify the list ... */ nargs = 0; for (l = list_head(fargs); l != NULL; l = nextl) @@ -114,7 +136,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, nextl = lnext(l); - if (argtype == VOIDOID && IsA(arg, Param) &&!is_column) + if (argtype == VOIDOID && IsA(arg, Param) && + !is_column && !agg_within_group) { fargs = list_delete_ptr(fargs, arg); continue; @@ -247,6 +270,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errmsg("DISTINCT specified, but %s is not an aggregate function", NameListToString(funcname)), parser_errposition(pstate, location))); + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("WITHIN GROUP specified, but %s is not an aggregate function", + NameListToString(funcname)), + parser_errposition(pstate, location))); if (agg_order != NIL) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -266,8 +295,181 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, NameListToString(funcname)), parser_errposition(pstate, location))); } - else if (!(fdresult == FUNCDETAIL_AGGREGATE || - fdresult == FUNCDETAIL_WINDOWFUNC)) + else if (fdresult == FUNCDETAIL_AGGREGATE) + { + /* + * It's an aggregate; fetch needed info from the pg_aggregate entry. 
+ */ + HeapTuple tup; + Form_pg_aggregate classForm; + int catDirectArgs; + + tup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for aggregate %u", funcid); + classForm = (Form_pg_aggregate) GETSTRUCT(tup); + aggkind = classForm->aggkind; + catDirectArgs = classForm->aggnumdirectargs; + ReleaseSysCache(tup); + + /* Now check various disallowed cases. */ + if (AGGKIND_IS_ORDERED_SET(aggkind)) + { + int numAggregatedArgs; + int numDirectArgs; + + if (!agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("WITHIN GROUP is required for ordered-set aggregate %s", + NameListToString(funcname)), + parser_errposition(pstate, location))); + if (over) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("OVER is not supported for ordered-set aggregate %s", + NameListToString(funcname)), + parser_errposition(pstate, location))); + /* gram.y rejects DISTINCT + WITHIN GROUP */ + Assert(!agg_distinct); + /* gram.y rejects VARIADIC + WITHIN GROUP */ + Assert(!func_variadic); + + /* + * Since func_get_detail was working with an undifferentiated list + * of arguments, it might have selected an aggregate that doesn't + * really match because it requires a different division of direct + * and aggregated arguments. Check that the number of direct + * arguments is actually OK; if not, throw an "undefined function" + * error, similarly to the case where a misplaced ORDER BY is used + * in a regular aggregate call. 
+ */ + numAggregatedArgs = list_length(agg_order); + numDirectArgs = nargs - numAggregatedArgs; + Assert(numDirectArgs >= 0); + + if (!OidIsValid(vatype)) + { + /* Test is simple if aggregate isn't variadic */ + if (numDirectArgs != catDirectArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires %d direct arguments, not %d.", + NameListToString(funcname), + catDirectArgs, numDirectArgs), + parser_errposition(pstate, location))); + } + else + { + /* + * If it's variadic, we have two cases depending on whether + * the agg was "... ORDER BY VARIADIC" or "..., VARIADIC ORDER + * BY VARIADIC". It's the latter if catDirectArgs equals + * pronargs; to save a catalog lookup, we reverse-engineer + * pronargs from the info we got from func_get_detail. + */ + int pronargs; + + pronargs = nargs; + if (nvargs > 1) + pronargs -= nvargs - 1; + if (catDirectArgs < pronargs) + { + /* VARIADIC isn't part of direct args, so still easy */ + if (numDirectArgs != catDirectArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires %d direct arguments, not %d.", + NameListToString(funcname), + catDirectArgs, numDirectArgs), + parser_errposition(pstate, location))); + } + else + { + /* + * Both direct and aggregated args were declared variadic. + * For a standard ordered-set aggregate, it's okay as long + * as there aren't too few direct args. For a + * hypothetical-set aggregate, we assume that the + * hypothetical arguments are those that matched the + * variadic parameter; there must be just as many of them + * as there are aggregated arguments. 
+ */ + if (aggkind == AGGKIND_HYPOTHETICAL) + { + if (nvargs != 2 * numAggregatedArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("To use the hypothetical-set aggregate %s, the number of hypothetical direct arguments (here %d) must match the number of ordering columns (here %d).", + NameListToString(funcname), + nvargs - numAggregatedArgs, numAggregatedArgs), + parser_errposition(pstate, location))); + } + else + { + if (nvargs <= numAggregatedArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires at least %d direct arguments.", + NameListToString(funcname), + catDirectArgs), + parser_errposition(pstate, location))); + } + } + } + + /* Check type matching of hypothetical arguments */ + if (aggkind == AGGKIND_HYPOTHETICAL) + unify_hypothetical_args(pstate, fargs, numAggregatedArgs, + actual_arg_types, declared_arg_types); + } + else + { + /* Normal aggregate, so it can't have WITHIN GROUP */ + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not an ordered-set aggregate, so it cannot have WITHIN GROUP", + NameListToString(funcname)), + parser_errposition(pstate, location))); + } + } + else if (fdresult == FUNCDETAIL_WINDOWFUNC) + { + /* + * True window functions must be called with a window definition. 
+ */ + if (!over) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("window function %s requires an OVER clause", + NameListToString(funcname)), + parser_errposition(pstate, location))); + /* And, per spec, WITHIN GROUP isn't allowed */ + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("window function %s cannot have WITHIN GROUP", + NameListToString(funcname)), + parser_errposition(pstate, location))); + } + else { /* * Oops. Time to die. @@ -290,7 +492,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errhint("Could not choose a best candidate function. " "You might need to add explicit type casts."), parser_errposition(pstate, location))); - else if (list_length(agg_order) > 1) + else if (list_length(agg_order) > 1 && !agg_within_group) { /* It's agg(x, ORDER BY y,z) ... perhaps misplaced ORDER BY */ ereport(ERROR, @@ -424,10 +626,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, aggref->aggfnoid = funcid; aggref->aggtype = rettype; /* aggcollid and inputcollid will be set by parse_collate.c */ - /* args, aggorder, aggdistinct will be set by transformAggregateCall */ + /* aggdirectargs and args will be set by transformAggregateCall */ + /* aggorder and aggdistinct will be set by transformAggregateCall */ aggref->aggfilter = agg_filter; aggref->aggstar = agg_star; aggref->aggvariadic = func_variadic; + aggref->aggkind = aggkind; /* agglevelsup will be set by transformAggregateCall */ aggref->location = location; @@ -435,7 +639,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, * Reject attempt to call a parameterless aggregate without (*) * syntax. This is mere pedantry but some folks insisted ... 
*/ - if (fargs == NIL && !agg_star) + if (fargs == NIL && !agg_star && !agg_within_group) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("%s(*) must be used to call a parameterless aggregate function", @@ -473,14 +677,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, /* window function */ WindowFunc *wfunc = makeNode(WindowFunc); - /* - * True window functions must be called with a window definition. - */ - if (!over) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("window function call requires an OVER clause"), - parser_errposition(pstate, location))); + Assert(over); /* lack of this was checked above */ + Assert(!agg_within_group); /* also checked above */ wfunc->winfnoid = funcid; wfunc->wintype = rettype; @@ -513,22 +711,21 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, parser_errposition(pstate, location))); /* - * Reject window functions which are not aggregates in the case of - * FILTER. + * ordered aggs not allowed in windows yet */ - if (!wfunc->winagg && agg_filter) + if (agg_order != NIL) ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("FILTER is not implemented in non-aggregate window functions"), + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("aggregate ORDER BY is not implemented for window functions"), parser_errposition(pstate, location))); /* - * ordered aggs not allowed in windows yet + * FILTER is not yet supported with true window functions */ - if (agg_order != NIL) + if (!wfunc->winagg && agg_filter) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("aggregate ORDER BY is not implemented for window functions"), + errmsg("FILTER is not implemented for non-aggregate window functions"), parser_errposition(pstate, location))); if (retset) @@ -1348,6 +1545,101 @@ func_get_detail(List *funcname, /* + * unify_hypothetical_args() + * + * Ensure that each hypothetical direct argument of a hypothetical-set + * aggregate has the same type as the 
corresponding aggregated argument. + * Modify the expressions in the fargs list, if necessary, and update + * actual_arg_types[]. + * + * If the agg declared its args non-ANY (even ANYELEMENT), we need only a + * sanity check that the declared types match; make_fn_arguments will coerce + * the actual arguments to match the declared ones. But if the declaration + * is ANY, nothing will happen in make_fn_arguments, so we need to fix any + * mismatch here. We use the same type resolution logic as UNION etc. + */ +static void +unify_hypothetical_args(ParseState *pstate, + List *fargs, + int numAggregatedArgs, + Oid *actual_arg_types, + Oid *declared_arg_types) +{ + Node *args[FUNC_MAX_ARGS]; + int numDirectArgs, + numNonHypotheticalArgs; + int i; + ListCell *lc; + + numDirectArgs = list_length(fargs) - numAggregatedArgs; + numNonHypotheticalArgs = numDirectArgs - numAggregatedArgs; + /* safety check (should only trigger with a misdeclared agg) */ + if (numNonHypotheticalArgs < 0) + elog(ERROR, "incorrect number of arguments to hypothetical-set aggregate"); + + /* Deconstruct fargs into an array for ease of subscripting */ + i = 0; + foreach(lc, fargs) + { + args[i++] = (Node *) lfirst(lc); + } + + /* Check each hypothetical arg and corresponding aggregated arg */ + for (i = numNonHypotheticalArgs; i < numDirectArgs; i++) + { + int aargpos = numDirectArgs + (i - numNonHypotheticalArgs); + Oid commontype; + + /* A mismatch means AggregateCreate didn't check properly ... */ + if (declared_arg_types[i] != declared_arg_types[aargpos]) + elog(ERROR, "hypothetical-set aggregate has inconsistent declared argument types"); + + /* No need to unify if make_fn_arguments will coerce */ + if (declared_arg_types[i] != ANYOID) + continue; + + /* + * Select common type, giving preference to the aggregated argument's + * type (we'd rather coerce the direct argument once than coerce all + * the aggregated values). 
+ */ + commontype = select_common_type(pstate, + list_make2(args[aargpos], args[i]), + "WITHIN GROUP", + NULL); + + /* + * Perform the coercions. We don't need to worry about NamedArgExprs + * here because they aren't supported with aggregates. + */ + args[i] = coerce_type(pstate, + args[i], + actual_arg_types[i], + commontype, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + actual_arg_types[i] = commontype; + args[aargpos] = coerce_type(pstate, + args[aargpos], + actual_arg_types[aargpos], + commontype, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + actual_arg_types[aargpos] = commontype; + } + + /* Reconstruct fargs from array */ + i = 0; + foreach(lc, fargs) + { + lfirst(lc) = args[i++]; + } +} + + +/* * make_fn_arguments() * * Given the actual argument expressions for a function, and the desired |