Re-refactor the core scanner's API, in order to get out from under the problem of different parsers having different YYSTYPE unions that they want to use with it.

I defined a new union core_YYSTYPE that is just the (very short) list of semantic values returned by the core scanner. I had originally worried that this would require an extra interface layer, but actually we can have parser.c's base_yylex (formerly filtered_base_yylex) take care of that at no extra cost. Names associated with the core scanner are now "core_yy_foo", with "base_yy_foo" being used in the core Bison parser and the parser.c interface layer. This solves the last serious stumbling block to eliminating plpgsql's separate lexer. One restriction that will still be present is that plpgsql and the core will have to agree on the token numbers assigned to tokens that can be returned by the core lexer. Since Bison doesn't seem willing to accept external assignments of those numbers, we'll have to live with decreeing that core and plpgsql grammars declare these tokens first and in the same order.
author: Tom Lane 2009-11-09 18:38:48 +0000
committer: Tom Lane 2009-11-09 18:38:48 +0000
commit: 10bcfa189bedaeaa6bfe8d7841ed3b17f23c0df4 (patch)
tree: 70b98c6fd252fb828a393d830322f64b37cd5e81 /src/backend/parser/parser.c
parent: 2ace38d226246b83e5cc4d8f4063a82a485ddc95 (diff)
1 files changed, 27 insertions, 21 deletions
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index 93632c88114..354e335ce91 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -14,7 +14,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.82 2009/11/09 18:38:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -34,14 +34,15 @@
 List *
 raw_parser(const char *str)
 {
-	base_yyscan_t yyscanner;
+	core_yyscan_t yyscanner;
 	base_yy_extra_type yyextra;
 	int			yyresult;
 
 	/* initialize the flex scanner */
-	yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
+	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
+							 ScanKeywords, NumScanKeywords);
 
-	/* filtered_base_yylex() only needs this much initialization */
+	/* base_yylex() only needs this much initialization */
 	yyextra.have_lookahead = false;
 
 	/* initialize the bison parser */
@@ -73,15 +74,16 @@ raw_parser(const char *str)
 char *
 pg_parse_string_token(const char *token)
 {
-	base_yyscan_t yyscanner;
+	core_yyscan_t yyscanner;
 	base_yy_extra_type yyextra;
 	int			ctoken;
-	YYSTYPE		yylval;
+	core_YYSTYPE yylval;
 	YYLTYPE		yylloc;
 
-	yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
+	yyscanner = scanner_init(token, &yyextra.core_yy_extra,
+							 ScanKeywords, NumScanKeywords);
 
-	ctoken = base_yylex(&yylval, &yylloc, yyscanner);
+	ctoken = core_yylex(&yylval, &yylloc, yyscanner);
 
 	if (ctoken != SCONST)		/* caller error */
 		elog(ERROR, "expected string constant, got token code %d", ctoken);
@@ -93,7 +95,7 @@ pg_parse_string_token(const char *token)
 
 
 /*
- * Intermediate filter between parser and base lexer (base_yylex in scan.l).
+ * Intermediate filter between parser and core lexer (core_yylex in scan.l).
  *
  * The filter is needed because in some cases the standard SQL grammar
  * requires more than one token lookahead.	We reduce these cases to one-token
@@ -104,26 +106,30 @@ pg_parse_string_token(const char *token)
  * words.  Furthermore it's not clear how to do it without re-introducing
  * scanner backtrack, which would cost more performance than this filter
  * layer does.
+ *
+ * The filter also provides a convenient place to translate between
+ * the core_YYSTYPE and YYSTYPE representations (which are really the
+ * same thing anyway, but notationally they're different).
  */
 int
-filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
+base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
 {
 	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
 	int			cur_token;
 	int			next_token;
-	YYSTYPE		cur_yylval;
+	core_YYSTYPE cur_yylval;
 	YYLTYPE		cur_yylloc;
 
 	/* Get next token --- we might already have it */
 	if (yyextra->have_lookahead)
 	{
 		cur_token = yyextra->lookahead_token;
-		*lvalp = yyextra->lookahead_yylval;
+		lvalp->core_yystype = yyextra->lookahead_yylval;
 		*llocp = yyextra->lookahead_yylloc;
 		yyextra->have_lookahead = false;
 	}
 	else
-		cur_token = base_yylex(lvalp, llocp, yyscanner);
+		cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 
 	/* Do we need to look ahead for a possible multiword token? */
 	switch (cur_token)
@@ -133,9 +139,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 			/*
 			 * NULLS FIRST and NULLS LAST must be reduced to one token
 			 */
-			cur_yylval = *lvalp;
+			cur_yylval = lvalp->core_yystype;
 			cur_yylloc = *llocp;
-			next_token = base_yylex(lvalp, llocp, yyscanner);
+			next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 			switch (next_token)
 			{
 				case FIRST_P:
@@ -147,11 +153,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 				default:
 					/* save the lookahead token for next time */
 					yyextra->lookahead_token = next_token;
-					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylval = lvalp->core_yystype;
 					yyextra->lookahead_yylloc = *llocp;
 					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					*lvalp = cur_yylval;
+					lvalp->core_yystype = cur_yylval;
 					*llocp = cur_yylloc;
 					break;
 			}
@@ -162,9 +168,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 			/*
 			 * WITH TIME must be reduced to one token
 			 */
-			cur_yylval = *lvalp;
+			cur_yylval = lvalp->core_yystype;
 			cur_yylloc = *llocp;
-			next_token = base_yylex(lvalp, llocp, yyscanner);
+			next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
 			switch (next_token)
 			{
 				case TIME:
@@ -173,11 +179,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 				default:
 					/* save the lookahead token for next time */
 					yyextra->lookahead_token = next_token;
-					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylval = lvalp->core_yystype;
 					yyextra->lookahead_yylloc = *llocp;
 					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					*lvalp = cur_yylval;
+					lvalp->core_yystype = cur_yylval;
 					*llocp = cur_yylloc;
 					break;
 			}
author	Tom Lane	2009-11-09 18:38:48 +0000
committer	Tom Lane	2009-11-09 18:38:48 +0000
commit	10bcfa189bedaeaa6bfe8d7841ed3b17f23c0df4 (patch)
tree	70b98c6fd252fb828a393d830322f64b37cd5e81 /src/backend/parser/parser.c
parent	2ace38d226246b83e5cc4d8f4063a82a485ddc95 (diff)