ecpg: major cleanup, simplification, and documentation of parse.pl.

author Tom Lane <[email protected]>

Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)

committer Tom Lane <[email protected]>

Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)
author Tom Lane <[email protected]>
Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)
committer Tom Lane <[email protected]>
Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)
diff --git a/src/interfaces/ecpg/preproc/parse.pl b/src/interfaces/ecpg/preproc/parse.pl

index 86d0782d456d89d9268fbfd7c421c626c489ac56..3df94a24af0d8c1947cfbfa1fcae13c6395ca910 100644 (file)
--- a/src/interfaces/ecpg/preproc/parse.pl
+++ b/src/interfaces/ecpg/preproc/parse.pl
@@ -31,27 +31,11 @@ GetOptions(
     'output=s' => \$outfile,
     'parser=s' => \$parser,) or die "wrong arguments";
  
-# open parser / output file early, to raise errors early
-open(my $parserfh, '<', $parser) or die "could not open parser file $parser";
-open(my $outfh, '>', $outfile) or die "could not open output file $outfile";
-
-my $copymode = 0;
-my $brace_indent = 0;
-my $yaccmode = 0;
-my $in_rule = 0;
-my $header_included = 0;
-my $has_feature_not_supported = 0;
-my $has_if_command = 0;
-my $tokenmode = 0;
-
-my (%buff, $infield, $comment, %tokens, %addons);
-my ($stmt_mode, @fields);
-my $line = '';
-my $non_term_id;
  
+# These hash tables define additional transformations to apply to
+# grammar rules.
  
-# some token have to be replaced by other symbols
-# either in the rule
+# Substitutions to apply to tokens whenever they are seen in a rule.
  my %replace_token = (
     'BCONST' => 'ecpg_bconst',
     'FCONST' => 'ecpg_fconst',
@@ -60,7 +44,9 @@ my %replace_token = (
     'IDENT' => 'ecpg_ident',
     'PARAM' => 'ecpg_param',);
  
-# or in the block
+# Substitutions to apply to terminal token names to reconstruct the
+# literal form of the token.  (There is also a hard-wired substitution
+# rule that strips trailing '_P'.)
  my %replace_string = (
     'FORMAT_LA' => 'format',
     'NOT_LA' => 'not',
@@ -75,14 +61,16 @@ my %replace_string = (
     'GREATER_EQUALS' => '>=',
     'NOT_EQUALS' => '<>',);
  
-# specific replace_types for specific non-terminals - never include the ':'
-# ECPG-only replace_types are defined in ecpg-replace_types
+# This hash can provide a result type to override '<str>' for nonterminals
+# that need that, or it can specify 'ignore' to cause us to skip the rule
+# for that nonterminal.  (In that case, ecpg.trailer had better provide
+# a substitute rule.)
  my %replace_types = (
     'PrepareStmt' => '<prep>',
     'ExecuteStmt' => '<exec>',
     'opt_array_bounds' => '<index>',
  
-   # "ignore" means: do not create type and rules for this non-term-id
+   # "ignore" means: do not create type and rules for this nonterminal
     'parse_toplevel' => 'ignore',
     'stmtmulti' => 'ignore',
     'CreateAsStmt' => 'ignore',
@@ -97,9 +85,12 @@ my %replace_types = (
     'plassign_target' => 'ignore',
     'plassign_equals' => 'ignore',);
  
-# these replace_line commands excise certain keywords from the core keyword
-# lists.  Be sure to account for these in ColLabel and related productions.
+# This hash provides an "ignore" option or substitute expansion for any
+# rule or rule alternative.  The hash key is the same "concattokens" tag
+# used for lookup in ecpg.addons.
  my %replace_line = (
+   # These entries excise certain keywords from the core keyword lists.
+   # Be sure to account for these in ColLabel and related productions.
     'unreserved_keywordCONNECTION' => 'ignore',
     'unreserved_keywordCURRENT_P' => 'ignore',
     'unreserved_keywordDAY_P' => 'ignore',
@@ -137,10 +128,77 @@ my %replace_line = (
       'PREPARE prepared_name prep_type_clause AS PreparableStmt',
     'var_nameColId' => 'ECPGColId');
  
+
+# Declare assorted state variables.
+
+# yaccmode counts the '%%' separator lines we have seen, so that we can
+# distinguish prologue, rules, and epilogue sections of gram.y.
+my $yaccmode = 0;
+# in /* ... */ comment?
+my $comment = 0;
+# in { ... } braced text?
+my $brace_indent = 0;
+# within a rule (production)?
+my $in_rule = 0;
+# count of alternatives processed within the current rule.
+my $alt_count = 0;
+# copymode = 1 when we want to emit the current rule to preproc.y.
+# If it's 0, we have decided to ignore the current rule, and should
+# skip all output until we get to the ending semicolon.
+my $copymode = 0;
+# tokenmode = 1 indicates we are processing %token and following declarations.
+my $tokenmode = 0;
+# stmt_mode = 1 indicates that we are processing the 'stmt:' rule.
+my $stmt_mode = 0;
+# Hacky state for emitting feature-not-supported warnings.
+my $has_feature_not_supported = 0;
+my $has_if_command = 0;
+
+# %addons holds the rules loaded from ecpg.addons.
+my %addons;
+
+# %buff holds various named "buffers", which are just strings that accumulate
+# the output destined for different sections of the preproc.y file.  This
+# allows us to process the input in one pass even though the resulting output
+# needs to appear in various places.  See dump_buffer calls below for the
+# set of buffer names and the order in which they'll be dumped.
+my %buff;
+
+# %tokens contains an entry for every name we have discovered to be a token.
+my %tokens;
+
+# $non_term_id is the name of the nonterminal that is the target of the
+# current rule.
+my $non_term_id;
+
+# $line holds the reconstructed rule text (that is, RHS token list) that
+# we plan to emit for the current rule.
+my $line = '';
+
+# @fields holds the items to be emitted in the token-concatenation action
+# for the current rule (assuming we emit one).  "$N" refers to the N'th
+# input token of the rule; anything else is a string to emit literally.
+# (We assume no such string can need to start with '$'.)
+my @fields;
+
+
+# Open parser / output file early, to raise errors early.
+open(my $parserfh, '<', $parser) or die "could not open parser file $parser";
+open(my $outfh, '>', $outfile) or die "could not open output file $outfile";
+
+# Read the various ecpg-supplied input files.
+# ecpg.addons is loaded into the %addons hash, while the other files
+# are just copied into buffers for verbatim output later.
  preload_addons();
+include_file('header', 'ecpg.header');
+include_file('tokens', 'ecpg.tokens');
+include_file('ecpgtype', 'ecpg.type');
+include_file('trailer', 'ecpg.trailer');
  
+# Read gram.y, and do the bulk of the processing.
  main();
  
+# Emit data from the various buffers we filled.
  dump_buffer('header');
  dump_buffer('tokens');
  dump_buffer('types');
@@ -149,7 +207,6 @@ dump_buffer('orig_tokens');
  print $outfh '%%', "\n";
  print $outfh 'prog: statements;', "\n";
  dump_buffer('rules');
-include_file('trailer', 'ecpg.trailer');
  dump_buffer('trailer');
  
  close($parserfh);
@@ -162,83 +219,67 @@ foreach (keys %addons)
  }
  
  
+# Read the backend grammar.
  sub main
  {
    line: while (<$parserfh>)
     {
         chomp;
  
-       # comment out the line below to make the result file match (blank line wise)
-       # the prior version.
-       #next if ($_ eq '');
-
-       # Dump the action for a rule -
-       # stmt_mode indicates if we are processing the 'stmt:'
-       # rule (mode==0 means normal,  mode==1 means stmt:)
-       # flds are the fields to use. These may start with a '$' - in
-       # which case they are the result of a previous non-terminal
-       #
-       # if they don't start with a '$' then they are token name
-       #
-       # len is the number of fields in flds...
-       # leadin is the padding to apply at the beginning (just use for formatting)
-
         if (/^%%/)
         {
-           $tokenmode = 2;
-           $copymode = 1;
+           # New file section, so advance yaccmode.
             $yaccmode++;
-           $infield = 0;
+           # We are no longer examining %token and related commands.
+           $tokenmode = 0;
+           # Shouldn't be anything else on the line.
+           next line;
         }
  
+       # Hacky check for rules that throw FEATURE_NOT_SUPPORTED
+       # (do this before $_ has a chance to get clobbered)
         if ($yaccmode == 1)
         {
-           # Check for rules that throw FEATURE_NOT_SUPPORTED
             $has_feature_not_supported = 1 if /ERRCODE_FEATURE_NOT_SUPPORTED/;
             $has_if_command = 1 if /^\s*if/;
         }
  
+       # We track %prec per-line, not per-rule, which is not quite right
+       # but there are no counterexamples in gram.y at present.
         my $prec = 0;
  
-       # Make sure any braces are split
+       # Make sure any braces are split into separate fields
         s/{/ { /g;
         s/}/ } /g;
  
-       # Any comments are split
+       # Likewise for comment start/end markers
         s|\/\*| /* |g;
         s|\*\/| */ |g;
  
         # Now split the line into individual fields
         my @arr = split(' ');
  
+       # Ignore empty lines
         if (!@arr)
         {
-           # empty line: in tokenmode 1, emit an empty line, else ignore
-           if ($tokenmode == 1)
-           {
-               add_to_buffer('orig_tokens', '');
-           }
             next line;
         }
  
-       if ($arr[0] eq '%token' && $tokenmode == 0)
+       # Once we have seen %token in the prologue, we assume all that follows
+       # up to the '%%' separator is %token and associativity declarations.
+       # Collect and process that as necessary.
+       if ($arr[0] eq '%token' && $yaccmode == 0)
         {
             $tokenmode = 1;
-           include_file('tokens', 'ecpg.tokens');
-       }
-       elsif ($arr[0] eq '%type' && $header_included == 0)
-       {
-           include_file('header', 'ecpg.header');
-           include_file('ecpgtype', 'ecpg.type');
-           $header_included = 1;
         }
  
         if ($tokenmode == 1)
         {
+           # Collect everything of interest on this line into $str.
             my $str = '';
-           my $prior = '';
             for my $a (@arr)
             {
+               # Skip comments.
                 if ($a eq '/*')
                 {
                     $comment++;
@@ -253,40 +294,50 @@ sub main
                 {
                     next;
                 }
+
+               # If it's "<something>", it's a type in a %token declaration,
+               # which we can just drop.
                 if (substr($a, 0, 1) eq '<')
                 {
                     next;
-
-                   # its a type
                 }
+
+               # Remember that this is a token.  This will also make entries
+               # for "%token" and the associativity keywords such as "%left",
+               # which should be harmless so it's not worth the trouble to
+               # avoid it.  If a token appears both in %token and in an
+               # associativity declaration, we'll redundantly re-set its
+               # entry, which is also OK.
                 $tokens{$a} = 1;
  
+               # Accumulate the line in $str.
                 $str = $str . ' ' . $a;
-               if ($a eq 'IDENT' && $prior eq '%nonassoc')
-               {
  
-                   # add more tokens to the list
+               # HACK: insert our own %nonassoc line after IDENT.
+               # XXX: this seems pretty wrong, IDENT is not last on its line!
+               if ($a eq 'IDENT' && $arr[0] eq '%nonassoc')
+               {
                     $str = $str . "\n%nonassoc CSTRING";
                 }
-               $prior = $a;
             }
+           # Save the lightly-processed line in orig_tokens.
             add_to_buffer('orig_tokens', $str);
             next line;
         }
  
-       # Don't worry about anything if we're not in the right section of gram.y
+       # The rest is only appropriate if we're in the rules section of gram.y
         if ($yaccmode != 1)
         {
             next line;
         }
  
-
-       # Go through each field in turn
+       # Go through each word of the rule in turn
         for (
             my $fieldIndexer = 0;
             $fieldIndexer < scalar(@arr);
             $fieldIndexer++)
         {
+           # Detect and ignore comments and braced action text
             if ($arr[$fieldIndexer] eq '*/' && $comment)
             {
                 $comment = 0;
@@ -298,15 +349,10 @@ sub main
             }
             elsif ($arr[$fieldIndexer] eq '/*')
             {
-
-               # start of a multiline comment
+               # start of a possibly-multiline comment
                 $comment = 1;
                 next;
             }
-           elsif ($arr[$fieldIndexer] eq '//')
-           {
-               next line;
-           }
             elsif ($arr[$fieldIndexer] eq '}')
             {
                 $brace_indent--;
@@ -317,29 +363,35 @@ sub main
                 $brace_indent++;
                 next;
             }
-
             if ($brace_indent > 0)
             {
                 next;
             }
+
+           # OK, it's not a comment or part of an action.
+           # Check for ';' ending the current rule, or '|' ending the
+           # current alternative.
             if ($arr[$fieldIndexer] eq ';')
             {
                 if ($copymode)
                 {
-                   if ($infield)
-                   {
-                       dump_line($stmt_mode, \@fields);
-                   }
+                   # Print the accumulated rule.
+                   emit_rule(\@fields);
                     add_to_buffer('rules', ";\n\n");
                 }
                 else
                 {
+                   # End of an ignored rule; revert to copymode = 1.
                     $copymode = 1;
                 }
+
+               # Reset for the next rule.
                 @fields = ();
-               $infield = 0;
                 $line = '';
                 $in_rule = 0;
+               $alt_count = 0;
+               $has_feature_not_supported = 0;
+               $has_if_command = 0;
                 next;
             }
  
@@ -347,56 +399,68 @@ sub main
             {
                 if ($copymode)
                 {
-                   if ($infield)
-                   {
-                       $infield = $infield + dump_line($stmt_mode, \@fields);
-                   }
-                   if ($infield > 1)
-                   {
-                       $line = '| ';
-                   }
+                   # Print the accumulated alternative.
+                   # Increment $alt_count for each non-ignored alternative.
+                   $alt_count += emit_rule(\@fields);
                 }
+
+               # Reset for the next alternative.
                 @fields = ();
+               # Start the next line with '|' if we've printed at least one
+               # alternative.
+               if ($alt_count > 1)
+               {
+                   $line = '| ';
+               }
+               else
+               {
+                   $line = '';
+               }
+               $has_feature_not_supported = 0;
+               $has_if_command = 0;
                 next;
             }
  
+           # Apply replace_token substitution if we have one.
             if (exists $replace_token{ $arr[$fieldIndexer] })
             {
                 $arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] };
             }
  
-           # Are we looking at a declaration of a non-terminal ?
-           if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/)
+           # Are we looking at a declaration of a non-terminal?
+           # We detect that by seeing ':' on the end of the token or
+           # as the next token.
+           if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:$/)
                 || (   $fieldIndexer + 1 < scalar(@arr)
                     && $arr[ $fieldIndexer + 1 ] eq ':'))
             {
+               # Extract the non-terminal, sans : if any
                 $non_term_id = $arr[$fieldIndexer];
                 $non_term_id =~ tr/://d;
  
+               # Consume the ':' if it's separate
+               if (!($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:$/))
+               {
+                   $fieldIndexer++;
+               }
+
+               # Check for %replace_types override of nonterminal's type
                 if (not defined $replace_types{$non_term_id})
                 {
+                   # By default, the type is <str>
                     $replace_types{$non_term_id} = '<str>';
-                   $copymode = 1;
                 }
                 elsif ($replace_types{$non_term_id} eq 'ignore')
                 {
+                   # We'll ignore this nonterminal and rule altogether.
                     $copymode = 0;
-                   $line = '';
                     next line;
                 }
-               $line = $line . ' ' . $arr[$fieldIndexer];
  
-               # Do we have the : attached already ?
-               # If yes, we'll have already printed the ':'
-               if (!($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:'))
-               {
+               # OK, we want this rule.
+               $copymode = 1;
  
-                   # Consume the ':' which is next...
-                   $line = $line . ':';
-                   $fieldIndexer++;
-               }
-
-               # Special mode?
+               # Set special mode for the "stmt:" rule.
                 if ($non_term_id eq 'stmt')
                 {
                     $stmt_mode = 1;
@@ -405,69 +469,73 @@ sub main
                 {
                     $stmt_mode = 0;
                 }
+
+               # Emit appropriate %type declaration for this nonterminal.
                 my $tstr =
                     '%type '
                   . $replace_types{$non_term_id} . ' '
                   . $non_term_id;
                 add_to_buffer('types', $tstr);
  
-               if ($copymode)
-               {
-                   add_to_buffer('rules', $line);
-               }
+               # Emit the target part of the rule.
+               # Note: the leading space is just to match
+               # the rather weird pre-v18 output logic.
+               $tstr = ' ' . $non_term_id . ':';
+               add_to_buffer('rules', $tstr);
+
+               # Prepare for reading the fields (tokens) of the rule.
                 $line = '';
                 @fields = ();
-               $infield = 1;
                 die "unterminated rule at grammar line $.\n"
                   if $in_rule;
                 $in_rule = 1;
+               $alt_count = 1;
                 next;
             }
             elsif ($copymode)
             {
+               # Not a nonterminal declaration, so just add it to $line.
                 $line = $line . ' ' . $arr[$fieldIndexer];
             }
+
+           # %prec and whatever follows it should get added to $line,
+           # but not to @fields.
             if ($arr[$fieldIndexer] eq '%prec')
             {
                 $prec = 1;
                 next;
             }
  
+           # Emit transformed version of token to @fields if appropriate.
             if (   $copymode
                 && !$prec
                 && !$comment
-               && $fieldIndexer < scalar(@arr)
-               && length($arr[$fieldIndexer])
-               && $infield)
+               && $in_rule)
             {
-               if ($arr[$fieldIndexer] ne 'Op'
-                   && ((   defined $tokens{ $arr[$fieldIndexer] }
-                           && $tokens{ $arr[$fieldIndexer] } > 0)
-                       || $arr[$fieldIndexer] =~ /'.+'/)
-                   || $stmt_mode == 1)
+               my $S = $arr[$fieldIndexer];
+
+               # If it's a known terminal token (other than Op) or a literal
+               # character, we need to emit the equivalent string, which'll
+               # later get wrapped into a C string literal, perhaps after
+               # merging with adjacent strings.
+               if ($S ne 'Op'
+                   && (defined $tokens{$S}
+                       || $S =~ /^'.+'$/))
                 {
-                   my $S;
-                   if (exists $replace_string{ $arr[$fieldIndexer] })
-                   {
-                       $S = $replace_string{ $arr[$fieldIndexer] };
-                   }
-                   else
-                   {
-                       $S = $arr[$fieldIndexer];
-                   }
-                   $S =~ s/_P//g;
+                   # Apply replace_string substitution if any.
+                   $S = $replace_string{$S} if (exists $replace_string{$S});
+                   # Automatically strip _P if present.
+                   $S =~ s/_P$//;
+                   # And get rid of quotes if it's a literal character.
                     $S =~ tr/'//d;
-                   if ($stmt_mode == 1)
-                   {
-                       push(@fields, $S);
-                   }
-                   else
-                   {
-                       push(@fields, lc($S));
-                   }
+                   # Finally, downcase and push into @fields.
+                   push(@fields, lc($S));
                 }
                 else
                 {
+                   # Otherwise, push a $N reference to this input token.
+                   # (We assume this cannot be confused with anything the
+                   # above code would produce.)
                     push(@fields, '$' . (scalar(@fields) + 1));
                 }
             }
@@ -495,94 +563,108 @@ sub include_file
     return;
  }
  
-sub include_addon
+# Emit the semantic action for the current rule.
+# This function mainly accounts for any modifications specified
+# by an ecpg.addons entry.
+sub emit_rule_action
  {
-   my ($buffer, $block, $fields, $stmt_mode) = @_;
-   my $rec = $addons{$block};
-   return 0 unless $rec;
+   my ($tag, $fields) = @_;
  
-   # Track usage for later cross-check
+   # See if we have an addons entry; if not, just emit default action
+   my $rec = $addons{$tag};
+   if (!$rec)
+   {
+       emit_default_action($fields, 0);
+       return;
+   }
+
+   # Track addons entry usage for later cross-check
     $rec->{used}++;
  
     my $rectype = $rec->{type};
     if ($rectype eq 'rule')
     {
-       dump_fields($stmt_mode, $fields, ' { ');
+       # Emit default action and then the code block.
+       emit_default_action($fields, 0);
     }
     elsif ($rectype eq 'addon')
     {
+       # Emit the code block wrapped in the same braces as the default action.
         add_to_buffer('rules', ' { ');
     }
  
-   #add_to_buffer( $stream, $_ );
-   #We have an array to add to the buffer, we'll add it ourself instead of
-   #calling add_to_buffer, which does not know about arrays
-
-   push(@{ $buff{$buffer} }, @{ $rec->{lines} });
+   # Emit the addons entry's code block.
+   # We have an array to add to the buffer, we'll add it directly instead of
+   # calling add_to_buffer, which does not know about arrays.
+   push(@{ $buff{'rules'} }, @{ $rec->{lines} });
  
     if ($rectype eq 'addon')
     {
-       dump_fields($stmt_mode, $fields, '');
+       emit_default_action($fields, 1);
     }
-
-
-   # if we added something (ie there are lines in our array), return 1
-   return 1 if (scalar(@{ $rec->{lines} }) > 0);
-   return 0;
+   return;
  }
  
-
-# include_addon does this same thing, but does not call this
-# sub... so if you change this, you need to fix include_addon too
+# Add the given line to the specified buffer.
  #   Pass:  buffer_name, string_to_append
+# Note we add a newline automatically.
  sub add_to_buffer
  {
     push(@{ $buff{ $_[0] } }, "$_[1]\n");
     return;
  }
  
+# Dump the specified buffer to the output file.
  sub dump_buffer
  {
     my ($buffer) = @_;
+   # Label the output for debugging purposes.
     print $outfh '/* ', $buffer, ' */', "\n";
     my $ref = $buff{$buffer};
     print $outfh @$ref;
     return;
  }
  
-sub dump_fields
+# Emit the default action (usually token concatenation) for the current rule.
+#   Pass: fields array, brace_printed boolean
+# brace_printed should be true if caller already printed action's open brace.
+sub emit_default_action
  {
-   my ($mode, $flds, $ln) = @_;
+   my ($flds, $brace_printed) = @_;
     my $len = scalar(@$flds);
  
-   if ($mode == 0)
+   if ($stmt_mode == 0)
     {
-
-       #Normal
-       add_to_buffer('rules', $ln);
+       # Normal rule
         if ($has_feature_not_supported and not $has_if_command)
         {
             # The backend unconditionally reports
             # FEATURE_NOT_SUPPORTED in this rule, so let's emit
             # a warning on the ecpg side.
+           if (!$brace_printed)
+           {
+               add_to_buffer('rules', ' { ');
+               $brace_printed = 1;
+           }
             add_to_buffer('rules',
                 'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");'
             );
         }
-       $has_feature_not_supported = 0;
-       $has_if_command = 0;
  
         if ($len == 0)
         {
-
-           # We have no fields ?
+           # Empty rule
+           if (!$brace_printed)
+           {
+               add_to_buffer('rules', ' { ');
+               $brace_printed = 1;
+           }
             add_to_buffer('rules', ' $$=EMPTY; }');
         }
         else
         {
-
-           # Go through each field and try to 'aggregate' the tokens
-           # into a single 'mm_strdup' where possible
+           # Go through each field and aggregate consecutive literal tokens
+           # into a single 'mm_strdup' call.
             my @flds_new;
             my $str;
             for (my $z = 0; $z < $len; $z++)
@@ -600,8 +682,10 @@ sub dump_fields
                     if ($z >= $len - 1
                         || substr($flds->[ $z + 1 ], 0, 1) eq '$')
                     {
-
-                       # We're at the end...
+                       # Can't combine any more literals; push to @flds_new.
+                       # This code would need work if any literals contain
+                       # backslash or double quote, but right now that never
+                       # happens.
                         push(@flds_new, "mm_strdup(\"$str\")");
                         last;
                     }
@@ -614,49 +698,62 @@ sub dump_fields
             $len = scalar(@flds_new);
             if ($len == 1)
             {
-
-               # Straight assignment
+               # Single field can be handled by straight assignment
+               if (!$brace_printed)
+               {
+                   add_to_buffer('rules', ' { ');
+                   $brace_printed = 1;
+               }
                 $str = ' $$ = ' . $flds_new[0] . ';';
                 add_to_buffer('rules', $str);
             }
             else
             {
-
-               # Need to concatenate the results to form
-               # our final string
+               # Need to concatenate the results to form our final string
+               if (!$brace_printed)
+               {
+                   add_to_buffer('rules', ' { ');
+                   $brace_printed = 1;
+               }
                 $str =
                   ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');';
                 add_to_buffer('rules', $str);
             }
-           add_to_buffer('rules', '}');
+           add_to_buffer('rules', '}') if ($brace_printed);
         }
     }
     else
     {
-
-       # we're in the stmt: rule
+       # We're in the "stmt:" rule, where we need to output special actions.
+       # This code assumes that no ecpg.addons entry applies.
         if ($len)
         {
-
-           # or just the statement ...
+           # Any regular kind of statement calls output_statement
             add_to_buffer('rules',
                 ' { output_statement($1, 0, ECPGst_normal); }');
         }
         else
         {
+           # The empty production for stmt: do nothing
             add_to_buffer('rules', ' { $$ = NULL; }');
         }
     }
     return;
  }
  
-
-sub dump_line
+# Print the accumulated rule text (in $line) and the appropriate action.
+# Ordinarily return 1.  However, if the rule matches an "ignore"
+# entry in %replace_line, then do nothing and return 0.
+sub emit_rule
  {
-   my ($stmt_mode, $fields) = @_;
-   my $block = $non_term_id . $line;
-   $block =~ tr/ |//d;
-   my $rep = $replace_line{$block};
+   my ($fields) = @_;
+
+   # compute tag to be used as lookup key in %replace_line and %addons
+   my $tag = $non_term_id . $line;
+   $tag =~ tr/ |//d;
+
+   # apply replace_line substitution if any
+   my $rep = $replace_line{$tag};
     if ($rep)
     {
         if ($rep eq 'ignore')
@@ -664,6 +761,7 @@ sub dump_line
             return 0;
         }
  
+       # non-ignore entries replace the line, but we'd better keep any '|'
         if (index($line, '|') != -1)
         {
             $line = '| ' . $rep;
@@ -672,15 +770,15 @@ sub dump_line
         {
             $line = $rep;
         }
-       $block = $non_term_id . $line;
-       $block =~ tr/ |//d;
+
+       # recompute tag for use in emit_rule_action
+       $tag = $non_term_id . $line;
+       $tag =~ tr/ |//d;
     }
+
+   # Emit $line, then print the appropriate action.
     add_to_buffer('rules', $line);
-   my $i = include_addon('rules', $block, $fields, $stmt_mode);
-   if ($i == 0)
-   {
-       dump_fields($stmt_mode, $fields, ' { ');
-   }
+   emit_rule_action($tag, $fields);
     return 1;
  }
author	Tom Lane <[email protected]>
	Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)
committer	Tom Lane <[email protected]>
	Mon, 14 Oct 2024 17:37:33 +0000 (13:37 -0400)