diff options
author | tomoya ishida <[email protected]> | 2023-06-16 00:39:53 +0900 |
---|---|---|
committer | git <[email protected]> | 2023-06-15 15:39:58 +0000 |
commit | 364a6d56d776270da09604816d623047c66c5e32 (patch) | |
tree | 2e482a3252c5a366e9aab8fe23ae3757759069a3 | |
parent | c1c926219de5489c321d53577ff2eb8c041e166f (diff) |
[ruby/irb] Rewrite RubyLex to fix some bugs and make it possible to
add new features easily
(https://github.com/ruby/irb/pull/500)
* Add nesting level parser for multiple use (indent, prompt, termination check)
* Rewrite RubyLex using NestingParser
* Add nesting parser tests, fix some existing tests
* Add description comment, rename method to NestingParser
* Add comments and tweak code to RubyLex
* Update NestingParser test
* Extract list of ltype tokens to constants
-rw-r--r-- | lib/irb/nesting_parser.rb | 227 | ||||
-rw-r--r-- | lib/irb/ruby-lex.rb | 553 | ||||
-rw-r--r-- | test/irb/test_nesting_parser.rb | 303 | ||||
-rw-r--r-- | test/irb/test_ruby_lex.rb | 86 |
4 files changed, 661 insertions, 508 deletions
diff --git a/lib/irb/nesting_parser.rb b/lib/irb/nesting_parser.rb new file mode 100644 index 0000000000..3d4db82444 --- /dev/null +++ b/lib/irb/nesting_parser.rb @@ -0,0 +1,227 @@ +# frozen_string_literal: true +module IRB + module NestingParser + IGNORE_TOKENS = %i[on_sp on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end] + + # Scan each token and call the given block with array of token and other information for parsing + def self.scan_opens(tokens) + opens = [] + pending_heredocs = [] + first_token_on_line = true + tokens.each do |t| + skip = false + last_tok, state, args = opens.last + case state + when :in_unquoted_symbol + unless IGNORE_TOKENS.include?(t.event) + opens.pop + skip = true + end + when :in_lambda_head + opens.pop if t.event == :on_tlambeg || (t.event == :on_kw && t.tok == 'do') + when :in_method_head + unless IGNORE_TOKENS.include?(t.event) + next_args = [] + body = nil + if args.include?(:receiver) + case t.event + when :on_lparen, :on_ivar, :on_gvar, :on_cvar + # def (receiver). | def @ivar. | def $gvar. | def @@cvar. + next_args << :dot + when :on_kw + case t.tok + when 'self', 'true', 'false', 'nil' + # def self(arg) | def self. + next_args.push(:arg, :dot) + else + # def if(arg) + skip = true + next_args << :arg + end + when :on_op, :on_backtick + # def +(arg) + skip = true + next_args << :arg + when :on_ident, :on_const + # def a(arg) | def a. + next_args.push(:arg, :dot) + end + end + if args.include?(:dot) + # def receiver.name + next_args << :name if t.event == :on_period || (t.event == :on_op && t.tok == '::') + end + if args.include?(:name) + if %i[on_ident on_const on_op on_kw on_backtick].include?(t.event) + # def name(arg) | def receiver.name(arg) + next_args << :arg + skip = true + end + end + if args.include?(:arg) + case t.event + when :on_nl, :on_semicolon + # def recever.f; + body = :normal + when :on_lparen + # def recever.f() + next_args << :eq + else + if t.event == :on_op && t.tok == '=' + # def receiver.f = + body = :oneliner + else + # def recever.f arg + next_args << :arg_without_paren + end + end + end + if args.include?(:eq) + if t.event == :on_op && t.tok == '=' + body = :oneliner + else + body = :normal + end + end + if args.include?(:arg_without_paren) + if %i[on_semicolon on_nl].include?(t.event) + # def f a; + body = :normal + else + # def f a, b + next_args << :arg_without_paren + end + end + if body == :oneliner + opens.pop + elsif body + opens[-1] = [last_tok, nil] + else + opens[-1] = [last_tok, :in_method_head, next_args] + end + end + when :in_for_while_until_condition + if t.event == :on_semicolon || t.event == :on_nl || (t.event == :on_kw && t.tok == 'do') + skip = true if t.event == :on_kw && t.tok == 'do' + opens[-1] = [last_tok, nil] + end + end + + unless skip + case t.event + when :on_kw + case t.tok + when 'begin', 'class', 'module', 'do', 'case' + opens << [t, nil] + when 'end' + opens.pop + when 'def' + opens << [t, :in_method_head, [:receiver, :name]] + when 'if', 'unless' + unless t.state.allbits?(Ripper::EXPR_LABEL) + opens << [t, nil] + end + when 'while', 'until' + unless t.state.allbits?(Ripper::EXPR_LABEL) + opens << [t, :in_for_while_until_condition] + end + when 'ensure', 'rescue' + unless t.state.allbits?(Ripper::EXPR_LABEL) + opens.pop + opens << [t, nil] + end + when 'elsif', 'else', 'when' + opens.pop + opens << [t, nil] + when 'for' + opens << [t, :in_for_while_until_condition] + when 'in' + if last_tok&.event == :on_kw && %w[case in].include?(last_tok.tok) && first_token_on_line + opens.pop + opens << [t, nil] + end + end + when :on_tlambda + opens << [t, :in_lambda_head] + when :on_lparen, :on_lbracket, :on_lbrace, :on_tlambeg, :on_embexpr_beg, :on_embdoc_beg + opens << [t, nil] + when :on_rparen, :on_rbracket, :on_rbrace, :on_embexpr_end, :on_embdoc_end + opens.pop + when :on_heredoc_beg + pending_heredocs << t + when :on_heredoc_end + opens.pop + when :on_backtick + opens << [t, nil] if t.state.allbits?(Ripper::EXPR_BEG) + when :on_tstring_beg, :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_regexp_beg + opens << [t, nil] + when :on_tstring_end, :on_regexp_end, :on_label_end + opens.pop + when :on_symbeg + if t.tok == ':' + opens << [t, :in_unquoted_symbol] + else + opens << [t, nil] + end + end + end + if t.event == :on_nl || t.event == :on_semicolon + first_token_on_line = true + elsif t.event != :on_sp + first_token_on_line = false + end + if pending_heredocs.any? && t.tok.include?("\n") + pending_heredocs.reverse_each { |t| opens << [t, nil] } + pending_heredocs = [] + end + yield t, opens if block_given? + end + opens.map(&:first) + pending_heredocs.reverse + end + + def self.open_tokens(tokens) + # scan_opens without block will return a list of open tokens at last token position + scan_opens(tokens) + end + + # Calculates token information [line_tokens, prev_opens, next_opens, min_depth] for each line. + # Example code + # ["hello + # world"+( + # First line + # line_tokens: [[lbracket, '['], [tstring_beg, '"'], [tstring_content("hello\nworld"), "hello\n"]] + # prev_opens: [] + # next_tokens: [lbracket, tstring_beg] + # min_depth: 0 (minimum at beginning of line) + # Second line + # line_tokens: [[tstring_content("hello\nworld"), "world"], [tstring_end, '"'], [op, '+'], [lparen, '(']] + # prev_opens: [lbracket, tstring_beg] + # next_tokens: [lbracket, lparen] + # min_depth: 1 (minimum just after tstring_end) + def self.parse_by_line(tokens) + line_tokens = [] + prev_opens = [] + min_depth = 0 + output = [] + last_opens = scan_opens(tokens) do |t, opens| + depth = t == opens.last&.first ? opens.size - 1 : opens.size + min_depth = depth if depth < min_depth + if t.tok.include?("\n") + t.tok.each_line do |line| + line_tokens << [t, line] + next if line[-1] != "\n" + next_opens = opens.map(&:first) + output << [line_tokens, prev_opens, next_opens, min_depth] + prev_opens = next_opens + min_depth = prev_opens.size + line_tokens = [] + end + else + line_tokens << [t, t.tok] + end + end + output << [line_tokens, prev_opens, last_opens, min_depth] if line_tokens.any? + output + end + end +end diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index e29d52e47c..77c5b07ae9 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -6,6 +6,7 @@ require "ripper" require "jruby" if RUBY_ENGINE == "jruby" +require_relative "nesting_parser" # :stopdoc: class RubyLex @@ -54,8 +55,7 @@ class RubyLex if @io.respond_to?(:check_termination) @io.check_termination do |code| if Reline::IOGate.in_pasting? - lex = RubyLex.new(@context) - rest = lex.check_termination_in_prev_line(code) + rest = check_termination_in_prev_line(code) if rest Reline.delete_text rest.bytes.reverse_each do |c| @@ -69,64 +69,39 @@ class RubyLex # Accept any single-line input for symbol aliases or commands that transform args next true if single_line_command?(code) - ltype, indent, continue, code_block_open = check_code_state(code) - if ltype or indent > 0 or continue or code_block_open - false - else - true - end + _tokens, _opens, terminated = check_code_state(code) + terminated end end end if @io.respond_to?(:dynamic_prompt) @io.dynamic_prompt do |lines| lines << '' if lines.empty? - result = [] tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context) - code = String.new - partial_tokens = [] - unprocessed_tokens = [] - line_num_offset = 0 - tokens.each do |t| - partial_tokens << t - unprocessed_tokens << t - if t.tok.include?("\n") - t_str = t.tok - t_str.each_line("\n") do |s| - code << s - next unless s.include?("\n") - ltype, indent, continue, code_block_open = check_state(code, partial_tokens) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) - line_num_offset += 1 - end - unprocessed_tokens = [] - else - code << t.tok + line_results = IRB::NestingParser.parse_by_line(tokens) + tokens_until_line = [] + line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset| + line_tokens.each do |token, _s| + # Avoid appending duplicated token. Tokens that include "\n" like multiline tstring_content can exist in multiple lines. + tokens_until_line << token if token != tokens_until_line.last end + continue = process_continue(tokens_until_line) + prompt(next_opens, continue, line_num_offset) end - - unless unprocessed_tokens.empty? - ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens) - result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset) - end - result end end if @io.respond_to?(:auto_indent) and @context.auto_indent_mode @io.auto_indent do |lines, line_index, byte_pointer, is_newline| if is_newline - @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context) - prev_spaces = find_prev_spaces(line_index) - depth_difference = check_newline_depth_difference - depth_difference = 0 if depth_difference < 0 - prev_spaces + depth_difference * 2 + tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context) + process_indent_level(tokens, lines) else code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join last_line = lines[line_index]&.byteslice(0, byte_pointer) code += last_line if last_line - @tokens = self.class.ripper_lex_without_warning(code, context: @context) - check_corresponding_token_depth(lines, line_index) + tokens = self.class.ripper_lex_without_warning(code, context: @context) + check_corresponding_token_depth(tokens, lines, line_index) end end end @@ -176,50 +151,30 @@ class RubyLex $VERBOSE = verbose end - def find_prev_spaces(line_index) - return 0 if @tokens.size == 0 - md = @tokens[0].tok.match(/(\A +)/) - prev_spaces = md.nil? ? 0 : md[1].count(' ') - line_count = 0 - @tokens.each_with_index do |t, i| - if t.tok.include?("\n") - line_count += t.tok.count("\n") - if line_count >= line_index - return prev_spaces - end - next if t.event == :on_tstring_content || t.event == :on_words_sep - if (@tokens.size - 1) > i - md = @tokens[i + 1].tok.match(/(\A +)/) - prev_spaces = md.nil? ? 0 : md[1].count(' ') - end - end - end - prev_spaces - end - - def check_state(code, tokens) - ltype = process_literal_type(tokens) - indent = process_nesting_level(tokens) - continue = process_continue(tokens) - lvars_code = self.class.generate_local_variables_assign_code(@context.local_variables) - code = "#{lvars_code}\n#{code}" if lvars_code - code_block_open = check_code_block(code, tokens) - [ltype, indent, continue, code_block_open] + def prompt(opens, continue, line_num_offset) + ltype = ltype_from_open_tokens(opens) + _indent_level, nesting_level = calc_nesting_depth(opens) + @prompt&.call(ltype, nesting_level, opens.any? || continue, @line_no + line_num_offset) end def check_code_state(code) check_target_code = code.gsub(/\s*\z/, '').concat("\n") tokens = self.class.ripper_lex_without_warning(check_target_code, context: @context) - check_state(check_target_code, tokens) + opens = IRB::NestingParser.open_tokens(tokens) + [tokens, opens, code_terminated?(code, tokens, opens)] end - def save_prompt_to_context_io(ltype, indent, continue, line_num_offset) + def code_terminated?(code, tokens, opens) + opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens) + end + + def save_prompt_to_context_io(opens, continue, line_num_offset) # Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`. - @prompt.call(ltype, indent, continue, @line_no + line_num_offset) + prompt(opens, continue, line_num_offset) end def readmultiline - save_prompt_to_context_io(nil, 0, false, 0) + save_prompt_to_context_io([], false, 0) # multiline return @input.call if @io.respond_to?(:check_termination) @@ -237,11 +192,12 @@ class RubyLex # Accept any single-line input for symbol aliases or commands that transform args return code if single_line_command?(code) - ltype, indent, continue, code_block_open = check_code_state(code) - return code unless ltype or indent > 0 or continue or code_block_open + tokens, opens, terminated = check_code_state(code) + return code if terminated line_offset += 1 - save_prompt_to_context_io(ltype, indent, continue, line_offset) + continue = process_continue(tokens) + save_prompt_to_context_io(opens, continue, line_offset) end end @@ -282,9 +238,6 @@ class RubyLex def check_code_block(code, tokens) return true if tokens.empty? - if tokens.last.event == :on_heredoc_beg - return true - end begin # check if parser error are available verbose, $VERBOSE = $VERBOSE, nil @@ -372,365 +325,82 @@ class RubyLex false end - def process_nesting_level(tokens) - indent = 0 - in_oneliner_def = nil - tokens.each_with_index { |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - indent -= 1 - end - in_oneliner_def = nil - end - end - + # Calculates [indent_level, nesting_level]. nesting_level is used in prompt string. + def calc_nesting_depth(opens) + indent_level = 0 + nesting_level = 0 + opens.each do |t| case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - indent += 1 - when :on_rbracket, :on_rbrace, :on_rparen - indent -= 1 - when :on_kw - next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index) - indent += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - indent += 1 - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL) - when 'end' - indent -= 1 - end - end - # percent literals are not indented - } - indent - end - - def is_method_calling?(tokens, index) - tk = tokens[index] - if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident - # The target method call to pass the block with "do". - return true - elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident - non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp } - if non_sp_index - prev_tk = tokens[non_sp_index] - if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period - # The target method call with receiver to pass the block with "do". - return true - end + when :on_heredoc_beg + # TODO: indent heredoc + when :on_tstring_beg, :on_regexp_beg, :on_symbeg + # can be indented if t.tok starts with `%` + when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_embexpr_beg + # can be indented but not indented in current implementation + when :on_embdoc_beg + indent_level = 0 + else + nesting_level += 1 + indent_level += 1 end end - false + [indent_level, nesting_level] end - def take_corresponding_syntax_to_kw_do(tokens, index) - syntax_of_do = nil - # Finding a syntax corresponding to "do". - index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event) - first_in_fomula = true - end - if is_method_calling?(tokens, i) - syntax_of_do = :method_calling - break if first_in_fomula - elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok) - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. - syntax_of_do = :loop_syntax - break if first_in_fomula - end + def free_indent_token(opens, line_index) + last_token = opens.last + return unless last_token + if last_token.event == :on_heredoc_beg && last_token.pos.first < line_index + 1 + # accept extra indent spaces inside heredoc + last_token end - syntax_of_do end - def is_the_in_correspond_to_a_for(tokens, index) - syntax_of_in = nil - # Finding a syntax corresponding to "do". - index.downto(0) do |i| - tk = tokens[i] - # In "continue", the token isn't the corresponding syntax to "do". - non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp } - first_in_fomula = false - if non_sp_index.nil? - first_in_fomula = true - elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event) - first_in_fomula = true - end - if tk.event == :on_kw && tk.tok == 'for' - # A loop syntax in front of "do" found. - # - # while cond do # also "until" or "for" - # end - # - # This "do" doesn't increment indent because the loop syntax already - # incremented. - syntax_of_in = :for - end - break if first_in_fomula + def process_indent_level(tokens, lines) + opens = IRB::NestingParser.open_tokens(tokens) + indent_level, _nesting_level = calc_nesting_depth(opens) + indent = indent_level * 2 + line_index = lines.size - 2 + if free_indent_token(opens, line_index) + return [indent, lines[line_index][/^ */].length].max end - syntax_of_in - end - - def check_newline_depth_difference - depth_difference = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - depth_difference -= 1 - end - in_oneliner_def = nil - end - end - case t.event - when :on_ignored_nl, :on_nl, :on_comment - if index != (@tokens.size - 1) and in_oneliner_def != :BODY - depth_difference = 0 - open_brace_on_line = 0 - end - next - when :on_sp - next - end - - case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - depth_difference += 1 - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - depth_difference -= 1 if open_brace_on_line > 0 - when :on_kw - next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - depth_difference += 1 if syntax_of_do == :method_calling - when 'def', 'case', 'for', 'begin', 'class', 'module' - depth_difference += 1 - when 'if', 'unless', 'while', 'until', 'rescue' - # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL - unless t.state.allbits?(Ripper::EXPR_LABEL) - depth_difference += 1 - end - when 'else', 'elsif', 'ensure', 'when' - depth_difference += 1 - when 'in' - unless is_the_in_correspond_to_a_for(@tokens, index) - depth_difference += 1 - end - when 'end' - depth_difference -= 1 - end - end - end - depth_difference + indent end - def check_corresponding_token_depth(lines, line_index) - corresponding_token_depth = nil - is_first_spaces_of_line = true - is_first_printable_of_line = true - spaces_of_nest = [] - spaces_at_line_head = 0 - open_brace_on_line = 0 - in_oneliner_def = nil - - if heredoc_scope? + def check_corresponding_token_depth(tokens, lines, line_index) + line_results = IRB::NestingParser.parse_by_line(tokens) + result = line_results[line_index] + return unless result + + # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation. + # Shortest open tokens can be calculated by `opens.take(min_depth)` + _tokens, prev_opens, opens, min_depth = result + indent_level, _nesting_level = calc_nesting_depth(opens.take(min_depth)) + indent = indent_level * 2 + free_indent_tok = free_indent_token(opens, line_index) + prev_line_free_indent_tok = free_indent_token(prev_opens, line_index - 1) + if prev_line_free_indent_tok && prev_line_free_indent_tok != free_indent_tok + return indent + elsif free_indent_tok return lines[line_index][/^ */].length end - - @tokens.each_with_index do |t, index| - # detecting one-liner method definition - if in_oneliner_def.nil? - if t.state.allbits?(Ripper::EXPR_ENDFN) - in_oneliner_def = :ENDFN - end - else - if t.state.allbits?(Ripper::EXPR_ENDFN) - # continuing - elsif t.state.allbits?(Ripper::EXPR_BEG) - if t.tok == '=' - in_oneliner_def = :BODY - end - else - if in_oneliner_def == :BODY - # one-liner method definition - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - end - in_oneliner_def = nil - end - end - - case t.event - when :on_ignored_nl, :on_nl, :on_comment, :on_heredoc_end, :on_embdoc_end - if in_oneliner_def != :BODY - corresponding_token_depth = nil - spaces_at_line_head = 0 - is_first_spaces_of_line = true - is_first_printable_of_line = true - open_brace_on_line = 0 - end - next - when :on_sp - spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line - is_first_spaces_of_line = false - next - end - - case t.event - when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg - spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2) - open_brace_on_line += 1 - when :on_rbracket, :on_rbrace, :on_rparen - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - open_brace_on_line -= 1 - when :on_kw - next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME) - case t.tok - when 'do' - syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index) - if syntax_of_do == :method_calling - spaces_of_nest.push(spaces_at_line_head) - end - when 'def', 'case', 'for', 'begin', 'class', 'module' - spaces_of_nest.push(spaces_at_line_head) - when 'rescue' - unless t.state.allbits?(Ripper::EXPR_LABEL) - corresponding_token_depth = spaces_of_nest.last - end - when 'if', 'unless', 'while', 'until' - # postfix if/unless/while/until must be Ripper::EXPR_LABEL - unless t.state.allbits?(Ripper::EXPR_LABEL) - spaces_of_nest.push(spaces_at_line_head) - end - when 'else', 'elsif', 'ensure', 'when' - corresponding_token_depth = spaces_of_nest.last - when 'in' - if in_keyword_case_scope? - corresponding_token_depth = spaces_of_nest.last - end - when 'end' - if is_first_printable_of_line - corresponding_token_depth = spaces_of_nest.pop - else - spaces_of_nest.pop - corresponding_token_depth = nil - end - end - end - is_first_spaces_of_line = false - is_first_printable_of_line = false - end - corresponding_token_depth + prev_indent_level, _prev_nesting_level = calc_nesting_depth(prev_opens) + indent if indent_level < prev_indent_level end - def check_string_literal(tokens) - i = 0 - start_token = [] - end_type = [] - pending_heredocs = [] - while i < tokens.size - t = tokens[i] - case t.event - when *end_type.last - start_token.pop - end_type.pop - when :on_tstring_beg - start_token << t - end_type << [:on_tstring_end, :on_label_end] - when :on_regexp_beg - start_token << t - end_type << :on_regexp_end - when :on_symbeg - acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick} - if (i + 1) < tokens.size - if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st } - start_token << t - end_type << :on_tstring_end - else - i += 1 - end - end - when :on_backtick - if t.state.allbits?(Ripper::EXPR_BEG) - start_token << t - end_type << :on_tstring_end - end - when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg - start_token << t - end_type << :on_tstring_end - when :on_heredoc_beg - pending_heredocs << t - end + LTYPE_TOKENS = %i[ + on_heredoc_beg on_tstring_beg + on_regexp_beg on_symbeg on_backtick + on_symbols_beg on_qsymbols_beg + on_words_beg on_qwords_beg + ] - if pending_heredocs.any? && t.tok.include?("\n") - pending_heredocs.reverse_each do |t| - start_token << t - end_type << :on_heredoc_end - end - pending_heredocs = [] - end - i += 1 + def ltype_from_open_tokens(opens) + start_token = opens.reverse_each.find do |tok| + LTYPE_TOKENS.include?(tok.event) end - pending_heredocs.first || start_token.last - end - - def process_literal_type(tokens) - start_token = check_string_literal(tokens) - return nil if start_token == "" + return nil unless start_token case start_token&.event when :on_tstring_beg @@ -783,47 +453,16 @@ class RubyLex end end - if first_token.nil? - return false - elsif first_token && first_token.state == Ripper::EXPR_DOT - return false - else + if first_token && first_token.state != Ripper::EXPR_DOT tokens_without_last_line = tokens[0..index] - ltype = process_literal_type(tokens_without_last_line) - indent = process_nesting_level(tokens_without_last_line) - continue = process_continue(tokens_without_last_line) - code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line) - if ltype or indent > 0 or continue or code_block_open - return false - else - return last_line_tokens.map(&:tok).join('') + code_without_last_line = tokens_without_last_line.map(&:tok).join + opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line) + if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line) + return last_line_tokens.map(&:tok).join end end end false end - - private - - def heredoc_scope? - heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) } - heredoc_tokens[-1]&.event == :on_heredoc_beg - end - - def in_keyword_case_scope? - kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) } - counter = 0 - kw_tokens.reverse.each do |t| - if t.tok == 'case' - return true if counter.zero? - counter += 1 - elsif t.tok == 'for' - counter += 1 - elsif t.tok == 'end' - counter -= 1 - end - end - false - end end # :startdoc: diff --git a/test/irb/test_nesting_parser.rb b/test/irb/test_nesting_parser.rb new file mode 100644 index 0000000000..83c7fb08a6 --- /dev/null +++ b/test/irb/test_nesting_parser.rb @@ -0,0 +1,303 @@ +# frozen_string_literal: false +require 'irb' + +require_relative "helper" + +module TestIRB + class NestingParserTest < TestCase + def setup + save_encodings + end + + def teardown + restore_encodings + end + + def parse_by_line(code) + IRB::NestingParser.parse_by_line(RubyLex.ripper_lex_without_warning(code)) + end + + def test_open_tokens + code = <<~'EOS' + class A + def f + if true + tap do + { + x: " + #{p(1, 2, 3 + EOS + opens = IRB::NestingParser.open_tokens(RubyLex.ripper_lex_without_warning(code)) + assert_equal(%w[class def if do { " #{ (], opens.map(&:tok)) + end + + def test_parse_by_line + code = <<~EOS + (((((1+2 + ).to_s())).tap do ((( + EOS + _tokens, prev_opens, next_opens, min_depth = parse_by_line(code).last + assert_equal(%w[( ( ( ( (], prev_opens.map(&:tok)) + assert_equal(%w[( ( do ( ( (], next_opens.map(&:tok)) + assert_equal(2, min_depth) + end + + def test_ruby_syntax + code = <<~'EOS' + class A + 1 if 2 + 1 while 2 + 1 until 2 + 1 unless 2 + 1 rescue 2 + begin; rescue; ensure; end + tap do; rescue; ensure; end + class B; end + module C; end + def f; end + def `; end + def f() = 1 + %(); %w[]; %q(); %r{}; %i[] + "#{1}"; ''; /#{1}/; `#{1}` + :sym; :"sym"; :+; :`; :if + [1, 2, 3] + { x: 1, y: 2 } + (a, (*b, c), d), e = 1, 2, 3 + ->(a){}; ->(a) do end + -> a = -> b = :do do end do end + if 1; elsif 2; else; end + unless 1; end + while 1; end + until 1; end + for i in j; end + case 1; when 2; end + puts(1, 2, 3) + loop{|i|} + loop do |i| end + end + EOS + line_results = parse_by_line(code) + assert_equal(code.lines.size, line_results.size) + class_open, *inner_line_results, class_close = line_results + assert_equal(['class'], class_open[2].map(&:tok)) + inner_line_results.each {|result| assert_equal(['class'], result[2].map(&:tok)) } + assert_equal([], class_close[2].map(&:tok)) + end + + def test_multiline_string + code = <<~EOS + " + aaa + bbb + " + <<A + aaa + bbb + A + EOS + line_results = parse_by_line(code) + assert_equal(code.lines.size, line_results.size) + string_content_line, string_opens = line_results[1] + assert_equal("\naaa\nbbb\n", string_content_line.first.first.tok) + assert_equal("aaa\n", string_content_line.first.last) + assert_equal(['"'], string_opens.map(&:tok)) + heredoc_content_line, heredoc_opens = line_results[6] + assert_equal("aaa\nbbb\n", heredoc_content_line.first.first.tok) + assert_equal("bbb\n", heredoc_content_line.first.last) + assert_equal(['<<A'], heredoc_opens.map(&:tok)) + _line, _prev_opens, next_opens, _min_depth = line_results.last + assert_equal([], next_opens) + end + + def test_backslash_continued_nested_symbol + code = <<~'EOS' + x = <<A, :\ + heredoc #{ + here + } + A + =begin + embdoc + =end + # comment + + if # this is symbol :if + while + EOS + line_results = parse_by_line(code) + assert_equal(%w[: <<A #{], line_results[2][2].map(&:tok)) + assert_equal(%w[while], line_results.last[2].map(&:tok)) + end + + def test_oneliner_def + code = <<~EOC + if true + # normal oneliner def + def f = 1 + def f() = 1 + def f(*) = 1 + # keyword, backtick, op + def * = 1 + def ` = 1 + def if = 1 + def *() = 1 + def `() = 1 + def if() = 1 + # oneliner def with receiver + def a.* = 1 + def $a.* = 1 + def @a.` = 1 + def A.` = 1 + def ((a;b;c)).*() = 1 + def ((a;b;c)).if() = 1 + def ((a;b;c)).end() = 1 + # multiline oneliner def + def f = + 1 + def f() + = + 1 + # oneliner def with comment and embdoc + def # comment + =begin + embdoc + =end + ((a;b;c)) + . # comment + =begin + embdoc + =end + f (*) # comment + =begin + embdoc + =end + = + 1 + # nested oneliner def + def f(x = def f() = 1) = def f() = 1 + EOC + _tokens, _prev_opens, next_opens, min_depth = parse_by_line(code).last + assert_equal(['if'], next_opens.map(&:tok)) + assert_equal(1, min_depth) + end + + def test_heredoc_embexpr + code = <<~'EOS' + <<A+<<B+<<C+(<<D+(<<E) + #{ + <<~F+"#{<<~G} + #{ + here + } + F + G + " + } + A + B + C + D + E + ) + EOS + line_results = parse_by_line(code) + last_opens = line_results.last[-2] + assert_equal([], last_opens) + _tokens, _prev_opens, next_opens, _min_depth = line_results[4] + assert_equal(%w[( <<E <<D <<C <<B <<A #{ " <<~G <<~F #{], next_opens.map(&:tok)) + end + + def test_for_in + code = <<~EOS + for i in j + here + end + for i in j do + here + end + for i in + j do + here + end + for + # comment + i in j do + here + end + for (a;b;c).d in (a;b;c) do + here + end + for i in :in + :do do + here + end + for i in -> do end do + here + end + EOS + line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') } + assert_equal(7, line_results.size) + line_results.each do |_tokens, _prev_opens, next_opens, _min_depth| + assert_equal(['for'], next_opens.map(&:tok)) + end + end + + def test_while_until + base_code = <<~'EOS' + while_or_until true + here + end + while_or_until a < c + here + end + while_or_until true do + here + end + while_or_until + # comment + (a + b) < + # comment + c do + here + end + while_or_until :\ + do do + here + end + while_or_until def do; end == :do do + here + end + while_or_until -> do end do + here + end + EOS + %w[while until].each do |keyword| + code = base_code.gsub('while_or_until', keyword) + line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') } + assert_equal(7, line_results.size) + line_results.each do |_tokens, _prev_opens, next_opens, _min_depth| + assert_equal([keyword], next_opens.map(&:tok) ) + end + end + end + + def test_case_in + if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7.0') + pend 'This test requires ruby version that supports case-in syntax' + end + code = <<~EOS + case 1 + in 1 + here + in + 2 + here + end + EOS + line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') } + assert_equal(2, line_results.size) + line_results.each do |_tokens, _prev_opens, next_opens, _min_depth| + assert_equal(['in'], next_opens.map(&:tok)) + end + end + end +end diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb index aa27204e26..9d7910cca6 100644 --- a/test/irb/test_ruby_lex.rb +++ b/test/irb/test_ruby_lex.rb @@ -95,8 +95,11 @@ module TestIRB def check_state(lines, local_variables: []) context = build_context(local_variables) + tokens = RubyLex.ripper_lex_without_warning(lines.join("\n"), context: context) + opens = IRB::NestingParser.open_tokens(tokens) ruby_lex = RubyLex.new(context) - _ltype, indent, _continue, code_block_open = ruby_lex.check_code_state(lines.join("\n")) + indent, _nesting_level = ruby_lex.calc_nesting_depth(opens) + code_block_open = !opens.empty? || ruby_lex.process_continue(tokens) [indent, code_block_open] end @@ -164,9 +167,9 @@ module TestIRB Row.new(%q( ]), 4, 4), Row.new(%q( ]), 2, 2), Row.new(%q(]), 0, 0), - Row.new(%q([<<FOO]), 0, 0), + Row.new(%q([<<FOO]), nil, 0), Row.new(%q(hello), 0, 0), - Row.new(%q(FOO), nil, 0), + Row.new(%q(FOO), 0, 0), ] lines = [] @@ -489,12 +492,12 @@ module TestIRB end end - def test_corresponding_syntax_to_keyword_in + def test_typing_incomplete_include_interpreted_as_keyword_in input_with_correct_indents = [ Row.new(%q(module E), nil, 2, 1), Row.new(%q(end), 0, 0, 0), Row.new(%q(class A), nil, 2, 1), - Row.new(%q( in), nil, 4, 1) + Row.new(%q( in), nil, 2, 1) # scenario typing `include E` ] lines = [] @@ -575,11 +578,19 @@ module TestIRB end def test_heredoc_with_indent + if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7.0') + pend 'This test needs Ripper::Lexer#scan to take broken tokens' + end input_with_correct_indents = [ - Row.new(%q(<<~Q), 0, 0, 0), - Row.new(%q({), 0, 0, 0), - Row.new(%q( #), 2, 0, 0), - Row.new(%q(}), 0, 0, 0) + Row.new(%q(<<~Q+<<~R), nil, 0, 0), + Row.new(%q(a), 0, 0, 0), + Row.new(%q(a), 0, 0, 0), + Row.new(%q( b), 2, 2, 0), + Row.new(%q( b), 2, 2, 0), + Row.new(%q( Q), 0, 2, 0), + Row.new(%q( c), 4, 4, 0), + Row.new(%q( c), 4, 4, 0), + Row.new(%q( R), 0, 0, 0), ] lines = [] @@ -592,8 +603,8 @@ module TestIRB def test_oneliner_def_in_multiple_lines input_with_correct_indents = [ - Row.new(%q(def a()=[), nil, 4, 2), - Row.new(%q( 1,), nil, 4, 1), + Row.new(%q(def a()=[), nil, 2, 1), + Row.new(%q( 1,), nil, 2, 1), Row.new(%q(].), 0, 0, 0), Row.new(%q(to_s), nil, 0, 0), ] @@ -609,7 +620,7 @@ module TestIRB def test_broken_heredoc input_with_correct_indents = [ Row.new(%q(def foo), nil, 2, 1), - Row.new(%q( <<~Q), 2, 2, 1), + Row.new(%q( <<~Q), nil, 2, 1), Row.new(%q( Qend), 2, 2, 1), ] @@ -621,6 +632,15 @@ module TestIRB end end + def test_heredoc_keep_indent_spaces + (1..4).each do |indent| + row = Row.new(' ' * indent, indent, [2, indent].max, 1) + lines = ['def foo', ' <<~Q', row.content] + assert_row_indenting(lines, row) + assert_nesting_level(lines, row.nesting_level) + end + end + PromptRow = Struct.new(:prompt, :content) class MockIO_DynamicPrompt @@ -746,10 +766,9 @@ module TestIRB end def test_unterminated_heredoc_string_literal - context = build_context ['<<A;<<B', "<<A;<<B\n", "%W[\#{<<A;<<B", "%W[\#{<<A;<<B\n"].each do |code| tokens = RubyLex.ripper_lex_without_warning(code) - string_literal = RubyLex.new(context).check_string_literal(tokens) + string_literal = IRB::NestingParser.open_tokens(tokens).last assert_equal('<<A', string_literal&.tok) end end @@ -779,43 +798,8 @@ module TestIRB [reference_code, code_with_heredoc, code_with_embdoc].each do |code| lex = RubyLex.new(context) lines = code.lines - lex.instance_variable_set('@tokens', RubyLex.ripper_lex_without_warning(code)) - assert_equal 2, lex.check_corresponding_token_depth(lines, lines.size) - end - end - - def test_find_prev_spaces_with_multiline_literal - lex = RubyLex.new(build_context) - reference_code = <<~EOC.chomp - if true - 1 - hello - 1 - world - end - EOC - code_with_percent_string = <<~EOC.chomp - if true - %w[ - hello - ] - world - end - EOC - code_with_quoted_string = <<~EOC.chomp - if true - ' - hello - ' - world - end - EOC - context = build_context - [reference_code, code_with_percent_string, code_with_quoted_string].each do |code| - lex = RubyLex.new(context) - lex.instance_variable_set('@tokens', RubyLex.ripper_lex_without_warning(code)) - prev_spaces = (1..code.lines.size).map { |index| lex.find_prev_spaces index } - assert_equal [0, 2, 2, 2, 2, 0], prev_spaces + tokens = RubyLex.ripper_lex_without_warning(code) + assert_equal(2, lex.check_corresponding_token_depth(tokens, lines, lines.size - 1)) end end |