-rw-r--r--  lib/irb/nesting_parser.rb       | 227
-rw-r--r--  lib/irb/ruby-lex.rb             | 553
-rw-r--r--  test/irb/test_nesting_parser.rb | 303
-rw-r--r--  test/irb/test_ruby_lex.rb       |  86
4 files changed, 661 insertions(+), 508 deletions(-)
diff --git a/lib/irb/nesting_parser.rb b/lib/irb/nesting_parser.rb
new file mode 100644
index 0000000000..3d4db82444
--- /dev/null
+++ b/lib/irb/nesting_parser.rb
@@ -0,0 +1,227 @@
+# frozen_string_literal: true
+module IRB
+ module NestingParser
+ IGNORE_TOKENS = %i[on_sp on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end]
+
+ # Scan each token and, when a block is given, call it with the token and the array of currently open tokens
+ def self.scan_opens(tokens)
+ opens = []
+ pending_heredocs = []
+ first_token_on_line = true
+ tokens.each do |t|
+ skip = false
+ last_tok, state, args = opens.last
+ case state
+ when :in_unquoted_symbol
+ unless IGNORE_TOKENS.include?(t.event)
+ opens.pop
+ skip = true
+ end
+ when :in_lambda_head
+ opens.pop if t.event == :on_tlambeg || (t.event == :on_kw && t.tok == 'do')
+ when :in_method_head
+ unless IGNORE_TOKENS.include?(t.event)
+ next_args = []
+ body = nil
+ if args.include?(:receiver)
+ case t.event
+ when :on_lparen, :on_ivar, :on_gvar, :on_cvar
+ # def (receiver). | def @ivar. | def $gvar. | def @@cvar.
+ next_args << :dot
+ when :on_kw
+ case t.tok
+ when 'self', 'true', 'false', 'nil'
+ # def self(arg) | def self.
+ next_args.push(:arg, :dot)
+ else
+ # def if(arg)
+ skip = true
+ next_args << :arg
+ end
+ when :on_op, :on_backtick
+ # def +(arg)
+ skip = true
+ next_args << :arg
+ when :on_ident, :on_const
+ # def a(arg) | def a.
+ next_args.push(:arg, :dot)
+ end
+ end
+ if args.include?(:dot)
+ # def receiver.name
+ next_args << :name if t.event == :on_period || (t.event == :on_op && t.tok == '::')
+ end
+ if args.include?(:name)
+ if %i[on_ident on_const on_op on_kw on_backtick].include?(t.event)
+ # def name(arg) | def receiver.name(arg)
+ next_args << :arg
+ skip = true
+ end
+ end
+ if args.include?(:arg)
+ case t.event
+ when :on_nl, :on_semicolon
+ # def receiver.f;
+ body = :normal
+ when :on_lparen
+ # def receiver.f()
+ next_args << :eq
+ else
+ if t.event == :on_op && t.tok == '='
+ # def receiver.f =
+ body = :oneliner
+ else
+ # def receiver.f arg
+ next_args << :arg_without_paren
+ end
+ end
+ end
+ if args.include?(:eq)
+ if t.event == :on_op && t.tok == '='
+ body = :oneliner
+ else
+ body = :normal
+ end
+ end
+ if args.include?(:arg_without_paren)
+ if %i[on_semicolon on_nl].include?(t.event)
+ # def f a;
+ body = :normal
+ else
+ # def f a, b
+ next_args << :arg_without_paren
+ end
+ end
+ if body == :oneliner
+ opens.pop
+ elsif body
+ opens[-1] = [last_tok, nil]
+ else
+ opens[-1] = [last_tok, :in_method_head, next_args]
+ end
+ end
+ when :in_for_while_until_condition
+ if t.event == :on_semicolon || t.event == :on_nl || (t.event == :on_kw && t.tok == 'do')
+ skip = true if t.event == :on_kw && t.tok == 'do'
+ opens[-1] = [last_tok, nil]
+ end
+ end
+
+ unless skip
+ case t.event
+ when :on_kw
+ case t.tok
+ when 'begin', 'class', 'module', 'do', 'case'
+ opens << [t, nil]
+ when 'end'
+ opens.pop
+ when 'def'
+ opens << [t, :in_method_head, [:receiver, :name]]
+ when 'if', 'unless'
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
+ opens << [t, nil]
+ end
+ when 'while', 'until'
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
+ opens << [t, :in_for_while_until_condition]
+ end
+ when 'ensure', 'rescue'
+ unless t.state.allbits?(Ripper::EXPR_LABEL)
+ opens.pop
+ opens << [t, nil]
+ end
+ when 'elsif', 'else', 'when'
+ opens.pop
+ opens << [t, nil]
+ when 'for'
+ opens << [t, :in_for_while_until_condition]
+ when 'in'
+ if last_tok&.event == :on_kw && %w[case in].include?(last_tok.tok) && first_token_on_line
+ opens.pop
+ opens << [t, nil]
+ end
+ end
+ when :on_tlambda
+ opens << [t, :in_lambda_head]
+ when :on_lparen, :on_lbracket, :on_lbrace, :on_tlambeg, :on_embexpr_beg, :on_embdoc_beg
+ opens << [t, nil]
+ when :on_rparen, :on_rbracket, :on_rbrace, :on_embexpr_end, :on_embdoc_end
+ opens.pop
+ when :on_heredoc_beg
+ pending_heredocs << t
+ when :on_heredoc_end
+ opens.pop
+ when :on_backtick
+ opens << [t, nil] if t.state.allbits?(Ripper::EXPR_BEG)
+ when :on_tstring_beg, :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_regexp_beg
+ opens << [t, nil]
+ when :on_tstring_end, :on_regexp_end, :on_label_end
+ opens.pop
+ when :on_symbeg
+ if t.tok == ':'
+ opens << [t, :in_unquoted_symbol]
+ else
+ opens << [t, nil]
+ end
+ end
+ end
+ if t.event == :on_nl || t.event == :on_semicolon
+ first_token_on_line = true
+ elsif t.event != :on_sp
+ first_token_on_line = false
+ end
+ if pending_heredocs.any? && t.tok.include?("\n")
+ pending_heredocs.reverse_each { |t| opens << [t, nil] }
+ pending_heredocs = []
+ end
+ yield t, opens if block_given?
+ end
+ opens.map(&:first) + pending_heredocs.reverse
+ end
+
+ def self.open_tokens(tokens)
+ # Calling scan_opens without a block just returns the tokens that remain open at the last token position
+ scan_opens(tokens)
+ end
+
+ # Calculates token information [line_tokens, prev_opens, next_opens, min_depth] for each line.
+ # Example code
+ # ["hello
+ # world"+(
+ # First line
+ # line_tokens: [[lbracket, '['], [tstring_beg, '"'], [tstring_content("hello\nworld"), "hello\n"]]
+ # prev_opens: []
+ # next_tokens: [lbracket, tstring_beg]
+ # min_depth: 0 (minimum at beginning of line)
+ # Second line
+ # line_tokens: [[tstring_content("hello\nworld"), "world"], [tstring_end, '"'], [op, '+'], [lparen, '(']]
+ # prev_opens: [lbracket, tstring_beg]
+ # next_tokens: [lbracket, lparen]
+ # min_depth: 1 (minimum just after tstring_end)
+ def self.parse_by_line(tokens)
+ line_tokens = []
+ prev_opens = []
+ min_depth = 0
+ output = []
+ last_opens = scan_opens(tokens) do |t, opens|
+ depth = t == opens.last&.first ? opens.size - 1 : opens.size
+ min_depth = depth if depth < min_depth
+ if t.tok.include?("\n")
+ t.tok.each_line do |line|
+ line_tokens << [t, line]
+ next if line[-1] != "\n"
+ next_opens = opens.map(&:first)
+ output << [line_tokens, prev_opens, next_opens, min_depth]
+ prev_opens = next_opens
+ min_depth = prev_opens.size
+ line_tokens = []
+ end
+ else
+ line_tokens << [t, t.tok]
+ end
+ end
+ output << [line_tokens, prev_opens, last_opens, min_depth] if line_tokens.any?
+ output
+ end
+ end
+end
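A minimal usage sketch of the parser added above (illustrative only, not part of the patch; it assumes the patched irb is on the load path and uses RubyLex.ripper_lex_without_warning as the token source, the same helper the tests below use):

    require 'irb'

    code = <<~'RUBY'
      "hello
      world" + (
    RUBY
    tokens = RubyLex.ripper_lex_without_warning(code)

    # Tokens still open at the end of the input: the string literal has been
    # closed, the parenthesis has not.
    IRB::NestingParser.open_tokens(tokens).map(&:tok)   # expected: ["("]

    # Per-line [line_tokens, prev_opens, next_opens, min_depth], matching the
    # doc comment on parse_by_line above.
    IRB::NestingParser.parse_by_line(tokens).each do |line_tokens, prev_opens, next_opens, min_depth|
      p [line_tokens.map(&:last), prev_opens.map(&:tok), next_opens.map(&:tok), min_depth]
    end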
diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb
index e29d52e47c..77c5b07ae9 100644
--- a/lib/irb/ruby-lex.rb
+++ b/lib/irb/ruby-lex.rb
@@ -6,6 +6,7 @@
require "ripper"
require "jruby" if RUBY_ENGINE == "jruby"
+require_relative "nesting_parser"
# :stopdoc:
class RubyLex
@@ -54,8 +55,7 @@ class RubyLex
if @io.respond_to?(:check_termination)
@io.check_termination do |code|
if Reline::IOGate.in_pasting?
- lex = RubyLex.new(@context)
- rest = lex.check_termination_in_prev_line(code)
+ rest = check_termination_in_prev_line(code)
if rest
Reline.delete_text
rest.bytes.reverse_each do |c|
@@ -69,64 +69,39 @@ class RubyLex
# Accept any single-line input for symbol aliases or commands that transform args
next true if single_line_command?(code)
- ltype, indent, continue, code_block_open = check_code_state(code)
- if ltype or indent > 0 or continue or code_block_open
- false
- else
- true
- end
+ _tokens, _opens, terminated = check_code_state(code)
+ terminated
end
end
end
if @io.respond_to?(:dynamic_prompt)
@io.dynamic_prompt do |lines|
lines << '' if lines.empty?
- result = []
tokens = self.class.ripper_lex_without_warning(lines.map{ |l| l + "\n" }.join, context: @context)
- code = String.new
- partial_tokens = []
- unprocessed_tokens = []
- line_num_offset = 0
- tokens.each do |t|
- partial_tokens << t
- unprocessed_tokens << t
- if t.tok.include?("\n")
- t_str = t.tok
- t_str.each_line("\n") do |s|
- code << s
- next unless s.include?("\n")
- ltype, indent, continue, code_block_open = check_state(code, partial_tokens)
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
- line_num_offset += 1
- end
- unprocessed_tokens = []
- else
- code << t.tok
+ line_results = IRB::NestingParser.parse_by_line(tokens)
+ tokens_until_line = []
+ line_results.map.with_index do |(line_tokens, _prev_opens, next_opens, _min_depth), line_num_offset|
+ line_tokens.each do |token, _s|
+ # Avoid appending a duplicated token. Tokens that include "\n", such as multiline tstring_content, can appear on multiple lines.
+ tokens_until_line << token if token != tokens_until_line.last
end
+ continue = process_continue(tokens_until_line)
+ prompt(next_opens, continue, line_num_offset)
end
-
- unless unprocessed_tokens.empty?
- ltype, indent, continue, code_block_open = check_state(code, unprocessed_tokens)
- result << @prompt.call(ltype, indent, continue || code_block_open, @line_no + line_num_offset)
- end
- result
end
end
if @io.respond_to?(:auto_indent) and @context.auto_indent_mode
@io.auto_indent do |lines, line_index, byte_pointer, is_newline|
if is_newline
- @tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context)
- prev_spaces = find_prev_spaces(line_index)
- depth_difference = check_newline_depth_difference
- depth_difference = 0 if depth_difference < 0
- prev_spaces + depth_difference * 2
+ tokens = self.class.ripper_lex_without_warning(lines[0..line_index].join("\n"), context: @context)
+ process_indent_level(tokens, lines)
else
code = line_index.zero? ? '' : lines[0..(line_index - 1)].map{ |l| l + "\n" }.join
last_line = lines[line_index]&.byteslice(0, byte_pointer)
code += last_line if last_line
- @tokens = self.class.ripper_lex_without_warning(code, context: @context)
- check_corresponding_token_depth(lines, line_index)
+ tokens = self.class.ripper_lex_without_warning(code, context: @context)
+ check_corresponding_token_depth(tokens, lines, line_index)
end
end
end
@@ -176,50 +151,30 @@ class RubyLex
$VERBOSE = verbose
end
- def find_prev_spaces(line_index)
- return 0 if @tokens.size == 0
- md = @tokens[0].tok.match(/(\A +)/)
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
- line_count = 0
- @tokens.each_with_index do |t, i|
- if t.tok.include?("\n")
- line_count += t.tok.count("\n")
- if line_count >= line_index
- return prev_spaces
- end
- next if t.event == :on_tstring_content || t.event == :on_words_sep
- if (@tokens.size - 1) > i
- md = @tokens[i + 1].tok.match(/(\A +)/)
- prev_spaces = md.nil? ? 0 : md[1].count(' ')
- end
- end
- end
- prev_spaces
- end
-
- def check_state(code, tokens)
- ltype = process_literal_type(tokens)
- indent = process_nesting_level(tokens)
- continue = process_continue(tokens)
- lvars_code = self.class.generate_local_variables_assign_code(@context.local_variables)
- code = "#{lvars_code}\n#{code}" if lvars_code
- code_block_open = check_code_block(code, tokens)
- [ltype, indent, continue, code_block_open]
+ def prompt(opens, continue, line_num_offset)
+ ltype = ltype_from_open_tokens(opens)
+ _indent_level, nesting_level = calc_nesting_depth(opens)
+ @prompt&.call(ltype, nesting_level, opens.any? || continue, @line_no + line_num_offset)
end
def check_code_state(code)
check_target_code = code.gsub(/\s*\z/, '').concat("\n")
tokens = self.class.ripper_lex_without_warning(check_target_code, context: @context)
- check_state(check_target_code, tokens)
+ opens = IRB::NestingParser.open_tokens(tokens)
+ [tokens, opens, code_terminated?(code, tokens, opens)]
end
- def save_prompt_to_context_io(ltype, indent, continue, line_num_offset)
+ def code_terminated?(code, tokens, opens)
+ opens.empty? && !process_continue(tokens) && !check_code_block(code, tokens)
+ end
+
+ def save_prompt_to_context_io(opens, continue, line_num_offset)
# Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`.
- @prompt.call(ltype, indent, continue, @line_no + line_num_offset)
+ prompt(opens, continue, line_num_offset)
end
def readmultiline
- save_prompt_to_context_io(nil, 0, false, 0)
+ save_prompt_to_context_io([], false, 0)
# multiline
return @input.call if @io.respond_to?(:check_termination)
@@ -237,11 +192,12 @@ class RubyLex
# Accept any single-line input for symbol aliases or commands that transform args
return code if single_line_command?(code)
- ltype, indent, continue, code_block_open = check_code_state(code)
- return code unless ltype or indent > 0 or continue or code_block_open
+ tokens, opens, terminated = check_code_state(code)
+ return code if terminated
line_offset += 1
- save_prompt_to_context_io(ltype, indent, continue, line_offset)
+ continue = process_continue(tokens)
+ save_prompt_to_context_io(opens, continue, line_offset)
end
end
@@ -282,9 +238,6 @@ class RubyLex
def check_code_block(code, tokens)
return true if tokens.empty?
- if tokens.last.event == :on_heredoc_beg
- return true
- end
begin # check if parser error are available
verbose, $VERBOSE = $VERBOSE, nil
@@ -372,365 +325,82 @@ class RubyLex
false
end
- def process_nesting_level(tokens)
- indent = 0
- in_oneliner_def = nil
- tokens.each_with_index { |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
- else
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t.state.allbits?(Ripper::EXPR_BEG)
- if t.tok == '='
- in_oneliner_def = :BODY
- end
- else
- if in_oneliner_def == :BODY
- # one-liner method definition
- indent -= 1
- end
- in_oneliner_def = nil
- end
- end
-
+ # Calculates [indent_level, nesting_level]. nesting_level is used in the prompt string.
+ def calc_nesting_depth(opens)
+ indent_level = 0
+ nesting_level = 0
+ opens.each do |t|
case t.event
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- indent += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- indent -= 1
- when :on_kw
- next if index > 0 and tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
- case t.tok
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(tokens, index)
- indent += 1 if syntax_of_do == :method_calling
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- indent += 1
- when 'if', 'unless', 'while', 'until'
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
- indent += 1 unless t.state.allbits?(Ripper::EXPR_LABEL)
- when 'end'
- indent -= 1
- end
- end
- # percent literals are not indented
- }
- indent
- end
-
- def is_method_calling?(tokens, index)
- tk = tokens[index]
- if tk.state.anybits?(Ripper::EXPR_CMDARG) and tk.event == :on_ident
- # The target method call to pass the block with "do".
- return true
- elsif tk.state.anybits?(Ripper::EXPR_ARG) and tk.event == :on_ident
- non_sp_index = tokens[0..(index - 1)].rindex{ |t| t.event != :on_sp }
- if non_sp_index
- prev_tk = tokens[non_sp_index]
- if prev_tk.state.anybits?(Ripper::EXPR_DOT) and prev_tk.event == :on_period
- # The target method call with receiver to pass the block with "do".
- return true
- end
+ when :on_heredoc_beg
+ # TODO: indent heredoc
+ when :on_tstring_beg, :on_regexp_beg, :on_symbeg
+ # can be indented if t.tok starts with `%`
+ when :on_words_beg, :on_qwords_beg, :on_symbols_beg, :on_qsymbols_beg, :on_embexpr_beg
+ # can be indented, but is not indented in the current implementation
+ when :on_embdoc_beg
+ indent_level = 0
+ else
+ nesting_level += 1
+ indent_level += 1
end
end
- false
+ [indent_level, nesting_level]
end
- def take_corresponding_syntax_to_kw_do(tokens, index)
- syntax_of_do = nil
- # Finding a syntax corresponding to "do".
- index.downto(0) do |i|
- tk = tokens[i]
- # In "continue", the token isn't the corresponding syntax to "do".
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
- first_in_fomula = false
- if non_sp_index.nil?
- first_in_fomula = true
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
- first_in_fomula = true
- end
- if is_method_calling?(tokens, i)
- syntax_of_do = :method_calling
- break if first_in_fomula
- elsif tk.event == :on_kw && %w{while until for}.include?(tk.tok)
- # A loop syntax in front of "do" found.
- #
- # while cond do # also "until" or "for"
- # end
- #
- # This "do" doesn't increment indent because the loop syntax already
- # incremented.
- syntax_of_do = :loop_syntax
- break if first_in_fomula
- end
+ def free_indent_token(opens, line_index)
+ last_token = opens.last
+ return unless last_token
+ if last_token.event == :on_heredoc_beg && last_token.pos.first < line_index + 1
+ # accept extra indent spaces inside heredoc
+ last_token
end
- syntax_of_do
end
- def is_the_in_correspond_to_a_for(tokens, index)
- syntax_of_in = nil
- # Finding a syntax corresponding to "do".
- index.downto(0) do |i|
- tk = tokens[i]
- # In "continue", the token isn't the corresponding syntax to "do".
- non_sp_index = tokens[0..(i - 1)].rindex{ |t| t.event != :on_sp }
- first_in_fomula = false
- if non_sp_index.nil?
- first_in_fomula = true
- elsif [:on_ignored_nl, :on_nl, :on_comment].include?(tokens[non_sp_index].event)
- first_in_fomula = true
- end
- if tk.event == :on_kw && tk.tok == 'for'
- # A loop syntax in front of "do" found.
- #
- # while cond do # also "until" or "for"
- # end
- #
- # This "do" doesn't increment indent because the loop syntax already
- # incremented.
- syntax_of_in = :for
- end
- break if first_in_fomula
+ def process_indent_level(tokens, lines)
+ opens = IRB::NestingParser.open_tokens(tokens)
+ indent_level, _nesting_level = calc_nesting_depth(opens)
+ indent = indent_level * 2
+ line_index = lines.size - 2
+ if free_indent_token(opens, line_index)
+ return [indent, lines[line_index][/^ */].length].max
end
- syntax_of_in
- end
-
- def check_newline_depth_difference
- depth_difference = 0
- open_brace_on_line = 0
- in_oneliner_def = nil
- @tokens.each_with_index do |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
- else
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t.state.allbits?(Ripper::EXPR_BEG)
- if t.tok == '='
- in_oneliner_def = :BODY
- end
- else
- if in_oneliner_def == :BODY
- # one-liner method definition
- depth_difference -= 1
- end
- in_oneliner_def = nil
- end
- end
- case t.event
- when :on_ignored_nl, :on_nl, :on_comment
- if index != (@tokens.size - 1) and in_oneliner_def != :BODY
- depth_difference = 0
- open_brace_on_line = 0
- end
- next
- when :on_sp
- next
- end
-
- case t.event
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- depth_difference += 1
- open_brace_on_line += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- depth_difference -= 1 if open_brace_on_line > 0
- when :on_kw
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
- case t.tok
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
- depth_difference += 1 if syntax_of_do == :method_calling
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- depth_difference += 1
- when 'if', 'unless', 'while', 'until', 'rescue'
- # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL
- unless t.state.allbits?(Ripper::EXPR_LABEL)
- depth_difference += 1
- end
- when 'else', 'elsif', 'ensure', 'when'
- depth_difference += 1
- when 'in'
- unless is_the_in_correspond_to_a_for(@tokens, index)
- depth_difference += 1
- end
- when 'end'
- depth_difference -= 1
- end
- end
- end
- depth_difference
+ indent
end
- def check_corresponding_token_depth(lines, line_index)
- corresponding_token_depth = nil
- is_first_spaces_of_line = true
- is_first_printable_of_line = true
- spaces_of_nest = []
- spaces_at_line_head = 0
- open_brace_on_line = 0
- in_oneliner_def = nil
-
- if heredoc_scope?
+ def check_corresponding_token_depth(tokens, lines, line_index)
+ line_results = IRB::NestingParser.parse_by_line(tokens)
+ result = line_results[line_index]
+ return unless result
+
+ # To correctly indent a line like `end.map do`, we use the shortest list of open tokens on each line for the indent calculation.
+ # The shortest open tokens can be calculated with `opens.take(min_depth)`.
+ _tokens, prev_opens, opens, min_depth = result
+ indent_level, _nesting_level = calc_nesting_depth(opens.take(min_depth))
+ indent = indent_level * 2
+ free_indent_tok = free_indent_token(opens, line_index)
+ prev_line_free_indent_tok = free_indent_token(prev_opens, line_index - 1)
+ if prev_line_free_indent_tok && prev_line_free_indent_tok != free_indent_tok
+ return indent
+ elsif free_indent_tok
return lines[line_index][/^ */].length
end
-
- @tokens.each_with_index do |t, index|
- # detecting one-liner method definition
- if in_oneliner_def.nil?
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- in_oneliner_def = :ENDFN
- end
- else
- if t.state.allbits?(Ripper::EXPR_ENDFN)
- # continuing
- elsif t.state.allbits?(Ripper::EXPR_BEG)
- if t.tok == '='
- in_oneliner_def = :BODY
- end
- else
- if in_oneliner_def == :BODY
- # one-liner method definition
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
- else
- spaces_of_nest.pop
- corresponding_token_depth = nil
- end
- end
- in_oneliner_def = nil
- end
- end
-
- case t.event
- when :on_ignored_nl, :on_nl, :on_comment, :on_heredoc_end, :on_embdoc_end
- if in_oneliner_def != :BODY
- corresponding_token_depth = nil
- spaces_at_line_head = 0
- is_first_spaces_of_line = true
- is_first_printable_of_line = true
- open_brace_on_line = 0
- end
- next
- when :on_sp
- spaces_at_line_head = t.tok.count(' ') if is_first_spaces_of_line
- is_first_spaces_of_line = false
- next
- end
-
- case t.event
- when :on_lbracket, :on_lbrace, :on_lparen, :on_tlambeg
- spaces_of_nest.push(spaces_at_line_head + open_brace_on_line * 2)
- open_brace_on_line += 1
- when :on_rbracket, :on_rbrace, :on_rparen
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
- else
- spaces_of_nest.pop
- corresponding_token_depth = nil
- end
- open_brace_on_line -= 1
- when :on_kw
- next if index > 0 and @tokens[index - 1].state.allbits?(Ripper::EXPR_FNAME)
- case t.tok
- when 'do'
- syntax_of_do = take_corresponding_syntax_to_kw_do(@tokens, index)
- if syntax_of_do == :method_calling
- spaces_of_nest.push(spaces_at_line_head)
- end
- when 'def', 'case', 'for', 'begin', 'class', 'module'
- spaces_of_nest.push(spaces_at_line_head)
- when 'rescue'
- unless t.state.allbits?(Ripper::EXPR_LABEL)
- corresponding_token_depth = spaces_of_nest.last
- end
- when 'if', 'unless', 'while', 'until'
- # postfix if/unless/while/until must be Ripper::EXPR_LABEL
- unless t.state.allbits?(Ripper::EXPR_LABEL)
- spaces_of_nest.push(spaces_at_line_head)
- end
- when 'else', 'elsif', 'ensure', 'when'
- corresponding_token_depth = spaces_of_nest.last
- when 'in'
- if in_keyword_case_scope?
- corresponding_token_depth = spaces_of_nest.last
- end
- when 'end'
- if is_first_printable_of_line
- corresponding_token_depth = spaces_of_nest.pop
- else
- spaces_of_nest.pop
- corresponding_token_depth = nil
- end
- end
- end
- is_first_spaces_of_line = false
- is_first_printable_of_line = false
- end
- corresponding_token_depth
+ prev_indent_level, _prev_nesting_level = calc_nesting_depth(prev_opens)
+ indent if indent_level < prev_indent_level
end
- def check_string_literal(tokens)
- i = 0
- start_token = []
- end_type = []
- pending_heredocs = []
- while i < tokens.size
- t = tokens[i]
- case t.event
- when *end_type.last
- start_token.pop
- end_type.pop
- when :on_tstring_beg
- start_token << t
- end_type << [:on_tstring_end, :on_label_end]
- when :on_regexp_beg
- start_token << t
- end_type << :on_regexp_end
- when :on_symbeg
- acceptable_single_tokens = %i{on_ident on_const on_op on_cvar on_ivar on_gvar on_kw on_int on_backtick}
- if (i + 1) < tokens.size
- if acceptable_single_tokens.all?{ |st| tokens[i + 1].event != st }
- start_token << t
- end_type << :on_tstring_end
- else
- i += 1
- end
- end
- when :on_backtick
- if t.state.allbits?(Ripper::EXPR_BEG)
- start_token << t
- end_type << :on_tstring_end
- end
- when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
- start_token << t
- end_type << :on_tstring_end
- when :on_heredoc_beg
- pending_heredocs << t
- end
+ LTYPE_TOKENS = %i[
+ on_heredoc_beg on_tstring_beg
+ on_regexp_beg on_symbeg on_backtick
+ on_symbols_beg on_qsymbols_beg
+ on_words_beg on_qwords_beg
+ ]
- if pending_heredocs.any? && t.tok.include?("\n")
- pending_heredocs.reverse_each do |t|
- start_token << t
- end_type << :on_heredoc_end
- end
- pending_heredocs = []
- end
- i += 1
+ def ltype_from_open_tokens(opens)
+ start_token = opens.reverse_each.find do |tok|
+ LTYPE_TOKENS.include?(tok.event)
end
- pending_heredocs.first || start_token.last
- end
-
- def process_literal_type(tokens)
- start_token = check_string_literal(tokens)
- return nil if start_token == ""
+ return nil unless start_token
case start_token&.event
when :on_tstring_beg
@@ -783,47 +453,16 @@ class RubyLex
end
end
- if first_token.nil?
- return false
- elsif first_token && first_token.state == Ripper::EXPR_DOT
- return false
- else
+ if first_token && first_token.state != Ripper::EXPR_DOT
tokens_without_last_line = tokens[0..index]
- ltype = process_literal_type(tokens_without_last_line)
- indent = process_nesting_level(tokens_without_last_line)
- continue = process_continue(tokens_without_last_line)
- code_block_open = check_code_block(tokens_without_last_line.map(&:tok).join(''), tokens_without_last_line)
- if ltype or indent > 0 or continue or code_block_open
- return false
- else
- return last_line_tokens.map(&:tok).join('')
+ code_without_last_line = tokens_without_last_line.map(&:tok).join
+ opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line)
+ if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line)
+ return last_line_tokens.map(&:tok).join
end
end
end
false
end
-
- private
-
- def heredoc_scope?
- heredoc_tokens = @tokens.select { |t| [:on_heredoc_beg, :on_heredoc_end].include?(t.event) }
- heredoc_tokens[-1]&.event == :on_heredoc_beg
- end
-
- def in_keyword_case_scope?
- kw_tokens = @tokens.select { |t| t.event == :on_kw && ['case', 'for', 'end'].include?(t.tok) }
- counter = 0
- kw_tokens.reverse.each do |t|
- if t.tok == 'case'
- return true if counter.zero?
- counter += 1
- elsif t.tok == 'for'
- counter += 1
- elsif t.tok == 'end'
- counter -= 1
- end
- end
- false
- end
end
# :startdoc:
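The min_depth bookkeeping used by check_corresponding_token_depth above is what lets a line such as `end.map do` dedent correctly: the indent follows the shallowest nesting reached on the line, opens.take(min_depth), at two spaces per indent level. A small illustrative sketch, under the same assumptions as the example after nesting_parser.rb above:

    require 'irb'

    code = <<~RUBY
      [1, 2, 3].each do |x|
        puts x
      end.map do
    RUBY

    tokens = RubyLex.ripper_lex_without_warning(code)
    _line_tokens, prev_opens, next_opens, min_depth = IRB::NestingParser.parse_by_line(tokens).last

    prev_opens.map(&:tok)   # expected: ["do"]  one block open before the line
    next_opens.map(&:tok)   # expected: ["do"]  one block open again after "do"
    min_depth               # expected: 0       shallowest point, reached right after "end"

    # The indent for the line is indent_level * 2 computed from opens.take(min_depth),
    # i.e. 0 spaces here, so "end.map do" stays at the same column as "[1, 2, 3]".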
diff --git a/test/irb/test_nesting_parser.rb b/test/irb/test_nesting_parser.rb
new file mode 100644
index 0000000000..83c7fb08a6
--- /dev/null
+++ b/test/irb/test_nesting_parser.rb
@@ -0,0 +1,303 @@
+# frozen_string_literal: false
+require 'irb'
+
+require_relative "helper"
+
+module TestIRB
+ class NestingParserTest < TestCase
+ def setup
+ save_encodings
+ end
+
+ def teardown
+ restore_encodings
+ end
+
+ def parse_by_line(code)
+ IRB::NestingParser.parse_by_line(RubyLex.ripper_lex_without_warning(code))
+ end
+
+ def test_open_tokens
+ code = <<~'EOS'
+ class A
+ def f
+ if true
+ tap do
+ {
+ x: "
+ #{p(1, 2, 3
+ EOS
+ opens = IRB::NestingParser.open_tokens(RubyLex.ripper_lex_without_warning(code))
+ assert_equal(%w[class def if do { " #{ (], opens.map(&:tok))
+ end
+
+ def test_parse_by_line
+ code = <<~EOS
+ (((((1+2
+ ).to_s())).tap do (((
+ EOS
+ _tokens, prev_opens, next_opens, min_depth = parse_by_line(code).last
+ assert_equal(%w[( ( ( ( (], prev_opens.map(&:tok))
+ assert_equal(%w[( ( do ( ( (], next_opens.map(&:tok))
+ assert_equal(2, min_depth)
+ end
+
+ def test_ruby_syntax
+ code = <<~'EOS'
+ class A
+ 1 if 2
+ 1 while 2
+ 1 until 2
+ 1 unless 2
+ 1 rescue 2
+ begin; rescue; ensure; end
+ tap do; rescue; ensure; end
+ class B; end
+ module C; end
+ def f; end
+ def `; end
+ def f() = 1
+ %(); %w[]; %q(); %r{}; %i[]
+ "#{1}"; ''; /#{1}/; `#{1}`
+ :sym; :"sym"; :+; :`; :if
+ [1, 2, 3]
+ { x: 1, y: 2 }
+ (a, (*b, c), d), e = 1, 2, 3
+ ->(a){}; ->(a) do end
+ -> a = -> b = :do do end do end
+ if 1; elsif 2; else; end
+ unless 1; end
+ while 1; end
+ until 1; end
+ for i in j; end
+ case 1; when 2; end
+ puts(1, 2, 3)
+ loop{|i|}
+ loop do |i| end
+ end
+ EOS
+ line_results = parse_by_line(code)
+ assert_equal(code.lines.size, line_results.size)
+ class_open, *inner_line_results, class_close = line_results
+ assert_equal(['class'], class_open[2].map(&:tok))
+ inner_line_results.each {|result| assert_equal(['class'], result[2].map(&:tok)) }
+ assert_equal([], class_close[2].map(&:tok))
+ end
+
+ def test_multiline_string
+ code = <<~EOS
+ "
+ aaa
+ bbb
+ "
+ <<A
+ aaa
+ bbb
+ A
+ EOS
+ line_results = parse_by_line(code)
+ assert_equal(code.lines.size, line_results.size)
+ string_content_line, string_opens = line_results[1]
+ assert_equal("\naaa\nbbb\n", string_content_line.first.first.tok)
+ assert_equal("aaa\n", string_content_line.first.last)
+ assert_equal(['"'], string_opens.map(&:tok))
+ heredoc_content_line, heredoc_opens = line_results[6]
+ assert_equal("aaa\nbbb\n", heredoc_content_line.first.first.tok)
+ assert_equal("bbb\n", heredoc_content_line.first.last)
+ assert_equal(['<<A'], heredoc_opens.map(&:tok))
+ _line, _prev_opens, next_opens, _min_depth = line_results.last
+ assert_equal([], next_opens)
+ end
+
+ def test_backslash_continued_nested_symbol
+ code = <<~'EOS'
+ x = <<A, :\
+ heredoc #{
+ here
+ }
+ A
+ =begin
+ embdoc
+ =end
+ # comment
+
+ if # this is symbol :if
+ while
+ EOS
+ line_results = parse_by_line(code)
+ assert_equal(%w[: <<A #{], line_results[2][2].map(&:tok))
+ assert_equal(%w[while], line_results.last[2].map(&:tok))
+ end
+
+ def test_oneliner_def
+ code = <<~EOC
+ if true
+ # normal oneliner def
+ def f = 1
+ def f() = 1
+ def f(*) = 1
+ # keyword, backtick, op
+ def * = 1
+ def ` = 1
+ def if = 1
+ def *() = 1
+ def `() = 1
+ def if() = 1
+ # oneliner def with receiver
+ def a.* = 1
+ def $a.* = 1
+ def @a.` = 1
+ def A.` = 1
+ def ((a;b;c)).*() = 1
+ def ((a;b;c)).if() = 1
+ def ((a;b;c)).end() = 1
+ # multiline oneliner def
+ def f =
+ 1
+ def f()
+ =
+ 1
+ # oneliner def with comment and embdoc
+ def # comment
+ =begin
+ embdoc
+ =end
+ ((a;b;c))
+ . # comment
+ =begin
+ embdoc
+ =end
+ f (*) # comment
+ =begin
+ embdoc
+ =end
+ =
+ 1
+ # nested oneliner def
+ def f(x = def f() = 1) = def f() = 1
+ EOC
+ _tokens, _prev_opens, next_opens, min_depth = parse_by_line(code).last
+ assert_equal(['if'], next_opens.map(&:tok))
+ assert_equal(1, min_depth)
+ end
+
+ def test_heredoc_embexpr
+ code = <<~'EOS'
+ <<A+<<B+<<C+(<<D+(<<E)
+ #{
+ <<~F+"#{<<~G}
+ #{
+ here
+ }
+ F
+ G
+ "
+ }
+ A
+ B
+ C
+ D
+ E
+ )
+ EOS
+ line_results = parse_by_line(code)
+ last_opens = line_results.last[-2]
+ assert_equal([], last_opens)
+ _tokens, _prev_opens, next_opens, _min_depth = line_results[4]
+ assert_equal(%w[( <<E <<D <<C <<B <<A #{ " <<~G <<~F #{], next_opens.map(&:tok))
+ end
+
+ def test_for_in
+ code = <<~EOS
+ for i in j
+ here
+ end
+ for i in j do
+ here
+ end
+ for i in
+ j do
+ here
+ end
+ for
+ # comment
+ i in j do
+ here
+ end
+ for (a;b;c).d in (a;b;c) do
+ here
+ end
+ for i in :in + :do do
+ here
+ end
+ for i in -> do end do
+ here
+ end
+ EOS
+ line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') }
+ assert_equal(7, line_results.size)
+ line_results.each do |_tokens, _prev_opens, next_opens, _min_depth|
+ assert_equal(['for'], next_opens.map(&:tok))
+ end
+ end
+
+ def test_while_until
+ base_code = <<~'EOS'
+ while_or_until true
+ here
+ end
+ while_or_until a < c
+ here
+ end
+ while_or_until true do
+ here
+ end
+ while_or_until
+ # comment
+ (a + b) <
+ # comment
+ c do
+ here
+ end
+ while_or_until :\
+ do do
+ here
+ end
+ while_or_until def do; end == :do do
+ here
+ end
+ while_or_until -> do end do
+ here
+ end
+ EOS
+ %w[while until].each do |keyword|
+ code = base_code.gsub('while_or_until', keyword)
+ line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') }
+ assert_equal(7, line_results.size)
+ line_results.each do |_tokens, _prev_opens, next_opens, _min_depth|
+ assert_equal([keyword], next_opens.map(&:tok))
+ end
+ end
+ end
+
+ def test_case_in
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7.0')
+ pend 'This test requires ruby version that supports case-in syntax'
+ end
+ code = <<~EOS
+ case 1
+ in 1
+ here
+ in
+ 2
+ here
+ end
+ EOS
+ line_results = parse_by_line(code).select { |tokens,| tokens.map(&:last).include?('here') }
+ assert_equal(2, line_results.size)
+ line_results.each do |_tokens, _prev_opens, next_opens, _min_depth|
+ assert_equal(['in'], next_opens.map(&:tok))
+ end
+ end
+ end
+end
diff --git a/test/irb/test_ruby_lex.rb b/test/irb/test_ruby_lex.rb
index aa27204e26..9d7910cca6 100644
--- a/test/irb/test_ruby_lex.rb
+++ b/test/irb/test_ruby_lex.rb
@@ -95,8 +95,11 @@ module TestIRB
def check_state(lines, local_variables: [])
context = build_context(local_variables)
+ tokens = RubyLex.ripper_lex_without_warning(lines.join("\n"), context: context)
+ opens = IRB::NestingParser.open_tokens(tokens)
ruby_lex = RubyLex.new(context)
- _ltype, indent, _continue, code_block_open = ruby_lex.check_code_state(lines.join("\n"))
+ indent, _nesting_level = ruby_lex.calc_nesting_depth(opens)
+ code_block_open = !opens.empty? || ruby_lex.process_continue(tokens)
[indent, code_block_open]
end
@@ -164,9 +167,9 @@ module TestIRB
Row.new(%q( ]), 4, 4),
Row.new(%q( ]), 2, 2),
Row.new(%q(]), 0, 0),
- Row.new(%q([<<FOO]), 0, 0),
+ Row.new(%q([<<FOO]), nil, 0),
Row.new(%q(hello), 0, 0),
- Row.new(%q(FOO), nil, 0),
+ Row.new(%q(FOO), 0, 0),
]
lines = []
@@ -489,12 +492,12 @@ module TestIRB
end
end
- def test_corresponding_syntax_to_keyword_in
+ def test_typing_incomplete_include_interpreted_as_keyword_in
input_with_correct_indents = [
Row.new(%q(module E), nil, 2, 1),
Row.new(%q(end), 0, 0, 0),
Row.new(%q(class A), nil, 2, 1),
- Row.new(%q( in), nil, 4, 1)
+ Row.new(%q( in), nil, 2, 1) # scenario typing `include E`
]
lines = []
@@ -575,11 +578,19 @@ module TestIRB
end
def test_heredoc_with_indent
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.7.0')
+ pend 'This test needs Ripper::Lexer#scan to take broken tokens'
+ end
input_with_correct_indents = [
- Row.new(%q(<<~Q), 0, 0, 0),
- Row.new(%q({), 0, 0, 0),
- Row.new(%q( #), 2, 0, 0),
- Row.new(%q(}), 0, 0, 0)
+ Row.new(%q(<<~Q+<<~R), nil, 0, 0),
+ Row.new(%q(a), 0, 0, 0),
+ Row.new(%q(a), 0, 0, 0),
+ Row.new(%q( b), 2, 2, 0),
+ Row.new(%q( b), 2, 2, 0),
+ Row.new(%q( Q), 0, 2, 0),
+ Row.new(%q( c), 4, 4, 0),
+ Row.new(%q( c), 4, 4, 0),
+ Row.new(%q( R), 0, 0, 0),
]
lines = []
@@ -592,8 +603,8 @@ module TestIRB
def test_oneliner_def_in_multiple_lines
input_with_correct_indents = [
- Row.new(%q(def a()=[), nil, 4, 2),
- Row.new(%q( 1,), nil, 4, 1),
+ Row.new(%q(def a()=[), nil, 2, 1),
+ Row.new(%q( 1,), nil, 2, 1),
Row.new(%q(].), 0, 0, 0),
Row.new(%q(to_s), nil, 0, 0),
]
@@ -609,7 +620,7 @@ module TestIRB
def test_broken_heredoc
input_with_correct_indents = [
Row.new(%q(def foo), nil, 2, 1),
- Row.new(%q( <<~Q), 2, 2, 1),
+ Row.new(%q( <<~Q), nil, 2, 1),
Row.new(%q( Qend), 2, 2, 1),
]
@@ -621,6 +632,15 @@ module TestIRB
end
end
+ def test_heredoc_keep_indent_spaces
+ (1..4).each do |indent|
+ row = Row.new(' ' * indent, indent, [2, indent].max, 1)
+ lines = ['def foo', ' <<~Q', row.content]
+ assert_row_indenting(lines, row)
+ assert_nesting_level(lines, row.nesting_level)
+ end
+ end
+
PromptRow = Struct.new(:prompt, :content)
class MockIO_DynamicPrompt
@@ -746,10 +766,9 @@ module TestIRB
end
def test_unterminated_heredoc_string_literal
- context = build_context
['<<A;<<B', "<<A;<<B\n", "%W[\#{<<A;<<B", "%W[\#{<<A;<<B\n"].each do |code|
tokens = RubyLex.ripper_lex_without_warning(code)
- string_literal = RubyLex.new(context).check_string_literal(tokens)
+ string_literal = IRB::NestingParser.open_tokens(tokens).last
assert_equal('<<A', string_literal&.tok)
end
end
@@ -779,43 +798,8 @@ module TestIRB
[reference_code, code_with_heredoc, code_with_embdoc].each do |code|
lex = RubyLex.new(context)
lines = code.lines
- lex.instance_variable_set('@tokens', RubyLex.ripper_lex_without_warning(code))
- assert_equal 2, lex.check_corresponding_token_depth(lines, lines.size)
- end
- end
-
- def test_find_prev_spaces_with_multiline_literal
- lex = RubyLex.new(build_context)
- reference_code = <<~EOC.chomp
- if true
- 1
- hello
- 1
- world
- end
- EOC
- code_with_percent_string = <<~EOC.chomp
- if true
- %w[
- hello
- ]
- world
- end
- EOC
- code_with_quoted_string = <<~EOC.chomp
- if true
- '
- hello
- '
- world
- end
- EOC
- context = build_context
- [reference_code, code_with_percent_string, code_with_quoted_string].each do |code|
- lex = RubyLex.new(context)
- lex.instance_variable_set('@tokens', RubyLex.ripper_lex_without_warning(code))
- prev_spaces = (1..code.lines.size).map { |index| lex.find_prev_spaces index }
- assert_equal [0, 2, 2, 2, 2, 0], prev_spaces
+ tokens = RubyLex.ripper_lex_without_warning(code)
+ assert_equal(2, lex.check_corresponding_token_depth(tokens, lines, lines.size - 1))
end
end