diff options
author | Nobuyoshi Nakada <[email protected]> | 2019-04-23 21:55:29 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <[email protected]> | 2019-04-23 21:55:29 +0900 |
commit | f4f66bd11c65882b86e0acf4d58f15fb596f25cf (patch) | |
tree | 34e6e50d15802ad6d8aab4f0d9d747efcd7c149d /lib/irb | |
parent | 89271d4a3733bc5e70e9c56b4bd12f277e699c42 (diff) |
Revert "IRB is improved with Reline and RDoc, take 2"
Accidentally merged when 89271d4a3733bc5e70e9c56b4bd12f277e699c42
"Adjusted indents".
Diffstat (limited to 'lib/irb')
-rw-r--r-- | lib/irb/completion.rb | 19 | ||||
-rw-r--r-- | lib/irb/context.rb | 81 | ||||
-rw-r--r-- | lib/irb/init.rb | 8 | ||||
-rw-r--r-- | lib/irb/input-method.rb | 85 | ||||
-rw-r--r-- | lib/irb/lc/help-message | 1 | ||||
-rw-r--r-- | lib/irb/lc/ja/help-message | 2 | ||||
-rw-r--r-- | lib/irb/ruby-lex.rb | 1259 |
7 files changed, 1117 insertions, 338 deletions
diff --git a/lib/irb/completion.rb b/lib/irb/completion.rb index 4cd9427743..390e7254dd 100644 --- a/lib/irb/completion.rb +++ b/lib/irb/completion.rb @@ -8,10 +8,11 @@ # require "readline" -require "rdoc" module IRB module InputCompletor # :nodoc: + + # Set of reserved words used by Ruby, you should not use these for # constants or variables ReservedWords = %w[ @@ -34,8 +35,6 @@ module IRB yield ] - BASIC_WORD_BREAK_CHARACTERS = " \t\n`><=;|&{(" - CompletionProc = proc { |input| bind = IRB.conf[:MAIN_CONTEXT].workspace.binding @@ -196,14 +195,6 @@ module IRB end } - RDocRIDriver = RDoc::RI::Driver.new - PerfectMatchedProc = proc { |matched| - begin - RDocRIDriver.display_name(matched) - rescue RDoc::RI::Driver::NotFoundError - end - } - # Set of available operators in Ruby Operators = %w[% & * ** + - / < << <= <=> == === =~ > >= >> [] []= ^ ! != !~] @@ -245,3 +236,9 @@ module IRB end end end + +if Readline.respond_to?("basic_word_break_characters=") + Readline.basic_word_break_characters= " \t\n`><=;|&{(" +end +Readline.completion_append_character = nil +Readline.completion_proc = IRB::InputCompletor::CompletionProc diff --git a/lib/irb/context.rb b/lib/irb/context.rb index 866eb1de9d..e8e6a118e6 100644 --- a/lib/irb/context.rb +++ b/lib/irb/context.rb @@ -22,7 +22,7 @@ module IRB # # The optional +input_method+ argument: # - # +nil+:: uses stdin or Reidline or Readline + # +nil+:: uses stdin or Readline # +String+:: uses a File # +other+:: uses this as InputMethod def initialize(irb, workspace = nil, input_method = nil, output_method = nil) @@ -40,7 +40,6 @@ module IRB @load_modules = IRB.conf[:LOAD_MODULES] @use_readline = IRB.conf[:USE_READLINE] - @use_reidline = IRB.conf[:USE_REIDLINE] @verbose = IRB.conf[:VERBOSE] @io = nil @@ -65,41 +64,23 @@ module IRB case input_method when nil - @io = nil - case use_reidline? + case use_readline? when nil - if STDIN.tty? && IRB.conf[:PROMPT_MODE] != :INF_RUBY && !use_readline? 
- @io = ReidlineInputMethod.new + if (defined?(ReadlineInputMethod) && STDIN.tty? && + IRB.conf[:PROMPT_MODE] != :INF_RUBY) + @io = ReadlineInputMethod.new else - @io = nil + @io = StdioInputMethod.new end when false - @io = nil + @io = StdioInputMethod.new when true - @io = ReidlineInputMethod.new - end - unless @io - case use_readline? - when nil - if (defined?(ReadlineInputMethod) && STDIN.tty? && - IRB.conf[:PROMPT_MODE] != :INF_RUBY) - @io = ReadlineInputMethod.new - else - @io = nil - end - when false - @io = nil - when true - if defined?(ReadlineInputMethod) - @io = ReadlineInputMethod.new - else - @io = nil - end + if defined?(ReadlineInputMethod) + @io = ReadlineInputMethod.new else - @io = nil + @io = StdioInputMethod.new end end - @io = StdioInputMethod.new unless @io when String @io = FileInputMethod.new(input_method) @@ -120,6 +101,7 @@ module IRB if @echo.nil? @echo = true end + self.debug_level = IRB.conf[:DEBUG_LEVEL] end # The top-level workspace, see WorkSpace#main @@ -135,9 +117,9 @@ module IRB attr_reader :thread # The current input method # - # Can be either StdioInputMethod, ReadlineInputMethod, - # ReidlineInputMethod, FileInputMethod or other specified when the - # context is created. See ::new for more # information on +input_method+. + # Can be either StdioInputMethod, ReadlineInputMethod, FileInputMethod or + # other specified when the context is created. See ::new for more + # information on +input_method+. attr_accessor :io # Current irb session @@ -155,12 +137,6 @@ module IRB # +input_method+ passed to Context.new attr_accessor :irb_path - # Whether +Reidline+ is enabled or not. - # - # A copy of the default <code>IRB.conf[:USE_REIDLINE]</code> - # - # See #use_reidline= for more information. - attr_reader :use_reidline # Whether +Readline+ is enabled or not. 
# # A copy of the default <code>IRB.conf[:USE_READLINE]</code> @@ -235,6 +211,10 @@ module IRB # # A copy of the default <code>IRB.conf[:VERBOSE]</code> attr_accessor :verbose + # The debug level of irb + # + # See #debug_level= for more information. + attr_reader :debug_level # The limit of backtrace lines displayed as top +n+ and tail +n+. # @@ -245,8 +225,6 @@ module IRB # See IRB@Command+line+options for more command line options. attr_accessor :back_trace_limit - # Alias for #use_reidline - alias use_reidline? use_reidline # Alias for #use_readline alias use_readline? use_readline # Alias for #rc @@ -258,9 +236,7 @@ module IRB # Returns whether messages are displayed or not. def verbose? if @verbose.nil? - if @io.kind_of?(ReidlineInputMethod) - false - elsif defined?(ReadlineInputMethod) && @io.kind_of?(ReadlineInputMethod) + if defined?(ReadlineInputMethod) && @io.kind_of?(ReadlineInputMethod) false elsif !STDIN.tty? or @io.kind_of?(FileInputMethod) true @@ -273,11 +249,9 @@ module IRB end # Whether #verbose? is +true+, and +input_method+ is either - # StdioInputMethod or ReidlineInputMethod or ReadlineInputMethod, see #io - # for more information. + # StdioInputMethod or ReadlineInputMethod, see #io for more information. def prompting? verbose? || (STDIN.tty? && @io.kind_of?(StdioInputMethod) || - @io.kind_of?(ReidlineInputMethod) || (defined?(ReadlineInputMethod) && @io.kind_of?(ReadlineInputMethod))) end @@ -387,6 +361,21 @@ module IRB print "Do nothing." end + # Sets the debug level of irb + # + # Can also be set using the +--irb_debug+ command line option. + # + # See IRB@Command+line+options for more command line options. + def debug_level=(value) + @debug_level = value + RubyLex.debug_level = value + end + + # Whether or not debug mode is enabled, see #debug_level=. + def debug? 
+ @debug_level > 0 + end + def evaluate(line, line_no, exception: nil) # :nodoc: @line_no = line_no if exception diff --git a/lib/irb/init.rb b/lib/irb/init.rb index ec413679df..2066d8cb64 100644 --- a/lib/irb/init.rb +++ b/lib/irb/init.rb @@ -112,6 +112,8 @@ module IRB # :nodoc: @CONF[:LC_MESSAGES] = Locale.new @CONF[:AT_EXIT] = [] + + @CONF[:DEBUG_LEVEL] = 0 end def IRB.init_error @@ -163,10 +165,6 @@ module IRB # :nodoc: @CONF[:USE_READLINE] = true when "--noreadline" @CONF[:USE_READLINE] = false - when "--reidline" - @CONF[:USE_REIDLINE] = true - when "--noreidline" - @CONF[:USE_REIDLINE] = false when "--echo" @CONF[:ECHO] = true when "--noecho" @@ -193,6 +191,8 @@ module IRB # :nodoc: @CONF[:CONTEXT_MODE] = ($1 || argv.shift).to_i when "--single-irb" @CONF[:SINGLE_IRB] = true + when /^--irb_debug(?:=(.+))?/ + @CONF[:DEBUG_LEVEL] = ($1 || argv.shift).to_i when "-v", "--version" print IRB.version, "\n" exit 0 diff --git a/lib/irb/input-method.rb b/lib/irb/input-method.rb index e29d282d2c..f491d5a760 100644 --- a/lib/irb/input-method.rb +++ b/lib/irb/input-method.rb @@ -11,8 +11,6 @@ # require_relative 'src_encoding' require_relative 'magic-file' -require_relative "completion" -require 'reline' module IRB STDIN_FILE_NAME = "(line)" # :nodoc: @@ -142,12 +140,6 @@ module IRB @stdin = IO.open(STDIN.to_i, :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-") @stdout = IO.open(STDOUT.to_i, 'w', :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-") - - if Readline.respond_to?("basic_word_break_characters=") - Readline.basic_word_break_characters = IRB::InputCompletor::BASIC_WORD_BREAK_CHARACTERS - end - Readline.completion_append_character = nil - Readline.completion_proc = IRB::InputCompletor::CompletionProc end # Reads the next line from this input method. 
@@ -194,84 +186,7 @@ module IRB def encoding @stdin.external_encoding end - - if Readline.respond_to?("basic_word_break_characters=") - Readline.basic_word_break_characters = IRB::InputCompletor::BASIC_WORD_BREAK_CHARACTERS - end - Readline.completion_append_character = nil - Readline.completion_proc = IRB::InputCompletor::CompletionProc end rescue LoadError end - - class ReidlineInputMethod < InputMethod - include Reline - # Creates a new input method object using Readline - def initialize - super - - @line_no = 0 - @line = [] - @eof = false - - @stdin = IO.open(STDIN.to_i, :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-") - @stdout = IO.open(STDOUT.to_i, 'w', :external_encoding => IRB.conf[:LC_MESSAGES].encoding, :internal_encoding => "-") - - if Reline.respond_to?("basic_word_break_characters=") - Reline.basic_word_break_characters = IRB::InputCompletor::BASIC_WORD_BREAK_CHARACTERS - end - Reline.completion_append_character = nil - Reline.completion_proc = IRB::InputCompletor::CompletionProc - Reline.dig_perfect_match_proc = IRB::InputCompletor::PerfectMatchedProc - end - - def check_termination(&block) - @check_termination_proc = block - end - - # Reads the next line from this input method. - # - # See IO#gets for more information. - def gets - Reline.input = @stdin - Reline.output = @stdout - if l = readmultiline(@prompt, false, &@check_termination_proc) - HISTORY.push(l) if !l.empty? - @line[@line_no += 1] = l + "\n" - else - @eof = true - l - end - end - - # Whether the end of this input method has been reached, returns +true+ - # if there is no more data to read. - # - # See IO#eof? for more information. - def eof? - @eof - end - - # Whether this input method is still readable when there is no more data to - # read. - # - # See IO#eof for more information. - def readable_after_eof? - true - end - - # Returns the current line number for #io. - # - # #line counts the number of times #gets is called. 
- # - # See IO#lineno for more information. - def line(line_no) - @line[line_no] - end - - # The external encoding for standard input. - def encoding - @stdin.external_encoding - end - end end diff --git a/lib/irb/lc/help-message b/lib/irb/lc/help-message index d1a66dddda..d43c6a1695 100644 --- a/lib/irb/lc/help-message +++ b/lib/irb/lc/help-message @@ -39,6 +39,7 @@ Usage: irb.rb [options] [programfile] [arguments] --back-trace-limit n Display backtrace top n and tail n. The default value is 16. + --irb_debug n Set internal debug level to n (not for popular use) --verbose Show details --noverbose Don't show details -v, --version Print the version of irb diff --git a/lib/irb/lc/ja/help-message b/lib/irb/lc/ja/help-message index 7a15f973c6..1b24d14d28 100644 --- a/lib/irb/lc/ja/help-message +++ b/lib/irb/lc/ja/help-message @@ -41,6 +41,8 @@ Usage: irb.rb [options] [programfile] [arguments] バックトレース表示をバックトレースの頭から n, 後ろ からnだけ行なう. デフォルトは16 + --irb_debug n irbのデバッグレベルをnに設定する(非推奨). + --verbose 詳細なメッセージを出力する. --noverbose 詳細なメッセージを出力しない(デフォルト). -v, --version irbのバージョンを表示する. diff --git a/lib/irb/ruby-lex.rb b/lib/irb/ruby-lex.rb index c4bec4a854..555d1f024f 100644 --- a/lib/irb/ruby-lex.rb +++ b/lib/irb/ruby-lex.rb @@ -11,39 +11,73 @@ # require "e2mmap" -require "ripper" +require_relative "slex" +require_relative "ruby-token" # :stopdoc: class RubyLex extend Exception2MessageMapper + def_exception(:AlreadyDefinedToken, "Already defined token(%s)") + def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") + def_exception(:TkReading2TokenDuplicateError, + "key duplicate(token_n='%s', key='%s')") + def_exception(:SyntaxError, "%s") + def_exception(:TerminateLineInput, "Terminate Line Input") + include RubyToken + + class << self + attr_accessor :debug_level + def debug? 
+ @debug_level > 0 + end + end + @debug_level = 0 + def initialize + lex_init + set_input(STDIN) + + @seek = 0 @exp_line_no = @line_no = 1 + @base_char_no = 0 + @char_no = 0 + @rests = [] + @readed = [] + @here_readed = [] + @indent = 0 + @indent_stack = [] + @lex_state = EXPR_BEG + @space_seen = false + @here_header = false + @post_symbeg = false + @continue = false @line = "" + + @skip_space = false + @readed_auto_clean_up = false + @exception_on_syntax_error = true + @prompt = nil end + attr_accessor :skip_space + attr_accessor :readed_auto_clean_up + attr_accessor :exception_on_syntax_error + + attr_reader :seek + attr_reader :char_no + attr_reader :line_no + attr_reader :indent + # io functions def set_input(io, p = nil, &block) @io = io - if @io.respond_to?(:check_termination) - @io.check_termination do |code| - @tokens = Ripper.lex(code) - continue = process_continue - code_block_open = check_code_block(code) - indent = process_nesting_level - ltype = process_literal_type - if code_block_open or ltype or continue or indent > 0 - false - else - true - end - end - end if p.respond_to?(:call) @input = p elsif block_given? @@ -53,6 +87,112 @@ class RubyLex end end + def get_readed + if idx = @readed.rindex("\n") + @base_char_no = @readed.size - (idx + 1) + else + @base_char_no += @readed.size + end + + readed = @readed.join("") + @readed = [] + readed + end + + def getc + while @rests.empty? + @rests.push nil unless buf_input + end + c = @rests.shift + if @here_header + @here_readed.push c + else + @readed.push c + end + @seek += 1 + if c == "\n" + @line_no += 1 + @char_no = 0 + else + @char_no += 1 + end + c + end + + def gets + l = "" + while c = getc + l.concat(c) + break if c == "\n" + end + return nil if l == "" and c.nil? + l + end + + def eof? + @io.eof? + end + + def getc_of_rests + if @rests.empty? + nil + else + getc + end + end + + def ungetc(c = nil) + if @here_readed.empty? 
+ c2 = @readed.pop + else + c2 = @here_readed.pop + end + c = c2 unless c + @rests.unshift c #c = + @seek -= 1 + if c == "\n" + @line_no -= 1 + if idx = @readed.rindex("\n") + @char_no = idx + 1 + else + @char_no = @base_char_no + @readed.size + end + else + @char_no -= 1 + end + end + + def peek_equal?(str) + chrs = str.split(//) + until @rests.size >= chrs.size + return false unless buf_input + end + @rests[0, chrs.size] == chrs + end + + def peek_match?(regexp) + while @rests.empty? + return false unless buf_input + end + regexp =~ @rests.join("") + end + + def peek(i = 0) + while @rests.size <= i + return nil unless buf_input + end + @rests[i] + end + + def buf_input + prompt + line = @input.call + return nil unless line + @rests.concat line.chars.to_a + true + end + private :buf_input + def set_prompt(p = nil, &block) p = block if block_given? if p.respond_to?(:call) @@ -70,11 +210,20 @@ class RubyLex def initialize_input @ltype = nil + @quoted = nil @indent = 0 + @indent_stack = [] + @lex_state = EXPR_BEG + @space_seen = false + @here_header = false + @continue = false + @post_symbeg = false + + prompt + @line = "" @exp_line_no = @line_no - @code_block_open = false end def each_top_level_statement @@ -82,14 +231,13 @@ class RubyLex catch(:TERM_INPUT) do loop do begin + @continue = false prompt unless l = lex throw :TERM_INPUT if @line == '' else - @line_no += 1 - next if l == "\n" @line.concat l - if @code_block_open or @ltype or @continue or @indent > 0 + if @ltype or @continue or @indent > 0 next end end @@ -102,203 +250,930 @@ class RubyLex @exp_line_no = @line_no @indent = 0 + @indent_stack = [] + prompt rescue TerminateLineInput initialize_input prompt + get_readed end end end end def lex - line = @input.call - if @io.respond_to?(:check_termination) - return line # multiline - end - code = @line + (line.nil? ? 
'' : line) - code.gsub!(/\n*$/, '').concat("\n") - @tokens = Ripper.lex(code) - @continue = process_continue - @code_block_open = check_code_block(code) - @indent = process_nesting_level - @ltype = process_literal_type - line + continue = @continue + while tk = token + case tk + when TkNL, TkEND_OF_SCRIPT + @continue = continue unless continue.nil? + break unless @continue + when TkSPACE, TkCOMMENT + when TkSEMICOLON, TkBEGIN, TkELSE + @continue = continue = false + else + continue = nil + end + end + line = get_readed + if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? + nil + else + line + end + end + + def token + @prev_seek = @seek + @prev_line_no = @line_no + @prev_char_no = @char_no + begin + begin + tk = @OP.match(self) + @space_seen = tk.kind_of?(TkSPACE) + @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp) + @post_symbeg = tk.kind_of?(TkSYMBEG) + rescue SyntaxError + raise if @exception_on_syntax_error + tk = TkError.new(@seek, @line_no, @char_no) + end + end while @skip_space and tk.kind_of?(TkSPACE) + if @readed_auto_clean_up + get_readed + end + tk + end + + ENINDENT_CLAUSE = [ + "case", "class", "def", "do", "for", "if", + "module", "unless", "until", "while", "begin" + ] + DEINDENT_CLAUSE = ["end" + ] + + PERCENT_LTYPE = { + "q" => "\'", + "Q" => "\"", + "x" => "\`", + "r" => "/", + "w" => "]", + "W" => "]", + "i" => "]", + "I" => "]", + "s" => ":" + } + + PERCENT_PAREN = { + "{" => "}", + "[" => "]", + "<" => ">", + "(" => ")" + } + + Ltype2Token = { + "\'" => TkSTRING, + "\"" => TkSTRING, + "\`" => TkXSTRING, + "/" => TkREGEXP, + "]" => TkDSTRING, + ":" => TkSYMBOL + } + DLtype2Token = { + "\"" => TkDSTRING, + "\`" => TkDXSTRING, + "/" => TkDREGEXP, + } + + def lex_init() + @OP = IRB::SLex.new + @OP.def_rules("\0", "\004", "\032") do |op, io| + Token(TkEND_OF_SCRIPT) + end + + @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io| + @space_seen = true + while getc =~ /[ \t\f\r\13]/; end + ungetc + Token(TkSPACE) + end + + 
@OP.def_rule("#") do |op, io| + identify_comment + end + + @OP.def_rule("=begin", + proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do + |op, io| + @ltype = "=" + until getc == "\n"; end + until peek_equal?("=end") && peek(4) =~ /\s/ + until getc == "\n"; end + end + gets + @ltype = nil + Token(TkRD_COMMENT) + end + + @OP.def_rule("\n") do |op, io| + print "\\n\n" if RubyLex.debug? + case @lex_state + when EXPR_BEG, EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG + until (@indent_stack.empty? || + [TkLPAREN, TkLBRACK, TkLBRACE, + TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) + @indent_stack.pop + end + end + @here_header = false + @here_readed = [] + Token(TkNL) + end + + @OP.def_rules("*", "**", + "=", "==", "===", + "=~", "<=>", + "<", "<=", + ">", ">=", ">>", + "!", "!=", "!~") do + |op, io| + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + Token(op) + end + + @OP.def_rules("<<") do + |op, io| + tk = nil + if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && + (@lex_state != EXPR_ARG || @space_seen) + c = peek(0) + if /[-~"'`\w]/ =~ c + tk = identify_here_document + end + end + unless tk + tk = Token(op) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + end + tk + end + + @OP.def_rules("'", '"') do + |op, io| + identify_string(op) + end + + @OP.def_rules("`") do + |op, io| + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + Token(op) + else + identify_string(op) + end + end + + @OP.def_rules('?') do + |op, io| + if @lex_state == EXPR_END + @lex_state = EXPR_BEG + Token(TkQUESTION) + else + ch = getc + if @lex_state == EXPR_ARG && ch =~ /\s/ + ungetc + @lex_state = EXPR_BEG; + Token(TkQUESTION) + else + if (ch == '\\') + read_escape + end + @lex_state = EXPR_END + Token(TkINTEGER) + end + end + end + + @OP.def_rules("&", "&&", "|", "||") do + |op, io| + @lex_state = EXPR_BEG 
+ Token(op) + end + + @OP.def_rules("+=", "-=", "*=", "**=", + "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do + |op, io| + @lex_state = EXPR_BEG + op =~ /^(.*)=$/ + Token(TkOPASGN, $1) + end + + @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do + |op, io| + @lex_state = EXPR_ARG + Token(op) + end + + @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do + |op, io| + @lex_state = EXPR_ARG + Token(op) + end + + @OP.def_rules("+", "-") do + |op, io| + catch(:RET) do + if @lex_state == EXPR_ARG + if @space_seen and peek(0) =~ /[0-9]/ + throw :RET, identify_number + else + @lex_state = EXPR_BEG + end + elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ + throw :RET, identify_number + else + @lex_state = EXPR_BEG + end + Token(op) + end + end + + @OP.def_rule(".") do + |op, io| + @lex_state = EXPR_BEG + if peek(0) =~ /[0-9]/ + ungetc + identify_number + else + # for "obj.if" etc. + @lex_state = EXPR_DOT + Token(TkDOT) + end + end + + @OP.def_rules("..", "...") do + |op, io| + @lex_state = EXPR_BEG + Token(op) + end + + lex_int2 + end + + def lex_int2 + @OP.def_rules("]", "}", ")") do + |op, io| + @lex_state = EXPR_END + @indent -= 1 + @indent_stack.pop + Token(op) + end + + @OP.def_rule(":") do + |op, io| + if @lex_state == EXPR_END || peek(0) =~ /\s/ + @lex_state = EXPR_BEG + Token(TkCOLON) + else + @lex_state = EXPR_FNAME + Token(TkSYMBEG) + end + end + + @OP.def_rule("::") do + |op, io| + if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen + @lex_state = EXPR_BEG + Token(TkCOLON3) + else + @lex_state = EXPR_DOT + Token(TkCOLON2) + end + end + + @OP.def_rule("/") do + |op, io| + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + identify_string(op) + elsif peek(0) == '=' + getc + @lex_state = EXPR_BEG + Token(TkOPASGN, "/") #/) + elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ + identify_string(op) + else + @lex_state = EXPR_BEG + Token("/") #/) + end + end + + @OP.def_rules("^") do + |op, io| + @lex_state = 
EXPR_BEG + Token("^") + end + + @OP.def_rules(",") do + |op, io| + @lex_state = EXPR_BEG + Token(op) + end + + @OP.def_rules(";") do + |op, io| + @lex_state = EXPR_BEG + until (@indent_stack.empty? || + [TkLPAREN, TkLBRACK, TkLBRACE, + TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) + @indent_stack.pop + end + Token(op) + end + + @OP.def_rule("~") do + |op, io| + @lex_state = EXPR_BEG + Token("~") + end + + @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do + |op, io| + @lex_state = EXPR_BEG + Token("~") + end + + @OP.def_rule("(") do + |op, io| + @indent += 1 + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + @lex_state = EXPR_BEG + tk_c = TkfLPAREN + else + @lex_state = EXPR_BEG + tk_c = TkLPAREN + end + @indent_stack.push tk_c + Token(tk_c) + end + + @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do + |op, io| + @lex_state = EXPR_ARG + Token("[]") + end + + @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do + |op, io| + @lex_state = EXPR_ARG + Token("[]=") + end + + @OP.def_rule("[") do + |op, io| + @indent += 1 + if @lex_state == EXPR_FNAME + tk_c = TkfLBRACK + else + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + tk_c = TkLBRACK + elsif @lex_state == EXPR_ARG && @space_seen + tk_c = TkLBRACK + else + tk_c = TkfLBRACK + end + @lex_state = EXPR_BEG + end + @indent_stack.push tk_c + Token(tk_c) + end + + @OP.def_rule("{") do + |op, io| + @indent += 1 + if @lex_state != EXPR_END && @lex_state != EXPR_ARG + tk_c = TkLBRACE + else + tk_c = TkfLBRACE + end + @lex_state = EXPR_BEG + @indent_stack.push tk_c + Token(tk_c) + end + + @OP.def_rule('\\') do + |op, io| + if getc == "\n" + @space_seen = true + @continue = true + Token(TkSPACE) + else + read_escape + Token("\\") + end + end + + @OP.def_rule('%') do + |op, io| + if @lex_state == EXPR_BEG || @lex_state == EXPR_MID + identify_quotation + elsif peek(0) == '=' + getc + Token(TkOPASGN, :%) + elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ 
/\s/ + identify_quotation + else + @lex_state = EXPR_BEG + Token("%") #)) + end + end + + @OP.def_rule('$') do + |op, io| + identify_gvar + end + + @OP.def_rule('@') do + |op, io| + if peek(0) =~ /[\w@]/ + ungetc + identify_identifier + else + Token("@") + end + end + + @OP.def_rule("") do + |op, io| + printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug? + if peek(0) =~ /[0-9]/ + t = identify_number + elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/ + t = identify_identifier + end + printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug? + t + end + + p @OP if RubyLex.debug? end - def process_continue - continued_bits = Ripper::EXPR_BEG | Ripper::EXPR_FNAME | Ripper::EXPR_DOT - # last token is always newline - if @tokens.size >= 2 and @tokens[-2][1] == :on_regexp_end - # end of regexp literal - return false - elsif @tokens.size >= 2 and @tokens[-2][1] == :on_semicolon - return false - elsif @tokens.size >= 2 and @tokens[-2][1] == :on_kw and (@tokens[-2][2] == 'begin' or @tokens[-2][2] == 'else') - return false - elsif !@tokens.empty? and @tokens.last[2] == "\\\n" - return true - elsif @tokens.size >= 2 and @tokens[-2][3].anybits?(continued_bits) - # end of literal except for regexp - return true - end - false + def identify_gvar + @lex_state = EXPR_END + + case ch = getc + when /[~_*$?!@\/\\;,=:<>".]/ #" + Token(TkGVAR, "$" + ch) + when "-" + Token(TkGVAR, "$-" + getc) + when "&", "`", "'", "+" + Token(TkBACK_REF, "$"+ch) + when /[1-9]/ + while getc =~ /[0-9]/; end + ungetc + Token(TkNTH_REF) + when /\w/ + ungetc + ungetc + identify_identifier + else + ungetc + Token("$") + end end - def check_code_block(code) - return true if @tokens.empty? 
- if @tokens.last[1] == :on_heredoc_beg - return true - end - - begin # check if parser error are available - RubyVM::InstructionSequence.compile(code) - rescue SyntaxError => e - case e.message - when /unterminated (?:string|regexp) meets end of file/ - # "unterminated regexp meets end of file" - # - # example: - # / - # - # "unterminated string meets end of file" - # - # example: - # ' - return true - when /syntax error, unexpected end-of-input/ - # "syntax error, unexpected end-of-input, expecting keyword_end" - # - # example: - # if ture - # hoge - # if false - # fuga - # end - return true - when /syntax error, unexpected keyword_end/ - # "syntax error, unexpected keyword_end" - # - # example: - # if ( - # end - # - # example: - # end - return false - when /unexpected tREGEXP_BEG/ - # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('" - # - # example: - # method / f / - return false - end - end - - last_lex_state = @tokens.last[3] - if last_lex_state.allbits?(Ripper::EXPR_BEG) - return false - elsif last_lex_state.allbits?(Ripper::EXPR_DOT) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_CLASS) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_FNAME) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_VALUE) - return true - elsif last_lex_state.allbits?(Ripper::EXPR_ARG) - return false - end - - false + def identify_identifier + token = "" + if peek(0) =~ /[$@]/ + token.concat(c = getc) + if c == "@" and peek(0) == "@" + token.concat getc + end + end + + while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/ + print ":", ch, ":" if RubyLex.debug? + token.concat ch + end + ungetc + + if (ch == "!" 
|| ch == "?") && token[0,1] =~ /\w/ && peek(0) != "=" + token.concat getc + end + + # almost fix token + + case token + when /^\$/ + return Token(TkGVAR, token) + when /^\@\@/ + @lex_state = EXPR_END + # p Token(TkCVAR, token) + return Token(TkCVAR, token) + when /^\@/ + @lex_state = EXPR_END + return Token(TkIVAR, token) + end + + if @lex_state != EXPR_DOT + print token, "\n" if RubyLex.debug? + + token_c, *trans = TkReading2Token[token] + if token_c + # reserved word? + + if (@lex_state != EXPR_BEG && + @lex_state != EXPR_FNAME && + trans[1]) + # modifiers + token_c = TkSymbol2Token[trans[1]] + @lex_state = trans[0] + else + if @lex_state != EXPR_FNAME and peek(0) != ':' + if ENINDENT_CLAUSE.include?(token) + # check for ``class = val'' etc. + valid = true + case token + when "class" + valid = false unless peek_match?(/^\s*(<<|\w|::)/) + when "def" + valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/) + when "do" + valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/) + when *ENINDENT_CLAUSE + valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/) + else + # no nothing + end + if valid + if token == "do" + if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last) + @indent += 1 + @indent_stack.push token_c + end + else + @indent += 1 + @indent_stack.push token_c + end + end + + elsif DEINDENT_CLAUSE.include?(token) + @indent -= 1 + @indent_stack.pop + end + @lex_state = trans[0] + else + @lex_state = EXPR_END + end + end + return Token(token_c, token) + end + end + + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + if peek(0) == '=' + token.concat getc + end + elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_END + end + + if token[0, 1] =~ /[A-Z]/ + return Token(TkCONSTANT, token) + elsif token[token.size - 1, 1] =~ /[!?]/ + return Token(TkFID, token) + else + return Token(TkIDENTIFIER, token) + end end - def process_nesting_level - @tokens.inject(0) { |indent, 
t| - case t[1] - when :on_lbracket, :on_lbrace, :on_lparen - indent += 1 - when :on_rbracket, :on_rbrace, :on_rparen - indent -= 1 - when :on_kw - case t[2] - when 'def', 'do', 'case', 'for', 'begin', 'class', 'module' - indent += 1 - when 'if', 'unless', 'while', 'until', 'rescue' - # postfix if/unless/while/until/rescue must be Ripper::EXPR_LABEL - indent += 1 unless t[3].allbits?(Ripper::EXPR_LABEL) - when 'end' - indent -= 1 + def identify_here_document + ch = getc + if ch == "-" || ch == "~" + ch = getc + indent = true + end + if /['"`]/ =~ ch + lt = ch + quoted = "" + while (c = getc) && c != lt + quoted.concat c + end + else + lt = '"' + quoted = ch.dup + while (c = getc) && c =~ /\w/ + quoted.concat c + end + ungetc + end + + ltback, @ltype = @ltype, lt + reserve = [] + while ch = getc + reserve.push ch + if ch == "\\" + reserve.push ch = getc + elsif ch == "\n" + break + end + end + + @here_header = false + + line = "" + while ch = getc + if ch == "\n" + if line == quoted + break + end + line = "" + else + line.concat ch unless indent && line == "" && /\s/ =~ ch + if @ltype != "'" && ch == "#" && peek(0) == "{" + identify_string_dvar + end + end + end + + @here_header = true + @here_readed.concat reserve + while ch = reserve.pop + ungetc ch + end + + @ltype = ltback + @lex_state = EXPR_END + Token(Ltype2Token[lt]) + end + + def identify_quotation + ch = getc + if lt = PERCENT_LTYPE[ch] + ch = getc + elsif ch =~ /\W/ + lt = "\"" + else + RubyLex.fail SyntaxError, "unknown type of %string" + end + @quoted = ch unless @quoted = PERCENT_PAREN[ch] + identify_string(lt, @quoted) + end + + def identify_number + @lex_state = EXPR_END + + if peek(0) == "0" && peek(1) !~ /[.eE]/ + getc + case peek(0) + when /[xX]/ + ch = getc + match = /[0-9a-fA-F_]/ + when /[bB]/ + ch = getc + match = /[01_]/ + when /[oO]/ + ch = getc + match = /[0-7_]/ + when /[dD]/ + ch = getc + match = /[0-9_]/ + when /[0-7]/ + match = /[0-7_]/ + when /[89]/ + RubyLex.fail SyntaxError, "Invalid 
octal digit" + else + return Token(TkINTEGER) + end + + len0 = true + non_digit = false + while ch = getc + if match =~ ch + if ch == "_" + if non_digit + RubyLex.fail SyntaxError, "trailing `#{ch}' in number" + else + non_digit = ch + end + else + non_digit = false + len0 = false + end + else + ungetc + if len0 + RubyLex.fail SyntaxError, "numeric literal without digits" + end + if non_digit + RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" + end + break + end + end + return Token(TkINTEGER) + end + + type = TkINTEGER + allow_point = true + allow_e = true + non_digit = false + while ch = getc + case ch + when /[0-9]/ + non_digit = false + when "_" + non_digit = ch + when allow_point && "." + if non_digit + RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" + end + type = TkFLOAT + if peek(0) !~ /[0-9]/ + type = TkINTEGER + ungetc + break + end + allow_point = false + when allow_e && "e", allow_e && "E" + if non_digit + RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" + end + type = TkFLOAT + if peek(0) =~ /[+-]/ + getc + end + allow_e = false + allow_point = false + non_digit = ch + else + if non_digit + RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" end + ungetc + break end - # percent literals are not indented - indent - } + end + Token(type) + end + + def identify_string(ltype, quoted = ltype) + @ltype = ltype + @quoted = quoted + subtype = nil + begin + nest = 0 + while ch = getc + if @quoted == ch and nest == 0 + break + elsif @ltype != "'" && ch == "#" && peek(0) == "{" + identify_string_dvar + elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#" + subtype = true + elsif ch == '\\' and @ltype == "'" #' + case ch = getc + when "\\", "\n", "'" + else + ungetc + end + elsif ch == '\\' #' + read_escape + end + if PERCENT_PAREN.values.include?(@quoted) + if PERCENT_PAREN[ch] == @quoted + nest += 1 + elsif ch == @quoted + nest -= 1 + end + end + end + if @ltype == "/" + while /[imxoesun]/ =~ 
peek(0) + getc + end + end + if subtype + Token(DLtype2Token[ltype]) + else + Token(Ltype2Token[ltype]) + end + ensure + @ltype = nil + @quoted = nil + @lex_state = EXPR_END + end end - def check_string_literal - i = 0 - start_token = [] - end_type = [] - while i < @tokens.size - t = @tokens[i] - case t[1] - when :on_tstring_beg - start_token << t - end_type << :on_tstring_end - when :on_regexp_beg - start_token << t - end_type << :on_regexp_end - when :on_symbeg - if (i + 1) < @tokens.size and @tokens[i + 1][1] != :on_ident - start_token << t - end_type << :on_tstring_end + def identify_string_dvar + begin + getc + + reserve_continue = @continue + reserve_ltype = @ltype + reserve_indent = @indent + reserve_indent_stack = @indent_stack + reserve_state = @lex_state + reserve_quoted = @quoted + + @ltype = nil + @quoted = nil + @indent = 0 + @indent_stack = [] + @lex_state = EXPR_BEG + + loop do + @continue = false + prompt + tk = token + if @ltype or @continue or @indent >= 0 + next end - when :on_backtick - start_token << t - end_type << :on_tstring_end - when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg - start_token << t - end_type << :on_tstring_end - when :on_heredoc_beg - start_token << t - end_type << :on_heredoc_end - when end_type.last - start_token.pop - end_type.pop - end - i += 1 - end - start_token.last.nil? ? '' : start_token.last + break if tk.kind_of?(TkRBRACE) + end + ensure + @continue = reserve_continue + @ltype = reserve_ltype + @indent = reserve_indent + @indent_stack = reserve_indent_stack + @lex_state = reserve_state + @quoted = reserve_quoted + end end - def process_literal_type - start_token = check_string_literal - case start_token[1] - when :on_tstring_beg - case start_token[2] - when ?" then ?" - when /^%.$/ then ?" - when /^%Q.$/ then ?" - when ?' then ?' - when /^%q.$/ then ?' - end - when :on_regexp_beg then ?/ - when :on_symbeg then ?: - when :on_backtick then ?` - when :on_qwords_beg then ?] 
- when :on_words_beg then ?] - when :on_qsymbols_beg then ?] - when :on_symbols_beg then ?] - when :on_heredoc_beg - start_token[2] =~ /<<[-~]?(['"`])[_a-zA-Z0-9]+\1/ - case $1 - when ?" then ?" - when ?' then ?' - when ?` then ?` - else ?" + def identify_comment + @ltype = "#" + + while ch = getc + if ch == "\n" + @ltype = nil + ungetc + break + end + end + return Token(TkCOMMENT) + end + + def read_escape + case ch = getc + when "\n", "\r", "\f" + when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #" + when /[0-7]/ + ungetc ch + 3.times do + case ch = getc + when /[0-7]/ + when nil + break + else + ungetc + break + end + end + + when "x" + 2.times do + case ch = getc + when /[0-9a-fA-F]/ + when nil + break + else + ungetc + break + end + end + + when "M" + if (ch = getc) != '-' + ungetc + else + if (ch = getc) == "\\" #" + read_escape + end + end + + when "C", "c" #, "^" + if ch == "C" and (ch = getc) != "-" + ungetc + elsif (ch = getc) == "\\" #" + read_escape end else - nil + # other characters end end end |