diff options
Diffstat (limited to 'lib/rdoc/ruby_lex.rb')
-rw-r--r-- | lib/rdoc/ruby_lex.rb | 1521 |
1 files changed, 0 insertions, 1521 deletions
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb deleted file mode 100644 index e76fdf0414..0000000000 --- a/lib/rdoc/ruby_lex.rb +++ /dev/null @@ -1,1521 +0,0 @@ -# coding: US-ASCII -# frozen_string_literal: false - -#-- -# irb/ruby-lex.rb - ruby lexcal analyzer -# $Release Version: 0.9.5$ -# $Revision: 17979 $ -# $Date: 2008-07-09 10:17:05 -0700 (Wed, 09 Jul 2008) $ -# by Keiju ISHITSUKA([email protected]) -# -#++ - -require "e2mmap" -require "irb/slex" -require "stringio" - -## -# Ruby lexer adapted from irb. -# -# The internals are not documented because they are scary. - -class RDoc::RubyLex - - ## - # Raised upon invalid input - - class Error < RDoc::Error - end - - # :stopdoc: - - extend Exception2MessageMapper - - def_exception(:AlreadyDefinedToken, "Already defined token(%s)") - def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkReading2TokenDuplicateError, - "key duplicate(token_n='%s', key='%s')") - def_exception(:SyntaxError, "%s") - - def_exception(:TerminateLineInput, "Terminate Line Input") - - include RDoc::RubyToken - include IRB - - attr_accessor :continue - attr_accessor :lex_state - attr_accessor :first_in_method_statement - attr_reader :reader - - class << self - attr_accessor :debug_level - end - - def self.debug? - @debug_level > 0 - end - - self.debug_level = 0 - - # :startdoc: - - ## - # Returns an Array of +ruby+ tokens. See ::new for a description of - # +options+. - - def self.tokenize ruby, options - tokens = [] - - scanner = RDoc::RubyLex.new ruby, options - scanner.exception_on_syntax_error = true - - while token = scanner.token do - tokens << token - end - - tokens - end - - ## - # Creates a new lexer for +content+. +options+ is an RDoc::Options, only - # +tab_width is used. - - def initialize(content, options) - lex_init - - if /\t/ =~ content then - tab_width = options.tab_width - content = content.split(/\n/).map do |line| - 1 while line.gsub!(/\t+/) { - ' ' * (tab_width*$&.length - $`.length % tab_width) - } && $~ - line - end.join("\n") - end - - content << "\n" unless content[-1, 1] == "\n" - - set_input StringIO.new content - - @base_char_no = 0 - @char_no = 0 - @exp_line_no = @line_no = 1 - @here_readed = [] - @readed = [] - @current_readed = @readed - @rests = [] - @seek = 0 - - @heredoc_queue = [] - - @indent = 0 - @indent_stack = [] - @lex_state = :EXPR_BEG - @space_seen = false - @escaped_nl = false - @first_in_method_statement = false - @after_question = false - - @continue = false - @line = "" - - @skip_space = false - @readed_auto_clean_up = false - @exception_on_syntax_error = true - - @prompt = nil - @prev_seek = nil - @ltype = nil - end - - # :stopdoc: - - def inspect # :nodoc: - "#<%s:0x%x pos %d lex_state %p space_seen %p>" % [ - self.class, object_id, - @io.pos, @lex_state, @space_seen, - ] - end - - attr_accessor :skip_space - attr_accessor :readed_auto_clean_up - attr_accessor :exception_on_syntax_error - - attr_reader :seek - attr_reader :char_no - attr_reader :line_no - attr_reader :indent - - # io functions - def set_input(io, p = nil, &block) - @io = io - if p.respond_to?(:call) - @input = p - elsif block_given? - @input = block - else - @input = Proc.new{@io.gets} - end - end - - def get_readed - if idx = @readed.rindex("\n") - @base_char_no = @readed.size - (idx + 1) - else - @base_char_no += @readed.size - end - - readed = @readed.join("") - @readed.clear - readed - end - - def getc - while @rests.empty? - # return nil unless buf_input - @rests.push nil unless buf_input - end - c = @rests.shift - @current_readed.push c - @seek += 1 - if c == "\n".freeze - @line_no += 1 - @char_no = 0 - else - @char_no += 1 - end - - c - end - - def gets - l = "" - while c = getc - l.concat(c) - break if c == "\n" - end - return nil if l == "" and c.nil? - l - end - - def eof? - @io.eof? - end - - def getc_of_rests - if @rests.empty? - nil - else - getc - end - end - - def ungetc(c = nil) - if @here_readed.empty? - c2 = @readed.pop - else - c2 = @here_readed.pop - end - c = c2 unless c - @rests.unshift c #c = - @seek -= 1 - if c == "\n" - @line_no -= 1 - if idx = @readed.rindex("\n") - @char_no = idx + 1 - else - @char_no = @base_char_no + @readed.size - end - else - @char_no -= 1 - end - end - - def peek_equal?(str) - chrs = str.split(//) - until @rests.size >= chrs.size - return false unless buf_input - end - @rests[0, chrs.size] == chrs - end - - def peek_match?(regexp) - while @rests.empty? - return false unless buf_input - end - regexp =~ @rests.join("") - end - - def peek(i = 0) - while @rests.size <= i - return nil unless buf_input - end - @rests[i] - end - - def buf_input - prompt - line = @input.call - return nil unless line - @rests.concat line.split(//) - true - end - private :buf_input - - def set_prompt(p = nil, &block) - p = block if block_given? - if p.respond_to?(:call) - @prompt = p - else - @prompt = Proc.new{print p} - end - end - - def prompt - if @prompt - @prompt.call(@ltype, @indent, @continue, @line_no) - end - end - - def initialize_input - @ltype = nil - @quoted = nil - @indent = 0 - @indent_stack = [] - @lex_state = :EXPR_BEG - @space_seen = false - @current_readed = @readed - - @continue = false - prompt - - @line = "" - @exp_line_no = @line_no - end - - def each_top_level_statement - initialize_input - catch(:TERM_INPUT) do - loop do - begin - @continue = false - prompt - unless l = lex - throw :TERM_INPUT if @line == '' - else - #p l - @line.concat l - if @ltype or @continue or @indent > 0 - next - end - end - if @line != "\n" - yield @line, @exp_line_no - end - break unless l - @line = '' - @exp_line_no = @line_no - - @indent = 0 - @indent_stack = [] - prompt - rescue TerminateLineInput - initialize_input - prompt - get_readed - end - end - end - end - - def lex - until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) && - !@continue or - tk.nil?) - #p tk - #p @lex_state - #p self - end - line = get_readed - # print self.inspect - if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil? - nil - else - line - end - end - - def token - # require "tracer" - # Tracer.on - @prev_seek = @seek - @prev_line_no = @line_no - @prev_char_no = @char_no - begin - begin - tk = @OP.match(self) - @space_seen = tk.kind_of?(TkSPACE) - @first_in_method_statement = false if !@space_seen && @first_in_method_statement - rescue SyntaxError => e - raise Error, "syntax error: #{e.message}" if - @exception_on_syntax_error - - tk = TkError.new(@seek, @line_no, @char_no) - end - end while @skip_space and tk.kind_of?(TkSPACE) - - if @readed_auto_clean_up - get_readed - end - - if TkSYMBEG === tk then - tk1 = token - set_token_position tk.seek, tk.line_no, tk.char_no - - case tk1 - when TkId, TkOp, TkSTRING, TkDSTRING, TkSTAR, TkAMPER then - if tk1.respond_to?(:name) then - tk = Token(TkSYMBOL, ":" + tk1.name) - else - tk = Token(TkSYMBOL, ":" + tk1.text) - end - else - tk = tk1 - end - elsif (TkPLUS === tk or TkMINUS === tk) and peek(0) =~ /\d/ then - tk1 = token - set_token_position tk.seek, tk.line_no, tk.char_no - tk = Token(tk1.class, tk.text + tk1.text) - end - @after_question = false if @after_question and !(TkQUESTION === tk) - - # Tracer.off - tk - end - - ENINDENT_CLAUSE = [ - "case", "class", "def", "do", "for", "if", - "module", "unless", "until", "while", "begin" #, "when" - ] - - DEINDENT_CLAUSE = ["end" #, "when" - ] - - PERCENT_LTYPE = { - "q" => "\'", - "Q" => "\"", - "x" => "\`", - "r" => "/", - "w" => "]", - "W" => "]", - "s" => ":", - "i" => "]", - "I" => "]" - } - - PERCENT_PAREN = { - "{" => "}", - "[" => "]", - "<" => ">", - "(" => ")" - } - - PERCENT_PAREN_REV = PERCENT_PAREN.invert - - Ltype2Token = { - "\'" => TkSTRING, - "\"" => TkSTRING, - "\`" => TkXSTRING, - "/" => TkREGEXP, - "]" => TkDSTRING, - ":" => TkSYMBOL - } - DLtype2Token = { - "\"" => TkDSTRING, - "\`" => TkDXSTRING, - "/" => TkDREGEXP, - } - - def lex_init() - @OP = IRB::SLex.new - @OP.def_rules("\0", "\004", "\032") do |op, io| - Token(TkEND_OF_SCRIPT, '') - end - - @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io| - @space_seen = true - str = op - while (ch = getc) =~ /[ \t\f\r\13]/ do - str << ch - end - ungetc - Token TkSPACE, str - end - - @OP.def_rule("#") do |op, io| - identify_comment - end - - @OP.def_rule("=begin", - proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do - |op, io| - @ltype = "=" - res = op - until (ch = getc) == "\n" do - res << ch - end - res << ch - - until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do - (ch = getc) - res << ch - end - - res << gets # consume =end - - @ltype = nil - Token(TkRD_COMMENT, res) - end - - @OP.def_rule("\n") do |op, io| - print "\\n\n" if RDoc::RubyLex.debug? - unless @heredoc_queue.empty? - info = @heredoc_queue[0] - if !info[:started] # "\n" - info[:started] = true - ungetc "\n" - elsif info[:heredoc_end].nil? # heredoc body - tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent]) - info[:heredoc_end] = heredoc_end - ungetc "\n" - else # heredoc end - @heredoc_queue.shift - @lex_state = :EXPR_BEG - tk = Token(TkHEREDOCEND, info[:heredoc_end]) - if !@heredoc_queue.empty? - @heredoc_queue[0][:started] = true - ungetc "\n" - end - end - end - unless tk - case @lex_state - when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT - @continue = true - else - @continue = false - @lex_state = :EXPR_BEG unless @escaped_nl - until (@indent_stack.empty? || - [TkLPAREN, TkLBRACK, TkLBRACE, - TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) - @indent_stack.pop - end - end - @current_readed = @readed - @here_readed.clear - tk = Token(TkNL) - end - @escaped_nl = false - tk - end - - @OP.def_rules("=") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - @lex_state = :EXPR_ARG - else - @lex_state = :EXPR_BEG - end - Token(op) - end - - @OP.def_rules("*", "**", - "==", "===", - "=~", "<=>", - "<", "<=", - ">", ">=", ">>", "=>") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - tk = Token(TkId, op) - @lex_state = :EXPR_ARG - else - tk = Token(op) - @lex_state = :EXPR_BEG - end - tk - end - - @OP.def_rules("->") do - |op, io| - @lex_state = :EXPR_ENDFN - Token(op) - end - - @OP.def_rules("!", "!=", "!~") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - @lex_state = :EXPR_ARG - Token(TkId, op) - else - @lex_state = :EXPR_BEG - Token(op) - end - end - - @OP.def_rules("<<") do - |op, io| - tk = nil - if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS && - (@lex_state != :EXPR_ARG || @space_seen) - c = peek(0) - if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~") - tk = identify_here_document(op) - end - end - unless tk - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - tk = Token(TkId, op) - @lex_state = :EXPR_ARG - else - tk = Token(op) - @lex_state = :EXPR_BEG - end - end - tk - end - - @OP.def_rules("'", '"') do - |op, io| - identify_string(op) - end - - @OP.def_rules("`") do - |op, io| - if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state - @lex_state = :EXPR_ARG - Token(TkId, op) - else - identify_string(op) - end - end - - @OP.def_rules('?') do - |op, io| - if @lex_state == :EXPR_END - @lex_state = :EXPR_BEG - @after_question = true - Token(TkQUESTION) - else - ch = getc - if @lex_state == :EXPR_ARG && ch =~ /\s/ - ungetc - @lex_state = :EXPR_BEG; - Token(TkQUESTION) - else - @lex_state = :EXPR_END - ch << getc if "\\" == ch - Token(TkCHAR, "?#{ch}") - end - end - end - - @OP.def_rules("&&", "||") do - |op, io| - @lex_state = :EXPR_BEG - Token(op) - end - - @OP.def_rules("&", "|") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - tk = Token(TkId, op) - @lex_state = :EXPR_ARG - else - tk = Token(op) - @lex_state = :EXPR_BEG - end - tk - end - - @OP.def_rules("+=", "-=", "*=", "**=", - "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do - |op, io| - @lex_state = :EXPR_BEG - op =~ /^(.*)=$/ - Token(TkOPASGN, $1) - end - - @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do - |op, io| - @lex_state = :EXPR_ARG - Token(TkId, op) - end - - @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do - |op, io| - @lex_state = :EXPR_ARG - Token(TkId, op) - end - - @OP.def_rules("+", "-") do - |op, io| - catch(:RET) do - if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state - tk = Token(TkId, op) - @lex_state = :EXPR_ARG - elsif @lex_state == :EXPR_ARG - if @space_seen and peek(0) =~ /[0-9]/ - throw :RET, identify_number(op) - else - @lex_state = :EXPR_BEG - end - elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/ - throw :RET, identify_number(op) - else - @lex_state = :EXPR_BEG - end - tk = Token(op) unless tk - tk - end - end - - @OP.def_rules(".", "&.") do - |op, io| - @lex_state = :EXPR_BEG - if peek(0) =~ /[0-9]/ - ungetc - identify_number - else - # for "obj.if" or "obj&.if" etc. - @lex_state = :EXPR_DOT - Token(op) - end - end - - @OP.def_rules("..", "...") do - |op, io| - @lex_state = :EXPR_BEG - Token(op) - end - - lex_int2 - end - - def lex_int2 - @OP.def_rules("]", "}", ")") do - |op, io| - @lex_state = :EXPR_END - @indent -= 1 - @indent_stack.pop - Token(op) - end - - @OP.def_rule(":") do - |op, io| - if @lex_state == :EXPR_END || peek(0) =~ /\s/ - @lex_state = :EXPR_BEG - Token(TkCOLON) - else - @lex_state = :EXPR_FNAME; - Token(TkSYMBEG) - end - end - - @OP.def_rule("::") do - |op, io| - # p @lex_state.id2name, @space_seen - if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen - @lex_state = :EXPR_BEG - Token(TkCOLON3) - else - @lex_state = :EXPR_DOT - Token(TkCOLON2) - end - end - - @OP.def_rule("/") do - |op, io| - if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state - @lex_state = :EXPR_ARG - Token(TkId, op) - elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID || @first_in_method_statement - identify_string(op) - elsif peek(0) == '=' - getc - @lex_state = :EXPR_BEG - Token(TkOPASGN, "/") #/) - elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_string(op) - else - @lex_state = :EXPR_BEG - Token("/") #/) - end - end - - @OP.def_rules("^") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - tk = Token(TkId, op) - @lex_state = :EXPR_ARG - else - tk = Token(op) - @lex_state = :EXPR_BEG - end - tk - end - - # @OP.def_rules("^=") do - # @lex_state = :EXPR_BEG - # Token(OP_ASGN, :^) - # end - - @OP.def_rules(",") do - |op, io| - @lex_state = :EXPR_BEG - Token(op) - end - - @OP.def_rules(";") do - |op, io| - @lex_state = :EXPR_BEG - until (@indent_stack.empty? || - [TkLPAREN, TkLBRACK, TkLBRACE, - TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last)) - @indent_stack.pop - end - Token(op) - end - - @OP.def_rule("~") do - |op, io| - case @lex_state - when :EXPR_FNAME, :EXPR_DOT - @lex_state = :EXPR_ARG - Token(TkId, op) - else - @lex_state = :EXPR_BEG - Token(op) - end - end - - @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do - |op, io| - @lex_state = :EXPR_BEG - Token("~") - end - - @OP.def_rule("(") do - |op, io| - @indent += 1 - if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID - @lex_state = :EXPR_BEG - tk_c = TkfLPAREN - else - @lex_state = :EXPR_BEG - tk_c = TkLPAREN - end - @indent_stack.push tk_c - Token tk_c - end - - @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do - |op, io| - @lex_state = :EXPR_ARG - Token(TkId, op) - end - - @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do - |op, io| - @lex_state = :EXPR_ARG - Token(TkId, op) - end - - @OP.def_rule("[") do - |op, io| - text = nil - @indent += 1 - if @lex_state == :EXPR_FNAME - tk_c = TkfLBRACK - else - if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID - tk_c = TkLBRACK - elsif @lex_state == :EXPR_ARG && @space_seen - tk_c = TkLBRACK - elsif @lex_state == :EXPR_DOT - if peek(0) == "]" - tk_c = TkIDENTIFIER - getc - if peek(0) == "=" - text = "[]=" - else - text = "[]" - end - else - tk_c = TkOp - end - else - tk_c = TkfLBRACK - end - @lex_state = :EXPR_BEG - end - @indent_stack.push tk_c - Token(tk_c, text) - end - - @OP.def_rule("{") do - |op, io| - @indent += 1 - if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG - tk_c = TkLBRACE - else - tk_c = TkfLBRACE - end - @lex_state = :EXPR_BEG - @indent_stack.push tk_c - Token(tk_c) - end - - @OP.def_rule('\\') do - |op, io| - if peek(0) == "\n" - @space_seen = true - @continue = true - @escaped_nl = true - end - Token("\\") - end - - @OP.def_rule('%') do - |op, io| - if :EXPR_FNAME == @lex_state or :EXPR_DOT == @lex_state - @lex_state = :EXPR_ARG - Token(TkId, op) - elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID - identify_quotation - elsif peek(0) == '=' - getc - @lex_state = :EXPR_BEG - Token(TkOPASGN, '%') - elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_quotation - else - @lex_state = :EXPR_BEG - Token("%") #)) - end - end - - @OP.def_rule('$') do - |op, io| - identify_gvar - end - - @OP.def_rule('@') do - |op, io| - if peek(0) =~ /[\w@]/ - ungetc - identify_identifier - else - Token("@") - end - end - - # @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do - # |op, io| - # @indent += 1 - # @lex_state = :EXPR_FNAME - # # @lex_state = :EXPR_END - # # until @rests[0] == "\n" or @rests[0] == ";" - # # rests.shift - # # end - # end - - @OP.def_rule("_") do - if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then - 6.times { getc } - Token(TkEND_OF_SCRIPT, '__END__') - else - ungetc - identify_identifier - end - end - - @OP.def_rule("") do - |op, io| - printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? - if peek(0) =~ /[0-9]/ - t = identify_number - else - t = identify_identifier - end - printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? - t - end - - p @OP if RDoc::RubyLex.debug? - end - - def identify_gvar - @lex_state = :EXPR_END - - case ch = getc - when /[~_*$?!@\/\\;,=:<>".]/ #" - Token(TkGVAR, "$" + ch) - when "-" - Token(TkGVAR, "$-" + getc) - when "&", "`", "'", "+" - Token(TkBACK_REF, "$"+ch) - when /[1-9]/ - ref = ch - while (ch = getc) =~ /[0-9]/ do ref << ch end - ungetc - Token(TkNTH_REF, "$#{ref}") - when /\w/ - ungetc - ungetc - identify_identifier - else - ungetc - Token("$") - end - end - - IDENT_RE = eval '/[\w\u{0080}-\u{FFFFF}]/u' - - def identify_identifier - token = "" - if peek(0) =~ /[$@]/ - token.concat(c = getc) - if c == "@" and peek(0) == "@" - token.concat getc - end - end - - while (ch = getc) =~ IDENT_RE do - print " :#{ch}: " if RDoc::RubyLex.debug? - token.concat ch - end - - ungetc - - if ((ch == "!" && peek(1) != "=") || ch == "?") && token[0,1] =~ /\w/ - token.concat getc - end - - # almost fix token - - case token - when /^\$/ - return Token(TkGVAR, token) - when /^\@\@/ - @lex_state = :EXPR_END - # p Token(TkCVAR, token) - return Token(TkCVAR, token) - when /^\@/ - @lex_state = :EXPR_END - return Token(TkIVAR, token) - end - - if @lex_state != :EXPR_DOT - print token, "\n" if RDoc::RubyLex.debug? - - token_c, *trans = TkReading2Token[token] - if token_c - # reserved word? - - if (@lex_state != :EXPR_BEG && - @lex_state != :EXPR_FNAME && - trans[1]) - # modifiers - token_c = TkSymbol2Token[trans[1]] - @lex_state = trans[0] - else - if @lex_state != :EXPR_FNAME - if ENINDENT_CLAUSE.include?(token) - valid = peek(0) != ':' - - # check for ``class = val'' etc. - case token - when "class" - valid = false unless peek_match?(/^\s*(<<|\w|::)/) - when "def" - valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/) - when "do" - valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/) - when *ENINDENT_CLAUSE - valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/) - else - # no nothing - end if valid - - if valid - if token == "do" - if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last) - @indent += 1 - @indent_stack.push token_c - end - else - @indent += 1 - @indent_stack.push token_c - end - else - token_c = TkIDENTIFIER - end - - elsif DEINDENT_CLAUSE.include?(token) - @indent -= 1 - @indent_stack.pop - end - @lex_state = trans[0] - else - @lex_state = :EXPR_END - end - end - if token_c.ancestors.include?(TkId) and peek(0) == ':' and !peek_match?(/^::/) - token.concat getc - token_c = TkSYMBOL - end - return Token(token_c, token) - end - end - - if @lex_state == :EXPR_FNAME - @lex_state = :EXPR_END - if peek(0) == '=' and peek(1) != '>' - token.concat getc - end - elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT || - @lex_state == :EXPR_ARG || @lex_state == :EXPR_MID - @lex_state = :EXPR_ARG - else - @lex_state = :EXPR_END - end - - if token[0, 1] =~ /[A-Z]/ - if token[-1] =~ /[!?]/ - token_c = TkIDENTIFIER - else - token_c = TkCONSTANT - end - elsif token[token.size - 1, 1] =~ /[!?]/ - token_c = TkFID - else - token_c = TkIDENTIFIER - end - if peek(0) == ':' and !peek_match?(/^::/) - token.concat getc - return Token(TkSYMBOL, token) - else - return Token(token_c, token) - end - end - - def identify_here_document(op) - ch = getc - start_token = op - # if lt = PERCENT_LTYPE[ch] - if ch == "-" or ch == "~" - start_token.concat ch - ch = getc - indent = true - end - if /['"`]/ =~ ch - start_token.concat ch - user_quote = lt = ch - quoted = "" - while (c = getc) && c != lt - quoted.concat c - end - start_token.concat quoted - start_token.concat lt - else - user_quote = nil - lt = '"' - quoted = ch.dup - while (c = getc) && c =~ /\w/ - quoted.concat c - end - start_token.concat quoted - ungetc - end - - @heredoc_queue << { - quoted: quoted, - lt: lt, - indent: indent, - started: false - } - @lex_state = :EXPR_END - Token(RDoc::RubyLex::TkHEREDOCBEG, start_token) - end - - def identify_here_document_body(quoted, lt, indent) - ltback, @ltype = @ltype, lt - - doc = "" - heredoc_end = nil - while l = gets - l = l.sub(/(:?\r)?\n\z/, "\n") - if (indent ? l.strip : l.chomp) == quoted - heredoc_end = l - break - end - doc << l - end - raise Error, "Missing terminating #{quoted} for string" unless heredoc_end - - @ltype = ltback - @lex_state = :EXPR_BEG - [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end] - end - - def identify_quotation - type = ch = getc - if lt = PERCENT_LTYPE[type] - ch = getc - elsif type =~ /\W/ - type = nil - lt = "\"" - else - return Token(TkMOD, '%') - end - # if ch !~ /\W/ - # ungetc - # next - # end - #@ltype = lt - @quoted = ch unless @quoted = PERCENT_PAREN[ch] - identify_string(lt, @quoted, type) - end - - def identify_number(op = "") - @lex_state = :EXPR_END - - num = op - - if peek(0) == "0" && peek(1) !~ /[.eEri]/ - num << getc - - case peek(0) - when /[xX]/ - ch = getc - match = /[0-9a-fA-F_]/ - when /[bB]/ - ch = getc - match = /[01_]/ - when /[oO]/ - ch = getc - match = /[0-7_]/ - when /[dD]/ - ch = getc - match = /[0-9_]/ - when /[0-7]/ - match = /[0-7_]/ - when /[89]/ - raise Error, "Illegal octal digit" - else - return Token(TkINTEGER, num) - end - - num << ch if ch - - len0 = true - non_digit = false - while ch = getc - num << ch - if match =~ ch - if ch == "_" - if non_digit - raise Error, "trailing `#{ch}' in number" - else - non_digit = ch - end - else - non_digit = false - len0 = false - end - else - ungetc - num[-1, 1] = '' - if len0 - raise Error, "numeric literal without digits" - end - if non_digit - raise Error, "trailing `#{non_digit}' in number" - end - break - end - end - return Token(TkINTEGER, num) - end - - type = TkINTEGER - allow_point = true - allow_e = true - allow_ri = true - non_digit = false - while ch = getc - num << ch - case ch - when /[0-9]/ - non_digit = false - when "_" - non_digit = ch - when allow_point && "." - if non_digit - raise Error, "trailing `#{non_digit}' in number" - end - type = TkFLOAT - if peek(0) !~ /[0-9]/ - type = TkINTEGER - ungetc - num[-1, 1] = '' - break - end - allow_point = false - when allow_e && "e", allow_e && "E" - if non_digit - raise Error, "trailing `#{non_digit}' in number" - end - type = TkFLOAT - if peek(0) =~ /[+-]/ - num << getc - end - allow_e = false - allow_ri = false - allow_point = false - non_digit = ch - when allow_ri && "r" - if non_digit - raise Error, "trailing `#{non_digit}' in number" - end - type = TkRATIONAL - if peek(0) == 'i' - type = TkIMAGINARY - num << getc - end - break - when allow_ri && "i" - if non_digit && non_digit != "r" - raise Error, "trailing `#{non_digit}' in number" - end - type = TkIMAGINARY - break - else - if non_digit - raise Error, "trailing `#{non_digit}' in number" - end - ungetc - num[-1, 1] = '' - break - end - end - - Token(type, num) - end - - def identify_string(ltype, quoted = ltype, type = nil) - close = PERCENT_PAREN.values.include?(quoted) - @ltype = ltype - @quoted = quoted - - str = if ltype == quoted and %w[" ' / `].include? ltype and type.nil? then - ltype.dup - else - "%#{type}#{PERCENT_PAREN_REV[quoted]||quoted}" - end - - subtype = nil - begin - nest = 0 - - while ch = getc - str << ch - - if @quoted == ch and nest <= 0 - break - elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#" - ch = getc - if ch == "{" then - subtype = true - str << ch << skip_inner_expression - next - else - ungetc - end - elsif ch == '\\' - case @ltype - when "'" then - case ch = getc - when "'", '\\' then - str << ch - else - str << ch - end - else - str << read_escape - end - end - - if close then - if PERCENT_PAREN[ch] == @quoted - nest += 1 - elsif ch == @quoted - nest -= 1 - end - end - end - - if @ltype == "/" - while peek(0) =~ /i|m|x|o|e|s|u|n/ - str << getc - end - end - - if peek(0) == ':' and !peek_match?(/^::/) and :EXPR_BEG == @lex_state and !@after_question - str.concat getc - return Token(TkSYMBOL, str) - elsif subtype - Token(DLtype2Token[ltype], str) - else - Token(Ltype2Token[ltype], str) - end - ensure - @ltype = nil - @quoted = nil - @lex_state = :EXPR_END - end - end - - def skip_inner_expression - res = "" - nest = 0 - while ch = getc - res << ch - if ch == '}' - break if nest.zero? - nest -= 1 - elsif ch == '{' - nest += 1 - end - end - res - end - - def identify_comment - @ltype = "#" - - comment = '#' - - while ch = getc - # if ch == "\\" #" - # read_escape - # end - if ch == "\n" - @ltype = nil - ungetc - break - end - - comment << ch - end - - return Token(TkCOMMENT, comment) - end - - def read_escape - escape = '' - ch = getc - - case ch - when "\n", "\r", "\f" - escape << ch - when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #" - escape << ch - when /[0-7]/ - ungetc ch - 3.times do - ch = getc - case ch - when /[0-7]/ - escape << ch - when nil - break - else - ungetc - break - end - end - - when "x" - escape << ch - - 2.times do - ch = getc - case ch - when /[0-9a-fA-F]/ - escape << ch - when nil - break - else - ungetc - break - end - end - - when "M" - escape << ch - - ch = getc - if ch != '-' - ungetc - else - escape << ch - - ch = getc - if ch == "\\" #" - ungetc - escape << read_escape - else - escape << ch - end - end - - when "C", "c" #, "^" - escape << ch - - if ch == "C" - ch = getc - - if ch == "-" - escape << ch - ch = getc - escape << ch - - escape << read_escape if ch == "\\" - else - ungetc - end - elsif (ch = getc) == "\\" #" - escape << ch << read_escape - end - else - escape << ch - - # other characters - end - - escape - end - - # :startdoc: - -end - -#RDoc::RubyLex.debug_level = 1 |