Diffstat (limited to 'lib/rdoc/parser/ruby.rb')
-rw-r--r-- | lib/rdoc/parser/ruby.rb | 2240
1 file changed, 506 insertions, 1734 deletions
diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index 1697ab85aa..f78dcf07d6 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -7,1339 +7,16 @@ # by Keiju ISHITSUKA (Nippon Rational Inc.) # -require 'e2mmap' -require 'irb/slex' +require 'rdoc/ruby_token' +require 'rdoc/ruby_lex' require 'rdoc/code_objects' require 'rdoc/tokenstream' require 'rdoc/markup/preprocess' require 'rdoc/parser' +require 'rdoc/parser/ruby_tools' $TOKEN_DEBUG ||= nil -#$TOKEN_DEBUG = $DEBUG_RDOC - -## -# Definitions of all tokens involved in the lexical analysis - -module RDoc::RubyToken - - EXPR_BEG = :EXPR_BEG - EXPR_MID = :EXPR_MID - EXPR_END = :EXPR_END - EXPR_ARG = :EXPR_ARG - EXPR_FNAME = :EXPR_FNAME - EXPR_DOT = :EXPR_DOT - EXPR_CLASS = :EXPR_CLASS - - class Token - NO_TEXT = "??".freeze - - attr_accessor :text - attr_reader :line_no - attr_reader :char_no - - def initialize(line_no, char_no) - @line_no = line_no - @char_no = char_no - @text = NO_TEXT - end - - def ==(other) - self.class == other.class and - other.line_no == @line_no and - other.char_no == @char_no and - other.text == @text - end - - ## - # Because we're used in contexts that expect to return a token, we set the - # text string and then return ourselves - - def set_text(text) - @text = text - self - end - - end - - class TkNode < Token - attr :node - end - - class TkId < Token - def initialize(line_no, char_no, name) - super(line_no, char_no) - @name = name - end - attr :name - end - - class TkKW < TkId - end - - class TkVal < Token - def initialize(line_no, char_no, value = nil) - super(line_no, char_no) - set_text(value) - end - end - - class TkOp < Token - def name - self.class.op_name - end - end - - class TkOPASGN < TkOp - def initialize(line_no, char_no, op) - super(line_no, char_no) - op = TkReading2Token[op] unless Symbol === op - @op = op - end - attr :op - end - - class TkUnknownChar < Token - def initialize(line_no, char_no, id) - super(line_no, char_no) - @name = char_no.chr - end - attr :name - end - - class TkError < Token - end - - def set_token_position(line, char) - @prev_line_no = line - @prev_char_no = char - end - - def Token(token, value = nil) - tk = nil - case token - when String, Symbol - source = String === token ? TkReading2Token : TkSymbol2Token - raise TkReading2TokenNoKey, token if (tk = source[token]).nil? - tk = Token(tk[0], value) - else - tk = if (token.ancestors & [TkId, TkVal, TkOPASGN, TkUnknownChar]).empty? 
- token.new(@prev_line_no, @prev_char_no) - else - token.new(@prev_line_no, @prev_char_no, value) - end - end - tk - end - - TokenDefinitions = [ - [:TkCLASS, TkKW, "class", EXPR_CLASS], - [:TkMODULE, TkKW, "module", EXPR_CLASS], - [:TkDEF, TkKW, "def", EXPR_FNAME], - [:TkUNDEF, TkKW, "undef", EXPR_FNAME], - [:TkBEGIN, TkKW, "begin", EXPR_BEG], - [:TkRESCUE, TkKW, "rescue", EXPR_MID], - [:TkENSURE, TkKW, "ensure", EXPR_BEG], - [:TkEND, TkKW, "end", EXPR_END], - [:TkIF, TkKW, "if", EXPR_BEG, :TkIF_MOD], - [:TkUNLESS, TkKW, "unless", EXPR_BEG, :TkUNLESS_MOD], - [:TkTHEN, TkKW, "then", EXPR_BEG], - [:TkELSIF, TkKW, "elsif", EXPR_BEG], - [:TkELSE, TkKW, "else", EXPR_BEG], - [:TkCASE, TkKW, "case", EXPR_BEG], - [:TkWHEN, TkKW, "when", EXPR_BEG], - [:TkWHILE, TkKW, "while", EXPR_BEG, :TkWHILE_MOD], - [:TkUNTIL, TkKW, "until", EXPR_BEG, :TkUNTIL_MOD], - [:TkFOR, TkKW, "for", EXPR_BEG], - [:TkBREAK, TkKW, "break", EXPR_END], - [:TkNEXT, TkKW, "next", EXPR_END], - [:TkREDO, TkKW, "redo", EXPR_END], - [:TkRETRY, TkKW, "retry", EXPR_END], - [:TkIN, TkKW, "in", EXPR_BEG], - [:TkDO, TkKW, "do", EXPR_BEG], - [:TkRETURN, TkKW, "return", EXPR_MID], - [:TkYIELD, TkKW, "yield", EXPR_END], - [:TkSUPER, TkKW, "super", EXPR_END], - [:TkSELF, TkKW, "self", EXPR_END], - [:TkNIL, TkKW, "nil", EXPR_END], - [:TkTRUE, TkKW, "true", EXPR_END], - [:TkFALSE, TkKW, "false", EXPR_END], - [:TkAND, TkKW, "and", EXPR_BEG], - [:TkOR, TkKW, "or", EXPR_BEG], - [:TkNOT, TkKW, "not", EXPR_BEG], - [:TkIF_MOD, TkKW], - [:TkUNLESS_MOD, TkKW], - [:TkWHILE_MOD, TkKW], - [:TkUNTIL_MOD, TkKW], - [:TkALIAS, TkKW, "alias", EXPR_FNAME], - [:TkDEFINED, TkKW, "defined?", EXPR_END], - [:TklBEGIN, TkKW, "BEGIN", EXPR_END], - [:TklEND, TkKW, "END", EXPR_END], - [:Tk__LINE__, TkKW, "__LINE__", EXPR_END], - [:Tk__FILE__, TkKW, "__FILE__", EXPR_END], - - [:TkIDENTIFIER, TkId], - [:TkFID, TkId], - [:TkGVAR, TkId], - [:TkIVAR, TkId], - [:TkCONSTANT, TkId], - - [:TkINTEGER, TkVal], - [:TkFLOAT, TkVal], - [:TkSTRING, TkVal], - [:TkXSTRING, TkVal], - [:TkREGEXP, TkVal], - [:TkCOMMENT, TkVal], - - [:TkDSTRING, TkNode], - [:TkDXSTRING, TkNode], - [:TkDREGEXP, TkNode], - [:TkNTH_REF, TkId], - [:TkBACK_REF, TkId], - - [:TkUPLUS, TkOp, "+@"], - [:TkUMINUS, TkOp, "-@"], - [:TkPOW, TkOp, "**"], - [:TkCMP, TkOp, "<=>"], - [:TkEQ, TkOp, "=="], - [:TkEQQ, TkOp, "==="], - [:TkNEQ, TkOp, "!="], - [:TkGEQ, TkOp, ">="], - [:TkLEQ, TkOp, "<="], - [:TkANDOP, TkOp, "&&"], - [:TkOROP, TkOp, "||"], - [:TkMATCH, TkOp, "=~"], - [:TkNMATCH, TkOp, "!~"], - [:TkDOT2, TkOp, ".."], - [:TkDOT3, TkOp, "..."], - [:TkAREF, TkOp, "[]"], - [:TkASET, TkOp, "[]="], - [:TkLSHFT, TkOp, "<<"], - [:TkRSHFT, TkOp, ">>"], - [:TkCOLON2, TkOp], - [:TkCOLON3, TkOp], -# [:OPASGN, TkOp], # +=, -= etc. # - [:TkASSOC, TkOp, "=>"], - [:TkQUESTION, TkOp, "?"], #? 
- [:TkCOLON, TkOp, ":"], #: - - [:TkfLPAREN], # func( # - [:TkfLBRACK], # func[ # - [:TkfLBRACE], # func{ # - [:TkSTAR], # *arg - [:TkAMPER], # &arg # - [:TkSYMBOL, TkId], # :SYMBOL - [:TkSYMBEG, TkId], - [:TkGT, TkOp, ">"], - [:TkLT, TkOp, "<"], - [:TkPLUS, TkOp, "+"], - [:TkMINUS, TkOp, "-"], - [:TkMULT, TkOp, "*"], - [:TkDIV, TkOp, "/"], - [:TkMOD, TkOp, "%"], - [:TkBITOR, TkOp, "|"], - [:TkBITXOR, TkOp, "^"], - [:TkBITAND, TkOp, "&"], - [:TkBITNOT, TkOp, "~"], - [:TkNOTOP, TkOp, "!"], - - [:TkBACKQUOTE, TkOp, "`"], - - [:TkASSIGN, Token, "="], - [:TkDOT, Token, "."], - [:TkLPAREN, Token, "("], #(exp) - [:TkLBRACK, Token, "["], #[arry] - [:TkLBRACE, Token, "{"], #{hash} - [:TkRPAREN, Token, ")"], - [:TkRBRACK, Token, "]"], - [:TkRBRACE, Token, "}"], - [:TkCOMMA, Token, ","], - [:TkSEMICOLON, Token, ";"], - - [:TkRD_COMMENT], - [:TkSPACE], - [:TkNL], - [:TkEND_OF_SCRIPT], - - [:TkBACKSLASH, TkUnknownChar, "\\"], - [:TkAT, TkUnknownChar, "@"], - [:TkDOLLAR, TkUnknownChar, "\$"], #" - ] - - # {reading => token_class} - # {reading => [token_class, *opt]} - TkReading2Token = {} - TkSymbol2Token = {} - - def self.def_token(token_n, super_token = Token, reading = nil, *opts) - token_n = token_n.id2name unless String === token_n - - fail AlreadyDefinedToken, token_n if const_defined?(token_n) - - token_c = Class.new super_token - const_set token_n, token_c -# token_c.inspect - - if reading - if TkReading2Token[reading] - fail TkReading2TokenDuplicateError, token_n, reading - end - if opts.empty? - TkReading2Token[reading] = [token_c] - else - TkReading2Token[reading] = [token_c].concat(opts) - end - end - TkSymbol2Token[token_n.intern] = token_c - - if token_c <= TkOp - token_c.class_eval %{ - def self.op_name; "#{reading}"; end - } - end - end - - for defs in TokenDefinitions - def_token(*defs) - end - - NEWLINE_TOKEN = TkNL.new(0,0) - NEWLINE_TOKEN.set_text("\n") - -end - -## -# Lexical analyzer for Ruby source - -class RDoc::RubyLex - - ## - # Read an input stream character by character. We allow for unlimited - # ungetting of characters just read. - # - # We simplify the implementation greatly by reading the entire input - # into a buffer initially, and then simply traversing it using - # pointers. - # - # We also have to allow for the <i>here document diversion</i>. This - # little gem comes about when the lexer encounters a here - # document. At this point we effectively need to split the input - # stream into two parts: one to read the body of the here document, - # the other to read the rest of the input line where the here - # document was initially encountered. For example, we might have - # - # do_something(<<-A, <<-B) - # stuff - # for - # A - # stuff - # for - # B - # - # When the lexer encounters the <<A, it reads until the end of the - # line, and keeps it around for later. It then reads the body of the - # here document. Once complete, it needs to read the rest of the - # original line, but then skip the here document body. 
- # - - class BufferedReader - - attr_reader :line_num - - def initialize(content, options) - @options = options - - if /\t/ =~ content - tab_width = @options.tab_width - content = content.split(/\n/).map do |line| - 1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #` - line - end .join("\n") - end - @content = content - @content << "\n" unless @content[-1,1] == "\n" - @size = @content.size - @offset = 0 - @hwm = 0 - @line_num = 1 - @read_back_offset = 0 - @last_newline = 0 - @newline_pending = false - end - - def column - @offset - @last_newline - end - - def getc - return nil if @offset >= @size - ch = @content[@offset, 1] - - @offset += 1 - @hwm = @offset if @hwm < @offset - - if @newline_pending - @line_num += 1 - @last_newline = @offset - 1 - @newline_pending = false - end - - if ch == "\n" - @newline_pending = true - end - ch - end - - def getc_already_read - getc - end - - def ungetc(ch) - raise "unget past beginning of file" if @offset <= 0 - @offset -= 1 - if @content[@offset] == ?\n - @newline_pending = false - end - end - - def get_read - res = @content[@read_back_offset...@offset] - @read_back_offset = @offset - res - end - - def peek(at) - pos = @offset + at - if pos >= @size - nil - else - @content[pos, 1] - end - end - - def peek_equal(str) - @content[@offset, str.length] == str - end - - def divert_read_from(reserve) - @content[@offset, 0] = reserve - @size = @content.size - end - end - - # end of nested class BufferedReader - - extend Exception2MessageMapper - def_exception(:AlreadyDefinedToken, "Already defined token(%s)") - def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')") - def_exception(:TkReading2TokenDuplicateError, - "key duplicate(token_n='%s', key='%s')") - def_exception(:SyntaxError, "%s") - - include RDoc::RubyToken - include IRB - - attr_reader :continue - attr_reader :lex_state - - def self.debug? - false - end - - def initialize(content, options) - lex_init - - @options = options - - @reader = BufferedReader.new content, @options - - @exp_line_no = @line_no = 1 - @base_char_no = 0 - @indent = 0 - - @ltype = nil - @quoted = nil - @lex_state = EXPR_BEG - @space_seen = false - - @continue = false - @line = "" - - @skip_space = false - @read_auto_clean_up = false - @exception_on_syntax_error = true - end - - attr_accessor :skip_space - attr_accessor :read_auto_clean_up - attr_accessor :exception_on_syntax_error - attr_reader :indent - - # io functions - def line_no - @reader.line_num - end - - def char_no - @reader.column - end - - def get_read - @reader.get_read - end - - def getc - @reader.getc - end - - def getc_of_rests - @reader.getc_already_read - end - - def gets - c = getc or return - l = "" - begin - l.concat c unless c == "\r" - break if c == "\n" - end while c = getc - l - end - - - def ungetc(c = nil) - @reader.ungetc(c) - end - - def peek_equal?(str) - @reader.peek_equal(str) - end - - def peek(i = 0) - @reader.peek(i) - end - - def lex - until (TkNL === (tk = token) or TkEND_OF_SCRIPT === tk) and - not @continue or tk.nil? - end - - line = get_read - - if line == "" and TkEND_OF_SCRIPT === tk or tk.nil? 
then - nil - else - line - end - end - - def token - set_token_position(line_no, char_no) - begin - begin - tk = @OP.match(self) - @space_seen = TkSPACE === tk - rescue SyntaxError => e - raise RDoc::Error, "syntax error: #{e.message}" if - @exception_on_syntax_error - - tk = TkError.new(line_no, char_no) - end - end while @skip_space and TkSPACE === tk - if @read_auto_clean_up - get_read - end -# throw :eof unless tk - tk - end - - ENINDENT_CLAUSE = [ - "case", "class", "def", "do", "for", "if", - "module", "unless", "until", "while", "begin" #, "when" - ] - DEINDENT_CLAUSE = ["end" #, "when" - ] - - PERCENT_LTYPE = { - "q" => "\'", - "Q" => "\"", - "x" => "\`", - "r" => "/", - "w" => "]" - } - - PERCENT_PAREN = { - "{" => "}", - "[" => "]", - "<" => ">", - "(" => ")" - } - - Ltype2Token = { - "\'" => TkSTRING, - "\"" => TkSTRING, - "\`" => TkXSTRING, - "/" => TkREGEXP, - "]" => TkDSTRING - } - Ltype2Token.default = TkSTRING - - DLtype2Token = { - "\"" => TkDSTRING, - "\`" => TkDXSTRING, - "/" => TkDREGEXP, - } - - def lex_init() - @OP = IRB::SLex.new - @OP.def_rules("\0", "\004", "\032") do |chars, io| - Token(TkEND_OF_SCRIPT).set_text(chars) - end - - @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io| - @space_seen = TRUE - while (ch = getc) =~ /[ \t\f\r\13]/ - chars << ch - end - ungetc - Token(TkSPACE).set_text(chars) - end - - @OP.def_rule("#") do - |op, io| - identify_comment - end - - @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do - |op, io| - str = op - @ltype = "=" - - - begin - line = "" - begin - ch = getc - line << ch - end until ch == "\n" - str << line - end until line =~ /^=end/ - - ungetc - - @ltype = nil - - if str =~ /\A=begin\s+rdoc/i - str.sub!(/\A=begin.*\n/, '') - str.sub!(/^=end.*/m, '') - Token(TkCOMMENT).set_text(str) - else - Token(TkRD_COMMENT)#.set_text(str) - end - end - - @OP.def_rule("\n") do - print "\\n\n" if RDoc::RubyLex.debug? 
- case @lex_state - when EXPR_BEG, EXPR_FNAME, EXPR_DOT - @continue = TRUE - else - @continue = FALSE - @lex_state = EXPR_BEG - end - Token(TkNL).set_text("\n") - end - - @OP.def_rules("*", "**", - "!", "!=", "!~", - "=", "==", "===", - "=~", "<=>", - "<", "<=", - ">", ">=", ">>") do - |op, io| - @lex_state = EXPR_BEG - Token(op).set_text(op) - end - - @OP.def_rules("<<") do - |op, io| - tk = nil - if @lex_state != EXPR_END && @lex_state != EXPR_CLASS && - (@lex_state != EXPR_ARG || @space_seen) - c = peek(0) - if /[-\w_\"\'\`]/ =~ c - tk = identify_here_document - end - end - if !tk - @lex_state = EXPR_BEG - tk = Token(op).set_text(op) - end - tk - end - - @OP.def_rules("'", '"') do - |op, io| - identify_string(op) - end - - @OP.def_rules("`") do - |op, io| - if @lex_state == EXPR_FNAME - Token(op).set_text(op) - else - identify_string(op) - end - end - - @OP.def_rules('?') do - |op, io| - if @lex_state == EXPR_END - @lex_state = EXPR_BEG - Token(TkQUESTION).set_text(op) - else - ch = getc - if @lex_state == EXPR_ARG && ch !~ /\s/ - ungetc - @lex_state = EXPR_BEG - Token(TkQUESTION).set_text(op) - else - str = op - str << ch - if (ch == '\\') #' - str << read_escape - end - @lex_state = EXPR_END - Token(TkINTEGER).set_text(str) - end - end - end - - @OP.def_rules("&", "&&", "|", "||") do - |op, io| - @lex_state = EXPR_BEG - Token(op).set_text(op) - end - - @OP.def_rules("+=", "-=", "*=", "**=", - "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do - |op, io| - @lex_state = EXPR_BEG - op =~ /^(.*)=$/ - Token(TkOPASGN, $1).set_text(op) - end - - @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io| - Token(TkUPLUS).set_text(op) - end - - @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io| - Token(TkUMINUS).set_text(op) - end - - @OP.def_rules("+", "-") do - |op, io| - catch(:RET) do - if @lex_state == EXPR_ARG - if @space_seen and peek(0) =~ /[0-9]/ - throw :RET, identify_number(op) - else - @lex_state = EXPR_BEG - end - elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/ - throw :RET, identify_number(op) - else - @lex_state = EXPR_BEG - end - Token(op).set_text(op) - end - end - - @OP.def_rule(".") do - @lex_state = EXPR_BEG - if peek(0) =~ /[0-9]/ - ungetc - identify_number("") - else - # for obj.if - @lex_state = EXPR_DOT - Token(TkDOT).set_text(".") - end - end - - @OP.def_rules("..", "...") do - |op, io| - @lex_state = EXPR_BEG - Token(op).set_text(op) - end - - lex_int2 - end - - def lex_int2 - @OP.def_rules("]", "}", ")") do - |op, io| - @lex_state = EXPR_END - @indent -= 1 - Token(op).set_text(op) - end - - @OP.def_rule(":") do - if @lex_state == EXPR_END || peek(0) =~ /\s/ - @lex_state = EXPR_BEG - tk = Token(TkCOLON) - else - @lex_state = EXPR_FNAME - tk = Token(TkSYMBEG) - end - tk.set_text(":") - end - - @OP.def_rule("::") do - if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen - @lex_state = EXPR_BEG - tk = Token(TkCOLON3) - else - @lex_state = EXPR_DOT - tk = Token(TkCOLON2) - end - tk.set_text("::") - end - - @OP.def_rule("/") do - |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - identify_string(op) - elsif peek(0) == '=' - getc - @lex_state = EXPR_BEG - Token(TkOPASGN, :/).set_text("/=") #") - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_string(op) - else - @lex_state = EXPR_BEG - Token("/").set_text(op) - end - end - - @OP.def_rules("^") do - @lex_state = EXPR_BEG - Token("^").set_text("^") - end - - @OP.def_rules(",", ";") do - |op, io| - @lex_state = EXPR_BEG - Token(op).set_text(op) - end - - 
@OP.def_rule("~") do - @lex_state = EXPR_BEG - Token("~").set_text("~") - end - - @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do - @lex_state = EXPR_BEG - Token("~").set_text("~@") - end - - @OP.def_rule("(") do - @indent += 1 - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - @lex_state = EXPR_BEG - tk = Token(TkfLPAREN) - else - @lex_state = EXPR_BEG - tk = Token(TkLPAREN) - end - tk.set_text("(") - end - - @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do - Token("[]").set_text("[]") - end - - @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do - Token("[]=").set_text("[]=") - end - - @OP.def_rule("[") do - @indent += 1 - if @lex_state == EXPR_FNAME - t = Token(TkfLBRACK) - else - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - t = Token(TkLBRACK) - elsif @lex_state == EXPR_ARG && @space_seen - t = Token(TkLBRACK) - else - t = Token(TkfLBRACK) - end - @lex_state = EXPR_BEG - end - t.set_text("[") - end - - @OP.def_rule("{") do - @indent += 1 - if @lex_state != EXPR_END && @lex_state != EXPR_ARG - t = Token(TkLBRACE) - else - t = Token(TkfLBRACE) - end - @lex_state = EXPR_BEG - t.set_text("{") - end - - @OP.def_rule('\\') do #' - if getc == "\n" - @space_seen = true - @continue = true - Token(TkSPACE).set_text("\\\n") - else - ungetc - Token("\\").set_text("\\") #" - end - end - - @OP.def_rule('%') do - |op, io| - if @lex_state == EXPR_BEG || @lex_state == EXPR_MID - identify_quotation('%') - elsif peek(0) == '=' - getc - Token(TkOPASGN, "%").set_text("%=") - elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/ - identify_quotation('%') - else - @lex_state = EXPR_BEG - Token("%").set_text("%") - end - end - - @OP.def_rule('$') do #' - identify_gvar - end - - @OP.def_rule('@') do - if peek(0) =~ /[@\w_]/ - ungetc - identify_identifier - else - Token("@").set_text("@") - end - end - - @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do - throw :eof - end - - @OP.def_rule("") do - |op, io| - printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? - if peek(0) =~ /[0-9]/ - t = identify_number("") - elsif peek(0) =~ /[\w_]/ - t = identify_identifier - end - printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug? - t - end - end - - def identify_gvar - @lex_state = EXPR_END - str = "$" - - tk = case ch = getc - when /[~_*$?!@\/\\;,=:<>".]/ #" - str << ch - Token(TkGVAR, str) - - when "-" - str << "-" << getc - Token(TkGVAR, str) - - when "&", "`", "'", "+" - str << ch - Token(TkBACK_REF, str) - - when /[1-9]/ - str << ch - while (ch = getc) =~ /[0-9]/ - str << ch - end - ungetc - Token(TkNTH_REF) - when /\w/ - ungetc - ungetc - return identify_identifier - else - ungetc - Token("$") - end - tk.set_text(str) - end - - def identify_identifier - token = "" - token.concat getc if peek(0) =~ /[$@]/ - token.concat getc if peek(0) == "@" - - while (ch = getc) =~ /\w|_/ - print ":", ch, ":" if RDoc::RubyLex.debug? - token.concat ch - end - ungetc - - if ch == "!" or ch == "?" - token.concat getc - end - # fix token - - # $stderr.puts "identifier - #{token}, state = #@lex_state" - - case token - when /^\$/ - return Token(TkGVAR, token).set_text(token) - when /^\@/ - @lex_state = EXPR_END - return Token(TkIVAR, token).set_text(token) - end - - if @lex_state != EXPR_DOT - print token, "\n" if RDoc::RubyLex.debug? - - token_c, *trans = TkReading2Token[token] - if token_c - # reserved word? 
- - if (@lex_state != EXPR_BEG && - @lex_state != EXPR_FNAME && - trans[1]) - # modifiers - token_c = TkSymbol2Token[trans[1]] - @lex_state = trans[0] - else - if @lex_state != EXPR_FNAME - if ENINDENT_CLAUSE.include?(token) - @indent += 1 - elsif DEINDENT_CLAUSE.include?(token) - @indent -= 1 - end - @lex_state = trans[0] - else - @lex_state = EXPR_END - end - end - return Token(token_c, token).set_text(token) - end - end - - if @lex_state == EXPR_FNAME - @lex_state = EXPR_END - if peek(0) == '=' - token.concat getc - end - elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT - @lex_state = EXPR_ARG - else - @lex_state = EXPR_END - end - - if token[0, 1] =~ /[A-Z]/ - return Token(TkCONSTANT, token).set_text(token) - elsif token[token.size - 1, 1] =~ /[!?]/ - return Token(TkFID, token).set_text(token) - else - return Token(TkIDENTIFIER, token).set_text(token) - end - end - - def identify_here_document - ch = getc - if ch == "-" - ch = getc - indent = true - end - if /['"`]/ =~ ch # ' - lt = ch - quoted = "" - while (c = getc) && c != lt - quoted.concat c - end - else - lt = '"' - quoted = ch.dup - while (c = getc) && c =~ /\w/ - quoted.concat c - end - ungetc - end - - ltback, @ltype = @ltype, lt - reserve = "" - - while ch = getc - reserve << ch - if ch == "\\" #" - ch = getc - reserve << ch - elsif ch == "\n" - break - end - end - - str = "" - while (l = gets) - l.chomp! - l.strip! if indent - break if l == quoted - str << l.chomp << "\n" - end - - @reader.divert_read_from(reserve) - - @ltype = ltback - @lex_state = EXPR_END - Token(Ltype2Token[lt], str).set_text(str.dump) - end - - def identify_quotation(initial_char) - ch = getc - if lt = PERCENT_LTYPE[ch] - initial_char += ch - ch = getc - elsif ch =~ /\W/ - lt = "\"" - else - fail SyntaxError, "unknown type of %string ('#{ch}')" - end -# if ch !~ /\W/ -# ungetc -# next -# end - #@ltype = lt - @quoted = ch unless @quoted = PERCENT_PAREN[ch] - identify_string(lt, @quoted, ch, initial_char) - end - - def identify_number(start) - str = start.dup - - if start == "+" or start == "-" or start == "" - start = getc - str << start - end - - @lex_state = EXPR_END - - if start == "0" - if peek(0) == "x" - ch = getc - str << ch - match = /[0-9a-f_]/ - else - match = /[0-7_]/ - end - while ch = getc - if ch !~ match - ungetc - break - else - str << ch - end - end - return Token(TkINTEGER).set_text(str) - end - - type = TkINTEGER - allow_point = TRUE - allow_e = TRUE - while ch = getc - case ch - when /[0-9_]/ - str << ch - - when allow_point && "." 
- type = TkFLOAT - if peek(0) !~ /[0-9]/ - ungetc - break - end - str << ch - allow_point = false - - when allow_e && "e", allow_e && "E" - str << ch - type = TkFLOAT - if peek(0) =~ /[+-]/ - str << getc - end - allow_e = false - allow_point = false - else - ungetc - break - end - end - Token(type).set_text(str) - end - - def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil) - @ltype = ltype - @quoted = quoted - subtype = nil - - str = "" - str << initial_char if initial_char - str << (opener||quoted) - - nest = 0 - begin - while ch = getc - str << ch - if @quoted == ch - if nest == 0 - break - else - nest -= 1 - end - elsif opener == ch - nest += 1 - elsif @ltype != "'" && @ltype != "]" and ch == "#" - ch = getc - if ch == "{" - subtype = true - str << ch << skip_inner_expression - else - ungetc(ch) - end - elsif ch == '\\' #' - str << read_escape - end - end - if @ltype == "/" - if peek(0) =~ /i|o|n|e|s/ - str << getc - end - end - if subtype - Token(DLtype2Token[ltype], str) - else - Token(Ltype2Token[ltype], str) - end.set_text(str) - ensure - @ltype = nil - @quoted = nil - @lex_state = EXPR_END - end - end - - def skip_inner_expression - res = "" - nest = 0 - while (ch = getc) - res << ch - if ch == '}' - break if nest.zero? - nest -= 1 - elsif ch == '{' - nest += 1 - end - end - res - end - - def identify_comment - @ltype = "#" - comment = "#" - while ch = getc - if ch == "\\" - ch = getc - if ch == "\n" - ch = " " - else - comment << "\\" - end - else - if ch == "\n" - @ltype = nil - ungetc - break - end - end - comment << ch - end - return Token(TkCOMMENT).set_text(comment) - end - - def read_escape - res = "" - case ch = getc - when /[0-7]/ - ungetc ch - 3.times do - case ch = getc - when /[0-7]/ - when nil - break - else - ungetc - break - end - res << ch - end - - when "x" - res << ch - 2.times do - case ch = getc - when /[0-9a-fA-F]/ - when nil - break - else - ungetc - break - end - res << ch - end - - when "M" - res << ch - if (ch = getc) != '-' - ungetc - else - res << ch - if (ch = getc) == "\\" #" - res << ch - res << read_escape - else - res << ch - end - end - - when "C", "c" #, "^" - res << ch - if ch == "C" and (ch = getc) != "-" - ungetc - else - res << ch - if (ch = getc) == "\\" #" - res << ch - res << read_escape - else - res << ch - end - end - else - res << ch - end - res - end -end ## # Extracts code elements from a source file returning a TopLevel object @@ -1373,7 +50,7 @@ end # # ## # # This method tries over and over until it is tired -# +# # def go_go_go(thing_to_try, tries = 10) # :args: thing_to_try # puts thing_to_try # go_go_go thing_to_try, tries - 1 @@ -1393,7 +70,7 @@ end # # :call-seq: # # my_method(Range) # # my_method(offset, length) -# +# # def my_method(*args) # end # @@ -1404,7 +81,7 @@ end # # ## # # My method is awesome -# +# # def my_method(&block) # :yields: happy, times # block.call 1, 2 # end @@ -1416,7 +93,7 @@ end # # ## # # This is a meta-programmed method! -# +# # add_my_method :meta_method, :arg1, :arg2 # # The parser looks at the token after the identifier to determine the name, in @@ -1439,18 +116,29 @@ end # ## # # :singleton-method: woo_hoo! # -# == Hidden methods +# Additionally you can mark a method as an attribute by +# using :attr:, :attr_reader:, :attr_writer: or :attr_accessor:. Just like +# for :method:, the name is optional. 
+# +# ## +# # :attr_reader: my_attr_name +# +# == Hidden methods and attributes # # You can provide documentation for methods that don't appear using -# the :method: and :singleton-method: directives: +# the :method:, :singleton-method: and :attr: directives: # # ## +# # :attr_writer: ghost_writer +# # There is an attribute here, but you can't see it! +# +# ## # # :method: ghost_method # # There is a method here, but you can't see it! -# +# # ## # # this is a comment for a regular method -# +# # def regular_method() end # # Note that by default, the :method: directive will be ignored if there is a @@ -1458,12 +146,20 @@ end class RDoc::Parser::Ruby < RDoc::Parser - parse_files_matching(/\.(?:rbw?|rdoc)\z/) + parse_files_matching(/\.rbw?$/) include RDoc::RubyToken include RDoc::TokenStream + include RDoc::Parser::RubyTools + + ## + # RDoc::NormalClass type NORMAL = "::" + + ## + # RDoc::SingleClass type + SINGLE = "<<" def initialize(top_level, file_name, content, options, stats) @@ -1473,21 +169,17 @@ class RDoc::Parser::Ruby < RDoc::Parser @token_listeners = nil @scanner = RDoc::RubyLex.new content, @options @scanner.exception_on_syntax_error = false + @prev_seek = nil reset end - def add_token_listener(obj) - @token_listeners ||= [] - @token_listeners << obj - end - ## # Look for the first comment in a file that isn't a shebang line. def collect_first_comment skip_tkspace - res = '' + comment = '' first_line = true tk = get_tk @@ -1502,7 +194,7 @@ class RDoc::Parser::Ruby < RDoc::Parser tk = get_tk else first_line = false - res << tk.text << "\n" + comment << tk.text << "\n" tk = get_tk if TkNL === tk then @@ -1514,7 +206,7 @@ class RDoc::Parser::Ruby < RDoc::Parser unget_tk tk - res + comment end def error(msg) @@ -1560,24 +252,24 @@ class RDoc::Parser::Ruby < RDoc::Parser name_t = get_tk # class ::A -> A is in the top level - if TkCOLON2 === name_t then + case name_t + when TkCOLON2, TkCOLON3 then # bug name_t = get_tk container = @top_level end - skip_tkspace(false) + skip_tkspace false while TkCOLON2 === peek_tk do prev_container = container - container = container.find_module_named(name_t.name) - if !container -# warn("Couldn't find module #{name_t.name}") + container = container.find_module_named name_t.name + unless container then container = prev_container.add_module RDoc::NormalModule, name_t.name end get_tk name_t = get_tk end - skip_tkspace(false) + skip_tkspace false return [container, name_t] end @@ -1590,12 +282,12 @@ class RDoc::Parser::Ruby < RDoc::Parser res = "" while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.text + res += tk.name tk = get_tk end unget_tk(tk) - skip_tkspace(false) + skip_tkspace false get_tkread # empty out read buffer @@ -1617,11 +309,11 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_constant res = "" - skip_tkspace(false) + skip_tkspace false tk = get_tk while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.text + res += tk.name tk = get_tk end @@ -1636,88 +328,51 @@ class RDoc::Parser::Ruby < RDoc::Parser # Get a constant that may be surrounded by parens def get_constant_with_optional_parens - skip_tkspace(false) + skip_tkspace false nest = 0 while TkLPAREN === (tk = peek_tk) or TkfLPAREN === tk do get_tk - skip_tkspace(true) + skip_tkspace nest += 1 end name = get_constant while nest > 0 - skip_tkspace(true) + skip_tkspace tk = get_tk nest -= 1 if TkRPAREN === tk end + name end def get_symbol_or_name tk = get_tk case tk - when TkSYMBOL - tk.text.sub(/^:/, '') - when TkId, TkOp + when TkSYMBOL 
then + text = tk.text.sub(/^:/, '') + + if TkASSIGN === peek_tk then + get_tk + text << '=' + end + + text + when TkId, TkOp then tk.name - when TkSTRING + when TkSTRING, TkDSTRING then tk.text else - raise "Name or symbol expected (got #{tk})" + raise RDoc::Error, "Name or symbol expected (got #{tk})" end end - def get_tk - tk = nil - if @tokens.empty? - tk = @scanner.token - @read.push @scanner.get_read - puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG - else - @read.push @unget_read.shift - tk = @tokens.shift - puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG - end - - if TkSYMBEG === tk then - set_token_position(tk.line_no, tk.char_no) - tk1 = get_tk - if TkId === tk1 or TkOp === tk1 or TkSTRING === tk1 then - if tk1.respond_to?(:name) - tk = Token(TkSYMBOL).set_text(":" + tk1.name) - else - tk = Token(TkSYMBOL).set_text(":" + tk1.text) - end - # remove the identifier we just read (we're about to - # replace it with a symbol) - @token_listeners.each do |obj| - obj.pop_token - end if @token_listeners - else - warn("':' not followed by identifier or operator") - tk = tk1 - end - end - - # inform any listeners of our shiny new token - @token_listeners.each do |obj| - obj.add_token(tk) - end if @token_listeners - - tk - end - - def get_tkread - read = @read.join("") - @read = [] - read - end - ## # Look for directives in a normal comment block: # - # #-- - don't display comment from this point forward + # # :stopdoc: + # # Don't display comment from this point forward # # This routine modifies it's parameter @@ -1732,8 +387,9 @@ class RDoc::Parser::Ruby < RDoc::Parser when 'main' then @options.main_page = param '' - when 'method', 'singleton-method' then - false # ignore + when 'method', 'singleton-method', + 'attr', 'attr_accessor', 'attr_reader', 'attr_writer' then + false # handled elsewhere when 'section' then context.set_current_section(param, comment) comment.replace '' @@ -1754,23 +410,30 @@ class RDoc::Parser::Ruby < RDoc::Parser end end - remove_private_comments(comment) + remove_private_comments comment end - def make_message(msg) - prefix = "\n" + @file_name + ":" - if @scanner - prefix << "#{@scanner.line_no}:#{@scanner.char_no}: " - end - return prefix + msg + ## + # Adds useful info about the parser to +message+ + + def make_message message + prefix = "\n#{@file_name}:" + + prefix << "#{@scanner.line_no}:#{@scanner.char_no}:" if @scanner + + "#{prefix} #{message}" end + ## + # Creates an RDoc::Attr for the name following +tk+, setting the comment to + # +comment+. + def parse_attr(context, single, tk, comment) - args = parse_symbol_arg(1) + args = parse_symbol_arg 1 if args.size > 0 name = args[0] rw = "R" - skip_tkspace(false) + skip_tkspace false tk = get_tk if TkCOMMA === tk then rw = "RW" if get_bool @@ -1787,12 +450,16 @@ class RDoc::Parser::Ruby < RDoc::Parser end end + ## + # Creates an RDoc::Attr for each attribute listed after +tk+, setting the + # comment for each to +comment+. + def parse_attr_accessor(context, single, tk, comment) args = parse_symbol_arg read = get_tkread rw = "?" - # If nodoc is given, don't document any of them + # TODO If nodoc is given, don't document any of them tmp = RDoc::CodeObject.new read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS @@ -1803,8 +470,7 @@ class RDoc::Parser::Ruby < RDoc::Parser when "attr_writer" then rw = "W" when "attr_accessor" then rw = "RW" else - rw = @options.extra_accessor_flags[tk.name] - rw = '?' if rw.nil? + rw = '?' 
end for name in args @@ -1820,19 +486,24 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace end new_name = get_symbol_or_name - @scanner.instance_eval{@lex_state = EXPR_FNAME} + + @scanner.instance_eval { @lex_state = EXPR_FNAME } + skip_tkspace if TkCOMMA === peek_tk then get_tk skip_tkspace end - old_name = get_symbol_or_name + + begin + old_name = get_symbol_or_name + rescue RDoc::Error + return + end al = RDoc::Alias.new get_tkread, old_name, new_name, comment read_documentation_modifiers al, RDoc::ATTR_MODIFIERS - if al.document_self - context.add_alias(al) - end + context.add_alias al if al.document_self end def parse_call_parameters(tk) @@ -1847,23 +518,25 @@ class RDoc::Parser::Ruby < RDoc::Parser nest = 0 loop do - case tk - when TkSEMICOLON - break - when TkLPAREN, TkfLPAREN - nest += 1 - when end_token - if end_token == TkRPAREN - nest -= 1 - break if @scanner.lex_state == EXPR_END and nest <= 0 - else - break unless @scanner.continue - end - when TkCOMMENT - unget_tk(tk) - break + case tk + when TkSEMICOLON + break + when TkLPAREN, TkfLPAREN + nest += 1 + when end_token + if end_token == TkRPAREN + nest -= 1 + break if @scanner.lex_state == EXPR_END and nest <= 0 + else + break unless @scanner.continue end - tk = get_tk + when TkCOMMENT + unget_tk(tk) + break + when nil then + break + end + tk = get_tk end res = get_tkread.tr("\n", " ").strip res = "" if res == ";" @@ -1871,7 +544,7 @@ class RDoc::Parser::Ruby < RDoc::Parser end def parse_class(container, single, tk, comment) - container, name_t = get_class_or_module(container) + container, name_t = get_class_or_module container case name_t when TkCONSTANT @@ -1880,7 +553,7 @@ class RDoc::Parser::Ruby < RDoc::Parser if TkLT === peek_tk then get_tk - skip_tkspace(true) + skip_tkspace superclass = get_class_specification superclass = "<unknown>" if superclass.empty? end @@ -1888,25 +561,24 @@ class RDoc::Parser::Ruby < RDoc::Parser cls_type = single == SINGLE ? 
RDoc::SingleClass : RDoc::NormalClass cls = container.add_class cls_type, name, superclass - @stats.add_class cls - read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS cls.record_location @top_level - - parse_statements cls cls.comment = comment + @stats.add_class cls + + parse_statements cls when TkLSHFT case name = get_class_specification when "self", container.name - parse_statements(container, SINGLE) + parse_statements container, SINGLE else - other = RDoc::TopLevel.find_class_named(name) - unless other - # other = @top_level.add_class(NormalClass, name, nil) - # other.record_location(@top_level) - # other.comment = comment - other = RDoc::NormalClass.new "Dummy", nil + other = RDoc::TopLevel.find_class_named name + + unless other then + other = container.add_module RDoc::NormalModule, name + other.record_location @top_level + other.comment = comment end @stats.add_class other @@ -1920,47 +592,69 @@ class RDoc::Parser::Ruby < RDoc::Parser end end - def parse_constant(container, single, tk, comment) + def parse_constant(container, tk, comment) name = tk.name - skip_tkspace(false) + skip_tkspace false eq_tk = get_tk unless TkASSIGN === eq_tk then - unget_tk(eq_tk) + unget_tk eq_tk return end - nest = 0 get_tkread tk = get_tk + if TkGT === tk then - unget_tk(tk) - unget_tk(eq_tk) + unget_tk tk + unget_tk eq_tk return end + rhs_name = '' + loop do - case tk - when TkSEMICOLON + case tk + when TkSEMICOLON then + break + when TkLPAREN, TkfLPAREN, TkLBRACE, TkLBRACK, TkDO, TkIF, TkUNLESS, + TkCASE then + nest += 1 + when TkRPAREN, TkRBRACE, TkRBRACK, TkEND then + nest -= 1 + when TkCOMMENT then + if nest <= 0 && @scanner.lex_state == EXPR_END + unget_tk tk break - when TkLPAREN, TkfLPAREN, TkLBRACE, TkLBRACK, TkDO - nest += 1 - when TkRPAREN, TkRBRACE, TkRBRACK, TkEND - nest -= 1 - when TkCOMMENT - if nest <= 0 && @scanner.lex_state == EXPR_END - unget_tk(tk) - break - end - when TkNL - if (nest <= 0) && ((@scanner.lex_state == EXPR_END) || ([email protected])) - unget_tk(tk) - break - end end - tk = get_tk + when TkCONSTANT then + rhs_name << tk.name + + if nest <= 0 and TkNL === peek_tk then + mod = if rhs_name =~ /^::/ then + RDoc::TopLevel.find_class_or_module rhs_name + else + container.find_module_named rhs_name + end + + container.add_module_alias mod, name if mod + get_tk # TkNL + break + end + when TkNL then + if nest <= 0 && + (@scanner.lex_state == EXPR_END || [email protected]) then + unget_tk tk + break + end + when TkCOLON2, TkCOLON3 then + rhs_name << '::' + when nil then + break + end + tk = get_tk end res = get_tkread.tr("\n", " ").strip @@ -1969,42 +663,60 @@ class RDoc::Parser::Ruby < RDoc::Parser con = RDoc::Constant.new name, res, comment read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS - if con.document_self - container.add_constant(con) - end + @stats.add_constant con + container.add_constant con if con.document_self end + ## + # Generates an RDoc::Method or RDoc::Attr from +comment+ by looking for + # :method: or :attr: directives in +comment+. + def parse_comment(container, tk, comment) line_no = tk.line_no column = tk.char_no singleton = !!comment.sub!(/(^# +:?)(singleton-)(method:)/, '\1\3') + # REFACTOR if comment.sub!(/^# +:?method: *(\S*).*?\n/i, '') then name = $1 unless $1.empty? 
- else - return nil - end - meth = RDoc::GhostMethod.new get_tkread, name - meth.singleton = singleton + meth = RDoc::GhostMethod.new get_tkread, name + meth.singleton = singleton - @stats.add_method meth + meth.start_collecting_tokens + indent = TkSPACE.new nil, 1, 1 + indent.set_text " " * column - meth.start_collecting_tokens - indent = TkSPACE.new 1, 1 - indent.set_text " " * column + position_comment = TkCOMMENT.new nil, line_no, 1 + position_comment.set_text "# File #{@top_level.absolute_name}, line #{line_no}" + meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] - position_comment = TkCOMMENT.new(line_no, 1, "# File #{@top_level.file_absolute_name}, line #{line_no}") - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + meth.params = '' - meth.params = '' + extract_call_seq comment, meth - extract_call_seq comment, meth + return unless meth.name - container.add_method meth if meth.document_self + container.add_method meth if meth.document_self - meth.comment = comment + meth.comment = comment + + @stats.add_method meth + elsif comment.sub!(/# +:?(attr(_reader|_writer|_accessor)?:) *(\S*).*?\n/i, '') then + rw = case $1 + when 'attr_reader' then 'R' + when 'attr_writer' then 'W' + else 'RW' + end + + name = $3 unless $3.empty? + + att = RDoc::Attr.new get_tkread, name, rw, comment + container.add_attribute att + + @stats.add_method att + end end def parse_include(context, comment) @@ -2020,6 +732,66 @@ class RDoc::Parser::Ruby < RDoc::Parser end ## + # Parses a meta-programmed attribute and creates an RDoc::Attr. + # + # To create foo and bar attributes on class C with comment "My attributes": + # + # class C + # + # ## + # # :attr: + # # + # # My attributes + # + # my_attr :foo, :bar + # + # end + # + # To create a foo attribute on class C with comment "My attribute": + # + # class C + # + # ## + # # :attr: foo + # # + # # My attribute + # + # my_attr :foo, :bar + # + # end + + def parse_meta_attr(context, single, tk, comment) + args = parse_symbol_arg + read = get_tkread + rw = "?" + + # If nodoc is given, don't document any of them + + tmp = RDoc::CodeObject.new + read_documentation_modifiers tmp, RDoc::ATTR_MODIFIERS + return unless tmp.document_self + + if comment.sub!(/^# +:?(attr(_reader|_writer|_accessor)?): *(\S*).*?\n/i, '') then + rw = case $1 + when 'attr_reader' then 'R' + when 'attr_writer' then 'W' + else 'RW' + end + name = $3 unless $3.empty? 
+ end + + if name then + att = RDoc::Attr.new get_tkread, name, rw, comment + context.add_attribute att + else + args.each do |attr_name| + att = RDoc::Attr.new get_tkread, attr_name, rw, comment + context.add_attribute att + end + end + end + + ## # Parses a meta-programmed method def parse_meta_method(container, single, tk, comment) @@ -2044,9 +816,12 @@ class RDoc::Parser::Ruby < RDoc::Parser when TkSYMBOL then name = name_t.text[1..-1] when TkSTRING then - name = name_t.text[1..-2] + name = name_t.value[1..-2] + when TkASSIGN then # ignore + remove_token_listener self + return else - warn "#{container.toplevel.file_relative_name}:#{name_t.line_no} unknown name token #{name_t.inspect} for meta-method" + warn "unknown name token #{name_t.inspect} for meta-method '#{tk.name}'" name = 'unknown' end end @@ -2054,166 +829,183 @@ class RDoc::Parser::Ruby < RDoc::Parser meth = RDoc::MetaMethod.new get_tkread, name meth.singleton = singleton - @stats.add_method meth - remove_token_listener self meth.start_collecting_tokens - indent = TkSPACE.new 1, 1 + indent = TkSPACE.new nil, 1, 1 indent.set_text " " * column - position_comment = TkCOMMENT.new(line_no, 1, "# File #{@top_level.file_absolute_name}, line #{line_no}") + position_comment = TkCOMMENT.new nil, line_no, 1 + position_comment.value = "# File #{@top_level.absolute_name}, line #{line_no}" meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] meth.add_tokens @token_stream - add_token_listener meth - - meth.params = '' + token_listener meth do + meth.params = '' - extract_call_seq comment, meth + extract_call_seq comment, meth - container.add_method meth if meth.document_self + container.add_method meth if meth.document_self - last_tk = tk + last_tk = tk - while tk = get_tk do - case tk - when TkSEMICOLON then - break - when TkNL then - break unless last_tk and TkCOMMA === last_tk - when TkSPACE then - # expression continues - else - last_tk = tk + while tk = get_tk do + case tk + when TkSEMICOLON then + break + when TkNL then + break unless last_tk and TkCOMMA === last_tk + when TkSPACE then + # expression continues + else + last_tk = tk + end end end - remove_token_listener meth - meth.comment = comment + + @stats.add_method meth end ## - # Parses a method + # Parses a normal method defined by +def+ def parse_method(container, single, tk, comment) + added_container = nil + meth = nil + name = nil line_no = tk.line_no column = tk.char_no start_collecting_tokens - add_token(tk) - add_token_listener(self) + add_token tk - @scanner.instance_eval do @lex_state = EXPR_FNAME end + token_listener self do + @scanner.instance_eval do @lex_state = EXPR_FNAME end - skip_tkspace(false) - name_t = get_tk - back_tk = skip_tkspace - meth = nil - added_container = false + skip_tkspace false + name_t = get_tk + back_tk = skip_tkspace + meth = nil + added_container = false - dot = get_tk - if TkDOT === dot or TkCOLON2 === dot then - @scanner.instance_eval do @lex_state = EXPR_FNAME end - skip_tkspace - name_t2 = get_tk + dot = get_tk + if TkDOT === dot or TkCOLON2 === dot then + @scanner.instance_eval do @lex_state = EXPR_FNAME end + skip_tkspace + name_t2 = get_tk + + case name_t + when TkSELF, TkMOD then + name = name_t2.name + when TkCONSTANT then + name = name_t2.name + prev_container = container + container = container.find_module_named(name_t.name) + unless container then + added_container = true + obj = name_t.name.split("::").inject(Object) do |state, item| + state.const_get(item) + end rescue nil + + type = obj.class == Class ? 
RDoc::NormalClass : RDoc::NormalModule + + unless [Class, Module].include?(obj.class) then + warn("Couldn't find #{name_t.name}. Assuming it's a module") + end - case name_t - when TkSELF then - name = name_t2.name - when TkCONSTANT then - name = name_t2.name - prev_container = container - container = container.find_module_named(name_t.name) - unless container then - added_container = true - obj = name_t.name.split("::").inject(Object) do |state, item| - state.const_get(item) - end rescue nil - - type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule - - unless [Class, Module].include?(obj.class) then - warn("Couldn't find #{name_t.name}. Assuming it's a module") - end + if type == RDoc::NormalClass then + sclass = obj.superclass ? obj.superclass.name : nil + container = prev_container.add_class type, name_t.name, sclass + else + container = prev_container.add_module type, name_t.name + end - if type == RDoc::NormalClass then - container = prev_container.add_class(type, name_t.name, obj.superclass.name) - else - container = prev_container.add_module(type, name_t.name) + container.record_location @top_level end - - container.record_location @top_level + when TkIDENTIFIER, TkIVAR then + dummy = RDoc::Context.new + dummy.parent = container + skip_method dummy + return + else + warn "unexpected method name token #{name_t.inspect}" + # break + skip_method container + return end + + meth = RDoc::AnyMethod.new(get_tkread, name) + meth.singleton = true else - # warn("Unexpected token '#{name_t2.inspect}'") - # break - skip_method(container) - return - end + unget_tk dot + back_tk.reverse_each do |token| + unget_tk token + end - meth = RDoc::AnyMethod.new(get_tkread, name) - meth.singleton = true - else - unget_tk dot - back_tk.reverse_each do |token| - unget_tk token - end - name = name_t.name + name = case name_t + when TkSTAR, TkAMPER then + name_t.text + else + unless name_t.respond_to? :name then + warn "expected method name token, . 
or ::, got #{name_t.inspect}" + skip_method container + return + end + name_t.name + end - meth = RDoc::AnyMethod.new get_tkread, name - meth.singleton = (single == SINGLE) + meth = RDoc::AnyMethod.new get_tkread, name + meth.singleton = (single == SINGLE) + end end - @stats.add_method meth - - remove_token_listener self - meth.start_collecting_tokens - indent = TkSPACE.new 1, 1 + indent = TkSPACE.new nil, 1, 1 indent.set_text " " * column - token = TkCOMMENT.new(line_no, 1, "# File #{@top_level.file_absolute_name}, line #{line_no}") + token = TkCOMMENT.new nil, line_no, 1 + token.set_text "# File #{@top_level.absolute_name}, line #{line_no}" meth.add_tokens [token, NEWLINE_TOKEN, indent] meth.add_tokens @token_stream - add_token_listener meth - - @scanner.instance_eval do @continue = false end - parse_method_parameters meth + token_listener meth do + @scanner.instance_eval do @continue = false end + parse_method_parameters meth - if meth.document_self then - container.add_method meth - elsif added_container then - container.document_self = false - end + if meth.document_self then + container.add_method meth + elsif added_container then + container.document_self = false + end - # Having now read the method parameters and documentation modifiers, we - # now know whether we have to rename #initialize to ::new + # Having now read the method parameters and documentation modifiers, we + # now know whether we have to rename #initialize to ::new - if name == "initialize" && !meth.singleton then - if meth.dont_rename_initialize then - meth.visibility = :protected - else - meth.singleton = true - meth.name = "new" - meth.visibility = :public + if name == "initialize" && !meth.singleton then + if meth.dont_rename_initialize then + meth.visibility = :protected + else + meth.singleton = true + meth.name = "new" + meth.visibility = :public + end end - end - - parse_statements(container, single, meth) - remove_token_listener(meth) + parse_statements container, single, meth + end extract_call_seq comment, meth meth.comment = comment + + @stats.add_method meth end def parse_method_or_yield_parameters(method = nil, modifiers = RDoc::METHOD_MODIFIERS) - skip_tkspace(false) + skip_tkspace false tk = get_tk # Little hack going on here. In the statement @@ -2232,33 +1024,39 @@ class RDoc::Parser::Ruby < RDoc::Parser nest = 0 loop do - case tk - when TkSEMICOLON - break - when TkLBRACE - nest += 1 - when TkRBRACE - # we might have a.each {|i| yield i } - unget_tk(tk) if nest.zero? + case tk + when TkSEMICOLON then + break + when TkLBRACE then + nest += 1 + when TkRBRACE then + # we might have a.each {|i| yield i } + unget_tk(tk) if nest.zero? + nest -= 1 + break if nest <= 0 + when TkLPAREN, TkfLPAREN then + nest += 1 + when end_token then + if end_token == TkRPAREN nest -= 1 - break if nest <= 0 - when TkLPAREN, TkfLPAREN - nest += 1 - when end_token - if end_token == TkRPAREN - nest -= 1 - break if @scanner.lex_state == EXPR_END and nest <= 0 - else - break unless @scanner.continue - end - when method && method.block_params.nil? && TkCOMMENT - unget_tk(tk) - read_documentation_modifiers(method, modifiers) + break if @scanner.lex_state == EXPR_END and nest <= 0 + else + break unless @scanner.continue end + when method && method.block_params.nil? 
&& TkCOMMENT then + unget_tk tk + read_documentation_modifiers method, modifiers + @read.pop + when TkCOMMENT then + @read.pop + when nil then + break + end tk = get_tk end - res = get_tkread.tr("\n", " ").strip - res = "" if res == ";" + + res = get_tkread.gsub(/\s+/, ' ').strip + res = '' if res == ';' res end @@ -2271,56 +1069,53 @@ class RDoc::Parser::Ruby < RDoc::Parser # and add this as the block_params for the method def parse_method_parameters(method) - res = parse_method_or_yield_parameters(method) - res = "(" + res + ")" unless res[0] == ?( + res = parse_method_or_yield_parameters method + + res = "(#{res})" unless res =~ /\A\(/ method.params = res unless method.params - if method.block_params.nil? - skip_tkspace(false) + + if method.block_params.nil? then + skip_tkspace false read_documentation_modifiers method, RDoc::METHOD_MODIFIERS end end def parse_module(container, single, tk, comment) - container, name_t = get_class_or_module(container) + container, name_t = get_class_or_module container name = name_t.name mod = container.add_module RDoc::NormalModule, name mod.record_location @top_level - @stats.add_module mod - read_documentation_modifiers mod, RDoc::CLASS_MODIFIERS parse_statements(mod) mod.comment = comment + + @stats.add_module mod end def parse_require(context, comment) skip_tkspace_comment tk = get_tk + if TkLPAREN === tk then skip_tkspace_comment tk = get_tk end - name = nil - case tk - when TkSTRING - name = tk.text - # when TkCONSTANT, TkIDENTIFIER, TkIVAR, TkGVAR - # name = tk.name - when TkDSTRING - warn "Skipping require of dynamic string: #{tk.text}" - # else - # warn "'require' used as variable" - end - if name + name = tk.text if TkSTRING === tk + + if name then context.add_require RDoc::Require.new(name, comment) else - unget_tk(tk) + unget_tk tk end end + ## + # The core of the ruby parser. 
+ def parse_statements(container, single = NORMAL, current_method = nil, comment = '') nest = 1 @@ -2335,7 +1130,7 @@ class RDoc::Parser::Ruby < RDoc::Parser case tk when TkNL then - skip_tkspace true # Skip blanks and newlines + skip_tkspace tk = get_tk if TkCOMMENT === tk then @@ -2349,8 +1144,9 @@ class RDoc::Parser::Ruby < RDoc::Parser while TkCOMMENT === tk do comment << tk.text << "\n" - tk = get_tk # this is the newline - skip_tkspace(false) # leading spaces + + tk = get_tk # this is the newline + skip_tkspace false # leading spaces tk = get_tk end @@ -2393,11 +1189,11 @@ class RDoc::Parser::Ruby < RDoc::Parser when TkCONSTANT then if container.document_self then - parse_constant container, single, tk, comment + parse_constant container, tk, comment end when TkALIAS then - if container.document_self then + if container.document_self and not current_method then parse_alias container, single, tk, comment end @@ -2434,15 +1230,21 @@ class RDoc::Parser::Ruby < RDoc::Parser keep_comment = true when 'attr' then parse_attr container, single, tk, comment - when /^attr_(reader|writer|accessor)$/, @options.extra_accessors then + when /^attr_(reader|writer|accessor)$/ then parse_attr_accessor container, single, tk, comment when 'alias_method' then - if container.document_self then - parse_alias container, single, tk, comment - end + parse_alias container, single, tk, comment if + container.document_self + when 'require', 'include' then + # ignore else if container.document_self and comment =~ /\A#\#$/ then - parse_meta_method container, single, tk, comment + case comment + when /^# +:?attr(_reader|_writer|_accessor)?:/ then + parse_meta_attr container, single, tk, comment + else + parse_meta_method container, single, tk, comment + end end end end @@ -2459,16 +1261,18 @@ class RDoc::Parser::Ruby < RDoc::Parser if nest == 0 then read_documentation_modifiers container, RDoc::CLASS_MODIFIERS container.ongoing_visibility = save_visibility + + parse_comment container, tk, comment unless comment.empty? + return end - end comment = '' unless keep_comment begin get_tkread - skip_tkspace(false) + skip_tkspace false end while peek_tk == TkNL end end @@ -2503,7 +1307,7 @@ class RDoc::Parser::Ruby < RDoc::Parser end loop do - skip_tkspace(false) + skip_tkspace false tk1 = get_tk unless TkCOMMA === tk1 then @@ -2533,7 +1337,7 @@ class RDoc::Parser::Ruby < RDoc::Parser end end - def parse_toplevel_statements(container) + def parse_top_level_statements(container) comment = collect_first_comment look_for_directives_in(container, comment) container.comment = comment unless comment.empty? @@ -2559,7 +1363,7 @@ class RDoc::Parser::Ruby < RDoc::Parser singleton = true :public else - raise "Invalid visibility: #{tk.name}" + raise RDoc::Error, "Invalid visibility: #{tk.name}" end skip_tkspace_comment false @@ -2613,18 +1417,6 @@ class RDoc::Parser::Ruby < RDoc::Parser end end - def peek_read - @read.join('') - end - - ## - # Peek at the next token, but don't remove it from the stream - - def peek_tk - unget_tk(tk = get_tk) - tk - end - ## # Directives are modifier comments that can appear after class, module, or # method names. For example: @@ -2640,16 +1432,18 @@ class RDoc::Parser::Ruby < RDoc::Parser def read_directive(allowed) tk = get_tk result = nil - if TkCOMMENT === tk - if tk.text =~ /\s*:?(\w+):\s*(.*)/ + + if TkCOMMENT === tk then + if tk.text =~ /\s*:?(\w+):\s*(.*)/ then directive = $1.downcase - if allowed.include?(directive) + if allowed.include? 
directive then result = [directive, $2] end end else - unget_tk(tk) + unget_tk tk end + result end @@ -2687,40 +1481,40 @@ class RDoc::Parser::Ruby < RDoc::Parser comment.sub!(/^#--\n.*/m, '') end - def remove_token_listener(obj) - @token_listeners.delete(obj) - end - - def reset - @tokens = [] - @unget_read = [] - @read = [] - end - def scan reset - catch(:eof) do - catch(:enddoc) do + catch :eof do + catch :enddoc do begin - parse_toplevel_statements(@top_level) - rescue Exception => e - $stderr.puts <<-EOF - - -RDoc failure in #{@file_name} at or around line #{@scanner.line_no} column -#{@scanner.char_no} + parse_top_level_statements @top_level + rescue StandardError => e + bytes = '' + + 20.times do @scanner.ungetc end + count = 0 + 60.times do |i| + count = i + byte = @scanner.getc + break unless byte + bytes << byte + end + count -= 20 + count.times do @scanner.ungetc end -Before reporting this, could you check that the file you're documenting -compiles cleanly--RDoc is not a full Ruby parser, and gets confused easily if -fed invalid programs. + $stderr.puts <<-EOF -The internal error was: +#{self.class} failure around line #{@scanner.line_no} of +#{@file_name} EOF - e.set_backtrace(e.backtrace[0,4]) - raise + unless bytes.empty? then + $stderr.puts + $stderr.puts bytes.inspect + end + + raise e end end end @@ -2732,7 +1526,7 @@ The internal error was: # while, until, and for have an optional do def skip_optional_do_after_expression - skip_tkspace(false) + skip_tkspace false tk = get_tk case tk when TkLPAREN, TkfLPAREN @@ -2759,10 +1553,12 @@ The internal error was: else break unless @scanner.continue end + when nil then + break end tk = get_tk end - skip_tkspace(false) + skip_tkspace false get_tk if TkDO === peek_tk end @@ -2771,31 +1567,17 @@ The internal error was: # skip the var [in] part of a 'for' statement def skip_for_variable - skip_tkspace(false) + skip_tkspace false tk = get_tk - skip_tkspace(false) + skip_tkspace false tk = get_tk unget_tk(tk) unless TkIN === tk end - def skip_method(container) + def skip_method container meth = RDoc::AnyMethod.new "", "anon" - parse_method_parameters(meth) - parse_statements(container, false, meth) - end - - ## - # Skip spaces - - def skip_tkspace(skip_nl = true) - tokens = [] - - while TkSPACE === (tk = get_tk) or (skip_nl and TkNL === tk) do - tokens.push tk - end - - unget_tk(tk) - tokens + parse_method_parameters meth + parse_statements container, false, meth end ## @@ -2803,22 +1585,12 @@ The internal error was: def skip_tkspace_comment(skip_nl = true) loop do - skip_tkspace(skip_nl) + skip_tkspace skip_nl return unless TkCOMMENT === peek_tk get_tk end end - def unget_tk(tk) - @tokens.unshift tk - @unget_read.unshift @read.pop - - # Remove this token from any listeners - @token_listeners.each do |obj| - obj.pop_token - end if @token_listeners - end - def warn(msg) return if @options.quiet msg = make_message msg |
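
The rewritten parser documented above adds handling for the :attr:, :attr_reader:, :attr_writer: and :attr_accessor: directives (parse_meta_attr and the new branch in parse_comment), alongside the existing :method: and :singleton-method: directives for ghost and meta-programmed methods. As a minimal sketch of the kind of source these code paths are meant to pick up — class, method and attribute names here are hypothetical, and my_attr stands in for any meta-programming helper, as in the patch's own examples:

    class Article
      ##
      # :attr_writer: ghost_writer
      # There is an attribute here, but you can't see it!

      ##
      # :method: ghost_method
      # There is a method here, but you can't see it!

      ##
      # :attr: title
      #
      # Names the attribute explicitly; leaving the name off the directive
      # would instead document each symbol passed to my_attr.
      my_attr :title
    end

According to the documentation added in this patch, entries marked this way appear in the generated output even though no corresponding def or attr_* call is visible in the source.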