1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# coding: US-ASCII
# frozen_string_literal: true
##
# This class is a wrapper around File IO and Encoding that helps RDoc load
# files and convert them to the correct encoding.
module RDoc::Encoding
HEADER_REGEXP = /^
(?:
\A\#!.*\n
|
^\#\s+frozen[-_]string[-_]literal[=:].+\n
|
^\#[^\n]+\b(?:en)?coding[=:]\s*(?<name>[^\s;]+).*\n
|
<\?xml[^?]*encoding=(?<quote>["'])(?<name>.*?)\k<quote>.*\n
)+
/xi # :nodoc:
##
# Reads the contents of +filename+ and handles any encoding directives in
# the file.
#
# The content will be converted to the +encoding+. If the file cannot be
# converted a warning will be printed and nil will be returned.
#
# If +force_transcode+ is true the document will be transcoded and any
# unknown character in the target encoding will be replaced with '?'
def self.read_file filename, encoding, force_transcode = false
content = File.open filename, "rb" do |f| f.read end
content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/
utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
enc = RDoc::Encoding.detect_encoding content
content = RDoc::Encoding.change_encoding content, enc if enc
begin
encoding ||= Encoding.default_external
orig_encoding = content.encoding
if not orig_encoding.ascii_compatible? then
content = content.encode encoding
elsif utf8 then
content = RDoc::Encoding.change_encoding content, Encoding::UTF_8
content = content.encode encoding
else
# assume the content is in our output encoding
content = RDoc::Encoding.change_encoding content, encoding
end
unless content.valid_encoding? then
# revert and try to transcode
content = RDoc::Encoding.change_encoding content, orig_encoding
content = content.encode encoding
end
unless content.valid_encoding? then
warn "unable to convert #{filename} to #{encoding}, skipping"
content = nil
end
rescue Encoding::InvalidByteSequenceError,
Encoding::UndefinedConversionError => e
if force_transcode then
content = RDoc::Encoding.change_encoding content, orig_encoding
content = content.encode(encoding,
:invalid => :replace,
:undef => :replace,
:replace => '?')
return content
else
warn "unable to convert #{e.message} for #{filename}, skipping"
return nil
end
end
content
rescue ArgumentError => e
raise unless e.message =~ /unknown encoding name - (.*)/
warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
nil
rescue Errno::EISDIR, Errno::ENOENT
nil
end
def self.remove_frozen_string_literal string
string =~ /\A(?:#!.*\n)?(.*\n)/
first_line = $1
if first_line =~ /\A# +frozen[-_]string[-_]literal[=:].+$/i
string = string.sub first_line, ''
end
string
end
##
# Detects the encoding of +string+ based on the magic comment
def self.detect_encoding string
result = HEADER_REGEXP.match string
name = result && result[:name]
name ? Encoding.find(name) : nil
end
##
# Removes magic comments and shebang
def self.remove_magic_comment string
string.sub HEADER_REGEXP do |s|
s.gsub(/[^\n]/, '')
end
end
##
# Changes encoding based on +encoding+ without converting and returns new
# string
def self.change_encoding text, encoding
if text.kind_of? RDoc::Comment
text.encode! encoding
else
String.new text, encoding: encoding
end
end
end
|