1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class StringEncodingTest < TestCase
# Generates one test method per encoding yielded by each_encoding. Each
# generated method is named test_<encoding name> and delegates to
# assert_encoding for that encoding.
each_encoding do |encoding, _|
  define_method("test_#{encoding.name}") { assert_encoding(encoding) }
end
# A plain `# coding: utf-8` magic comment gives the following string literal
# a UTF-8 unescaped value.
def test_coding
  source = "# coding: utf-8\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# Tabs, carriage returns and vertical tabs around the colon of the magic
# comment do not stop the encoding name (ascii-8bit here) from being applied.
def test_coding_with_whitespace
  source = "# coding \t \r \v : \t \v \r ascii-8bit \n'string'"
  assert_equal Encoding::ASCII_8BIT, Prism.parse_statement(source).unescaped.encoding
end
# The Emacs-style `-*- coding: utf-8 -*-` form of the magic comment is
# recognized as well.
def test_emacs_style
  source = "# -*- coding: utf-8 -*-\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# An encoding name carrying the `-unix` suffix still resolves to UTF-8.
def test_utf_8_unix
  source = "# coding: utf-8-unix\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# An encoding name carrying the `-dos` suffix still resolves to UTF-8.
def test_utf_8_dos
  source = "# coding: utf-8-dos\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# An encoding name carrying the `-mac` suffix still resolves to UTF-8.
def test_utf_8_mac
  source = "# coding: utf-8-mac\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# An encoding name carrying the `-*` suffix still resolves to UTF-8.
def test_utf_8_star
  source = "# coding: utf-8-*\n'string'"
  assert_equal Encoding::UTF_8, Prism.parse_statement(source).unescaped.encoding
end
# When the source consists only of the magic comment, the value of the very
# first lexed token is already tagged with the encoding the comment names.
def test_first_lexed_token
  first_entry = Prism.lex("# encoding: ascii-8bit").value[0]
  token = first_entry[0]

  assert_equal Encoding::ASCII_8BIT, token.value.encoding
end
if !ENV["PRISM_BUILD_MINIMAL"]
# This test may be a little confusing. Basically, Prism's strpbrk takes the
# encoding of the file into account. The %w element below is the byte pair
# 0x81 0x5C under a Shift_JIS magic comment; it must parse without errors and
# come back as exactly those two bytes.
# NOTE(review): presumably the point is that the 0x5C byte must be treated as
# part of the multibyte character rather than as a backslash - confirm.
def test_strpbrk_multibyte
  parsed = Prism.parse(<<~RUBY)
    # encoding: Shift_JIS
    %w[\x81\x5c]
  RUBY

  assert parsed.errors.empty?

  expected = (+"\x81\x5c").force_encoding(Encoding::Shift_JIS)
  assert_equal expected, parsed.statement.elements.first.unescaped
end
# The slice of the parsed program node carries the encoding declared by the
# magic comment, and compares equal to the same bytes forced to Shift_JIS.
def test_slice_encoding
  program_slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice
  expected = (+"ア").force_encoding(Encoding::SHIFT_JIS)

  assert_equal expected, program_slice
  assert_equal Encoding::SHIFT_JIS, program_slice.encoding
end
# A backslash followed by the two-byte sequence 0x82 0xA0, under a shift_jis
# magic comment, must parse successfully inside every literal form listed
# below: single-quoted string, double-quoted string, backtick command string,
# regular expression, and both a quoted and a dash heredoc.
#
# Each pair is [opening delimiter, closing delimiter]. The delimiters are
# interpolated around the escaped bytes so that every form is really
# exercised; previously the pair was ignored and only the single-quoted form
# was parsed, six times over.
def test_multibyte_escapes
  [
    ["'", "'"],
    ["\"", "\""],
    ["`", "`"],
    ["/", "/"],
    ["<<'HERE'\n", "\nHERE"],
    ["<<-HERE\n", "\nHERE"]
  ].each do |opening, closing|
    source = "# encoding: shift_jis\n#{opening}\\\x82\xA0#{closing}\n"

    # Name the delimiter pair in the failure message so a failing case can be
    # identified without re-running the loop by hand.
    assert(
      Prism.parse_success?(source),
      "expected escaped multibyte character to parse between " \
      "#{opening.inspect} and #{closing.inspect}"
    )
  end
end
end
private
# Checks that Prism agrees with the running Ruby about the encoding of
# double-quoted string literals containing escape sequences, when the source
# declares +encoding+ in a magic comment.
#
# Every single escape in the list below, and every ordered pair of them joined
# together, is placed in a string literal and evaluated two ways:
#
# * expected - the encoding of the string produced by `eval`, or, when `eval`
#   raises a SyntaxError mentioning "UTF-8 mixed within", the matching
#   "UTF-8 mixed within ... source" fragment of that message. Any other
#   SyntaxError is re-raised.
# * actual - for a successful Prism parse, UTF-8 or ASCII-8BIT when the string
#   node reports the corresponding forced encoding flag, otherwise +encoding+
#   itself. For a failed parse, the first error's message if it mentions
#   "mixed"; any other error message is raised.
#
# The `eval` input is built solely from the fixed escape list and the encoding
# name, never from external data.
def assert_encoding(encoding)
  singles = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
  pairs = singles.product(singles).map(&:join)

  (singles + pairs).each do |escaped|
    source = "# encoding: #{encoding.name}\n\"#{escaped}\""

    expected =
      begin
        eval(source).encoding
      rescue SyntaxError => syntax_error
        # Only the mixed-encoding failure is an expected outcome.
        raise unless syntax_error.message.include?("UTF-8 mixed within")

        syntax_error.message[/UTF-8 mixed within .+? source/]
      end

    result = Prism.parse(source)
    actual =
      if result.success?
        node = result.statement

        if node.forced_utf8_encoding?
          Encoding::UTF_8
        elsif node.forced_binary_encoding?
          Encoding::ASCII_8BIT
        else
          encoding
        end
      else
        first_error = result.errors.first
        raise first_error.message unless first_error.message.include?("mixed")

        first_error.message
      end

    assert_equal expected, actual
  end
end
end
end
|