summaryrefslogtreecommitdiff
path: root/test/prism/encoding/string_encoding_test.rb
blob: 6f9d86df3be454c95e65a31c8d561f960c95efc9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# frozen_string_literal: true

require_relative "../test_helper"

module Prism
  # Checks the encoding Prism assigns to string literals, driven both by magic
  # encoding comments at the top of the source and by escape sequences inside
  # the literal itself.
  class StringEncodingTest < TestCase
    # Define one test method per encoding yielded by each_encoding (a helper
    # defined outside this file). Each generated test runs the escape-sequence
    # matrix in #assert_encoding with that encoding as the source encoding.
    each_encoding do |encoding, _|
      define_method(:"test_#{encoding.name}") do
        assert_encoding(encoding)
      end
    end

    # A plain "# coding: <name>" magic comment sets the string's encoding.
    def test_coding
      actual = Prism.parse_statement("# coding: utf-8\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # Spaces, tabs, carriage returns and vertical tabs on either side of the
    # colon must not stop the magic comment from being recognized.
    def test_coding_with_whitespace
      actual = Prism.parse_statement("# coding \t \r  \v   :     \t \v    \r   ascii-8bit \n'string'").unescaped.encoding
      assert_equal Encoding::ASCII_8BIT, actual
    end

    # The Emacs-style "-*- coding: <name> -*-" form is recognized as well.
    def test_emacs_style
      actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # A "-unix" suffix on the encoding name still resolves to UTF-8.
    def test_utf_8_unix
      actual = Prism.parse_statement("# coding: utf-8-unix\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # A "-dos" suffix on the encoding name still resolves to UTF-8.
    def test_utf_8_dos
      actual = Prism.parse_statement("# coding: utf-8-dos\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # A "-mac" suffix on the encoding name still resolves to UTF-8.
    def test_utf_8_mac
      actual = Prism.parse_statement("# coding: utf-8-mac\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # A "-*" suffix on the encoding name still resolves to UTF-8.
    def test_utf_8_star
      actual = Prism.parse_statement("# coding: utf-8-*\n'string'").unescaped.encoding
      assert_equal Encoding::UTF_8, actual
    end

    # The magic comment applies to the very first lexed token too, i.e. the
    # comment token that carries the encoding declaration itself.
    def test_first_lexed_token
      encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
      assert_equal Encoding::ASCII_8BIT, encoding
    end

    # NOTE(review): these tests rely on Shift_JIS, so they are presumably
    # skipped because a minimal build does not ship that encoding -- confirm.
    unless ENV["PRISM_BUILD_MINIMAL"]
      # This test may be a little confusing. Basically when we use our strpbrk,
      # it takes into account the encoding of the file. The trailing byte of
      # the two-byte sequence below is 0x5c, which is a backslash in ASCII, so
      # a byte-wise scan would mistake it for the start of an escape.
      def test_strpbrk_multibyte
        result = Prism.parse(<<~RUBY)
          # encoding: Shift_JIS
          %w[\x81\x5c]
        RUBY

        assert(result.errors.empty?)
        assert_equal(
          (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
          result.statement.elements.first.unescaped
        )
      end

      # Node#slice must return source text tagged with the file's declared
      # encoding rather than the encoding of the string handed to the parser.
      def test_slice_encoding
        slice = Prism.parse("# encoding: Shift_JIS\nア").value.slice
        assert_equal((+"ア").force_encoding(Encoding::SHIFT_JIS), slice)
        assert_equal Encoding::SHIFT_JIS, slice.encoding
      end

      # A backslash followed by a multibyte Shift_JIS character must parse in
      # every kind of literal listed below. Each pair supplies the text placed
      # before and after the escaped character.
      def test_multibyte_escapes
        [
          ["'", "'"],
          ["\"", "\""],
          ["`", "`"],
          ["/", "/"],
          ["<<'HERE'\n", "\nHERE"],
          ["<<-HERE\n", "\nHERE"]
        ].each do |opening, closing|
          # Build the source from the current delimiters so that every literal
          # form is actually exercised, not just the single-quoted one.
          source = "# encoding: shift_jis\n#{opening}\\\x82\xA0#{closing}\n"
          assert Prism.parse_success?(source), "expected escape inside #{opening.inspect} to parse"
        end
      end
    end

    private

    # Compares Prism against the running Ruby for a matrix of escape sequences
    # embedded in a double-quoted string under the given source encoding.
    #
    # For every single escape and every ordered pair of escapes, the expected
    # value is whatever evaluating the source yields: either the encoding of
    # the resulting string or, when Ruby rejects the source, the "UTF-8 mixed
    # within ... source" fragment of the SyntaxError message. The actual value
    # is derived from Prism's parse result in the same two shapes.
    #
    # encoding - the Encoding named in the generated magic comment.
    #
    # Raises whatever SyntaxError Ruby reports if it is not a "mixed" error,
    # and a RuntimeError if Prism reports any error that is not a "mixed" one.
    def assert_encoding(encoding)
      escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
      # Append every ordered pair so interactions between escapes are covered.
      # The product is computed before concat mutates the array.
      escapes.concat(escapes.product(escapes).map(&:join))

      escapes.each do |escaped|
        source = "# encoding: #{encoding.name}\n\"#{escaped}\""

        expected =
          begin
            # The source is generated entirely by this test, so eval is safe.
            eval(source).encoding
          rescue SyntaxError => error
            if error.message.include?("UTF-8 mixed within")
              error.message[/UTF-8 mixed within .+? source/]
            else
              raise
            end
          end

        actual =
          Prism.parse(source).then do |result|
            if result.success?
              string = result.statement

              # The node only flags encodings forced by its escapes; with no
              # flag set the string keeps the source encoding.
              if string.forced_utf8_encoding?
                Encoding::UTF_8
              elsif string.forced_binary_encoding?
                Encoding::ASCII_8BIT
              else
                encoding
              end
            else
              error = result.errors.first

              if error.message.include?("mixed")
                error.message
              else
                raise error.message
              end
            end
          end

        assert_equal expected, actual
      end
    end
  end
end