summaryrefslogtreecommitdiff
path: root/prism/util/pm_char.c
blob: 42c3896626bf206c25900d5f63bb03511013f282 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#include "yarp/util/yp_char.h"

// Bit flags stored in the entries of yp_byte_table (one entry per byte value).

// Set for every whitespace byte in the table: tab, line feed, vertical tab,
// form feed, carriage return, and space.
#define YP_CHAR_BIT_WHITESPACE (1 << 0)
// Set for the same bytes as YP_CHAR_BIT_WHITESPACE except line feed (0x0A).
#define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
// Set for the bytes the table treats as regular expression option letters
// (ASCII e, i, m, n, o, s, u, x).
#define YP_CHAR_BIT_REGEXP_OPTION (1 << 2)

// Bit flags stored in the entries of yp_number_table (one entry per byte
// value). Each base has two flags: the *_DIGIT flag is set only for the digits
// of that base, while the *_NUMBER flag is set for those digits and also for
// the underscore byte (0x5F), which may appear inside a numeric literal.

// ASCII '0'-'1'.
#define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0)
// ASCII '0'-'1' and '_'.
#define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1)
// ASCII '0'-'7'.
#define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
// ASCII '0'-'7' and '_'.
#define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
// ASCII '0'-'9'.
#define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
// ASCII '0'-'9' and '_'.
#define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
// ASCII '0'-'9', 'A'-'F', 'a'-'f'.
#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
// ASCII '0'-'9', 'A'-'F', 'a'-'f' and '_'.
#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)

// Character-class flags for every possible byte value, indexed by the byte
// itself. Only the non-zero entries are spelled out; every byte that is not
// listed is zero-initialized and therefore belongs to no class.
//
// Values are combinations of the YP_CHAR_BIT_* flags:
//   1 = whitespace
//   3 = whitespace | inline whitespace
//   4 = regexp option
static const uint8_t yp_byte_table[256] = {
    // Whitespace. Line feed is the only whitespace byte that is not also
    // inline whitespace.
    [0x09] = 3, // '\t'
    [0x0A] = 1, // '\n'
    [0x0B] = 3, // '\v'
    [0x0C] = 3, // '\f'
    [0x0D] = 3, // '\r'
    [0x20] = 3, // ' '

    // Regular expression option letters.
    [0x65] = 4, // 'e'
    [0x69] = 4, // 'i'
    [0x6D] = 4, // 'm'
    [0x6E] = 4, // 'n'
    [0x6F] = 4, // 'o'
    [0x73] = 4, // 's'
    [0x75] = 4, // 'u'
    [0x78] = 4, // 'x'
};

// Numeric-class flags for every possible byte value, indexed by the byte
// itself. Only the non-zero entries are spelled out; every byte that is not
// listed is zero-initialized and therefore is not part of any number.
//
// Values are combinations of the YP_NUMBER_BIT_* flags:
//   0xff = digit and number flags of every base
//   0xfc = octal, decimal and hexadecimal (digit and number flags)
//   0xf0 = decimal and hexadecimal (digit and number flags)
//   0xc0 = hexadecimal only (digit and number flags)
//   0xaa = the number flag of every base, but no digit flag
static const uint8_t yp_number_table[256] = {
    // '0'-'1'
    [0x30] = 0xff, [0x31] = 0xff,

    // '2'-'7'
    [0x32] = 0xfc, [0x33] = 0xfc, [0x34] = 0xfc,
    [0x35] = 0xfc, [0x36] = 0xfc, [0x37] = 0xfc,

    // '8'-'9'
    [0x38] = 0xf0, [0x39] = 0xf0,

    // 'A'-'F'
    [0x41] = 0xc0, [0x42] = 0xc0, [0x43] = 0xc0,
    [0x44] = 0xc0, [0x45] = 0xc0, [0x46] = 0xc0,

    // '_'
    [0x5F] = 0xaa,

    // 'a'-'f'
    [0x61] = 0xc0, [0x62] = 0xc0, [0x63] = 0xc0,
    [0x64] = 0xc0, [0x65] = 0xc0, [0x66] = 0xc0,
};

// Count the bytes at the start of string whose yp_byte_table entry has at
// least one of the bits in kind set. Stops at the first byte that does not
// match and never examines more than length bytes; a length of zero or less
// yields 0.
static inline size_t
yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((yp_byte_table[string[count]] & kind) == 0) break;
    }

    return count;
}

// Returns the number of bytes at the start of the string that are whitespace,
// i.e. flagged YP_CHAR_BIT_WHITESPACE in yp_byte_table. Examines at most length
// bytes and returns 0 when length is zero or negative.
size_t
yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
}

// Returns the number of bytes at the start of the string that are whitespace,
// i.e. flagged YP_CHAR_BIT_WHITESPACE in yp_byte_table. Every '\n' byte in
// that run is reported by passing its address, together with newline_list, to
// yp_newline_list_append. Examines at most length bytes and returns 0 when
// length is zero or negative.
size_t
yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count = 0;

    for (; count < limit; count++) {
        const uint8_t byte = string[count];

        // The first non-whitespace byte ends the run.
        if ((yp_byte_table[byte] & YP_CHAR_BIT_WHITESPACE) == 0) break;

        if (byte == '\n') yp_newline_list_append(newline_list, string + count);
    }

    return count;
}

// Returns the number of bytes at the start of the string that are inline
// whitespace, i.e. flagged YP_CHAR_BIT_INLINE_WHITESPACE in yp_byte_table
// (whitespace other than line feed). Examines at most length bytes and returns
// 0 when length is zero or negative.
size_t
yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
}

// Returns the number of bytes at the start of the string that are regexp
// options, i.e. flagged YP_CHAR_BIT_REGEXP_OPTION in yp_byte_table. Examines at
// most length bytes and returns 0 when length is zero or negative.
size_t
yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
}

// Reports whether the yp_byte_table entry for the byte b has at least one of
// the bits in kind set.
static inline bool
yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = yp_byte_table[b];
    return (flags & kind) != 0;
}

// Returns true if the given byte is a whitespace character, i.e. it is flagged
// YP_CHAR_BIT_WHITESPACE in yp_byte_table (tab, line feed, vertical tab, form
// feed, carriage return, or space).
bool
yp_char_is_whitespace(const uint8_t b) {
    return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
}

// Returns true if the given byte is an inline whitespace character, i.e. it is
// flagged YP_CHAR_BIT_INLINE_WHITESPACE in yp_byte_table (tab, vertical tab,
// form feed, carriage return, or space; line feed is excluded).
bool
yp_char_is_inline_whitespace(const uint8_t b) {
    return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
}

// Count the bytes at the start of string whose yp_number_table entry has at
// least one of the bits in kind set. Stops at the first byte that does not
// match and never examines more than length bytes; a length of zero or less
// yields 0.
static inline size_t
yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) return 0;

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((yp_number_table[string[count]] & kind) == 0) break;
    }

    return count;
}

// Scan through the string and return the number of bytes at the start of the
// string whose yp_number_table entry has at least one of the bits in kind set.
// Never examines more than length bytes; a length of zero or less yields 0.
//
// Additionally, report misplaced underscores through the out parameter
// invalid. An underscore is misplaced when it directly follows another
// underscore or when it is the final byte of the matched run. When one or more
// misplaced underscores are found, *invalid is set to point at the last of
// them; when none are found, *invalid is not written, so callers are expected
// to initialize it before the call.
static inline size_t
yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

    // Tracks whether the previously matched byte was an underscore so that
    // consecutive underscores can be detected.
    bool underscore = false;
    while (size < maximum && (yp_number_table[string[size]] & kind)) {
        if (string[size] == '_') {
            if (underscore) *invalid = string + size;
            underscore = true;
        } else {
            underscore = false;
        }

        size++;
    }

    // A trailing underscore is also invalid. Only look at the last matched
    // byte when at least one byte matched: with size == 0 the expression
    // string[size - 1] would read before the start of the buffer.
    if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
    return size;
}

// Returns the number of bytes at the start of the string that are binary
// digits or underscores (flagged YP_NUMBER_BIT_BINARY_NUMBER in
// yp_number_table). Examines at most length bytes and returns 0 when length is
// zero or negative.
//
// If multiple underscores are found in a row or if an underscore is found at
// the end of the matched run, then *invalid is set to point at the last such
// invalid underscore.
size_t
yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
}

// Returns the number of bytes at the start of the string that are octal digits
// or underscores (flagged YP_NUMBER_BIT_OCTAL_NUMBER in yp_number_table).
// Examines at most length bytes and returns 0 when length is zero or negative.
//
// If multiple underscores are found in a row or if an underscore is found at
// the end of the matched run, then *invalid is set to point at the last such
// invalid underscore.
size_t
yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
}

// Returns the number of bytes at the start of the string that are decimal
// digits (flagged YP_NUMBER_BIT_DECIMAL_DIGIT in yp_number_table; underscores
// do not match). Examines at most length bytes and returns 0 when length is
// zero or negative.
size_t
yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
}

// Returns the number of bytes at the start of the string that are decimal
// digits or underscores (flagged YP_NUMBER_BIT_DECIMAL_NUMBER in
// yp_number_table). Examines at most length bytes and returns 0 when length is
// zero or negative.
//
// If multiple underscores are found in a row or if an underscore is found at
// the end of the matched run, then *invalid is set to point at the last such
// invalid underscore.
size_t
yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
}

// Returns the number of bytes at the start of the string that are hexadecimal
// digits (flagged YP_NUMBER_BIT_HEXADECIMAL_DIGIT in yp_number_table;
// underscores do not match). Examines at most length bytes and returns 0 when
// length is zero or negative.
size_t
yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
}

// Returns the number of bytes at the start of the string that are hexadecimal
// digits or underscores (flagged YP_NUMBER_BIT_HEXADECIMAL_NUMBER in
// yp_number_table). Examines at most length bytes and returns 0 when length is
// zero or negative.
//
// If multiple underscores are found in a row or if an underscore is found at
// the end of the matched run, then *invalid is set to point at the last such
// invalid underscore.
size_t
yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
}

// Reports whether the yp_number_table entry for the byte b has at least one of
// the bits in kind set.
static inline bool
yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = yp_number_table[b];
    return (flags & kind) != 0;
}

// Returns true if the given byte is a binary digit, i.e. it is flagged
// YP_NUMBER_BIT_BINARY_DIGIT in yp_number_table (ASCII '0' or '1'). The
// underscore is not a digit.
bool
yp_char_is_binary_digit(const uint8_t b) {
    return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
}

// Returns true if the given byte is an octal digit, i.e. it is flagged
// YP_NUMBER_BIT_OCTAL_DIGIT in yp_number_table (ASCII '0' through '7'). The
// underscore is not a digit.
bool
yp_char_is_octal_digit(const uint8_t b) {
    return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
}

// Returns true if the given byte is a decimal digit, i.e. it is flagged
// YP_NUMBER_BIT_DECIMAL_DIGIT in yp_number_table (ASCII '0' through '9'). The
// underscore is not a digit.
bool
yp_char_is_decimal_digit(const uint8_t b) {
    return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
}

// Returns true if the given byte is a hexadecimal digit, i.e. it is flagged
// YP_NUMBER_BIT_HEXADECIMAL_DIGIT in yp_number_table (ASCII '0' through '9',
// 'A' through 'F', or 'a' through 'f'). The underscore is not a digit.
bool
yp_char_is_hexadecimal_digit(const uint8_t b) {
    return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
}

// Remove the table bit-flag macros defined at the top of this file so that
// they are no longer defined past this point.
#undef YP_CHAR_BIT_WHITESPACE
#undef YP_CHAR_BIT_INLINE_WHITESPACE
#undef YP_CHAR_BIT_REGEXP_OPTION

#undef YP_NUMBER_BIT_BINARY_DIGIT
#undef YP_NUMBER_BIT_BINARY_NUMBER
#undef YP_NUMBER_BIT_OCTAL_DIGIT
#undef YP_NUMBER_BIT_OCTAL_NUMBER
#undef YP_NUMBER_BIT_DECIMAL_DIGIT
#undef YP_NUMBER_BIT_DECIMAL_NUMBER
#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT