summary | refs | log | tree | commit | diff
path: root/prism/enc/pm_unicode.c
diff options
context:
space:
mode:
author Kevin Newton <[email protected]> 2023-09-27 12:24:48 -0400
committer Kevin Newton <[email protected]> 2023-09-27 13:57:38 -0400
commit 4f73a7c2f7ff16aa78cf0dec2d4c7f90a2c41c9b (patch)
tree 3b6f0cedc858d46d30a28c6d03439d653884a915 /prism/enc/pm_unicode.c
parent 8ab56869a64fdccc094f4a83c6367fb23b72d38b (diff)
Sync to prism rename commits
Diffstat (limited to 'prism/enc/pm_unicode.c')
-rw-r--r-- prism/enc/pm_unicode.c 80
1 files changed, 40 insertions, 40 deletions
diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c
index 196955d483..ab10044424 100644
--- a/prism/enc/pm_unicode.c
+++ b/prism/enc/pm_unicode.c
@@ -1,16 +1,16 @@
// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
-#include "yarp/enc/yp_encoding.h"
+#include "prism/enc/pm_encoding.h"
-typedef uint32_t yp_unicode_codepoint_t;
+typedef uint32_t pm_unicode_codepoint_t;
// Each element of the following table contains a bitfield that indicates a
// piece of information about the corresponding unicode codepoint. Note that
// this table is different from other encodings where we used a lookup table
// because the indices of those tables are the byte representations, not the
// codepoints themselves.
-const uint8_t yp_encoding_unicode_table[256] = {
+const uint8_t pm_encoding_unicode_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -31,7 +31,7 @@ const uint8_t yp_encoding_unicode_table[256] = {
};
#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
-static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
0x2E0, 0x2E4,
@@ -760,7 +760,7 @@ static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
};
#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
-static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
0x2E0, 0x2E4,
@@ -1528,7 +1528,7 @@ static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
};
#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1296
-static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
0x100, 0x100,
0x102, 0x102,
0x104, 0x104,
@@ -2180,7 +2180,7 @@ static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
};
static bool
-yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_codepoint_t *codepoints, size_t size) {
+pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
size_t start = 0;
size_t end = size;
@@ -2202,7 +2202,7 @@ yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_co
return false;
}
-static const uint8_t yp_utf_8_dfa[] = {
+static const uint8_t pm_utf_8_dfa[] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
@@ -2219,8 +2219,8 @@ static const uint8_t yp_utf_8_dfa[] = {
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};
-static yp_unicode_codepoint_t
-yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
+static pm_unicode_codepoint_t
+pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
assert(n >= 1);
size_t maximum = (size_t) n;
@@ -2229,16 +2229,16 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
for (size_t index = 0; index < 4 && index < maximum; index++) {
uint32_t byte = b[index];
- uint32_t type = yp_utf_8_dfa[byte];
+ uint32_t type = pm_utf_8_dfa[byte];
codepoint = (state != 0) ?
(byte & 0x3fu) | (codepoint << 6) :
(0xffu >> type) & (byte);
- state = yp_utf_8_dfa[256 + (state * 16) + type];
+ state = pm_utf_8_dfa[256 + (state * 16) + type];
if (!state) {
*width = index + 1;
- return (yp_unicode_codepoint_t) codepoint;
+ return (pm_unicode_codepoint_t) codepoint;
}
}
@@ -2247,57 +2247,57 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
}
static size_t
-yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
size_t width;
- yp_utf_8_codepoint(b, n, &width);
+ pm_utf_8_codepoint(b, n, &width);
return width;
}
size_t
-yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
- return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
size_t width;
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
} else {
- return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
+ return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
}
}
size_t
-yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
- return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
+ return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
size_t width;
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
} else {
- return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
+ return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
}
}
static bool
-yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (*b < 0x80) {
- return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
size_t width;
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
if (codepoint <= 0xFF) {
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
} else {
- return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
+ return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
}
}
@@ -2305,20 +2305,20 @@ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
-yp_encoding_t yp_encoding_utf_8 = {
+pm_encoding_t pm_encoding_utf_8 = {
.name = "utf-8",
- .char_width = yp_encoding_utf_8_char_width,
- .alnum_char = yp_encoding_utf_8_alnum_char,
- .alpha_char = yp_encoding_utf_8_alpha_char,
- .isupper_char = yp_encoding_utf_8_isupper_char,
+ .char_width = pm_encoding_utf_8_char_width,
+ .alnum_char = pm_encoding_utf_8_alnum_char,
+ .alpha_char = pm_encoding_utf_8_alpha_char,
+ .isupper_char = pm_encoding_utf_8_isupper_char,
.multibyte = true
};
-yp_encoding_t yp_encoding_utf8_mac = {
+pm_encoding_t pm_encoding_utf8_mac = {
.name = "utf8-mac",
- .char_width = yp_encoding_utf_8_char_width,
- .alnum_char = yp_encoding_utf_8_alnum_char,
- .alpha_char = yp_encoding_utf_8_alpha_char,
- .isupper_char = yp_encoding_utf_8_isupper_char,
+ .char_width = pm_encoding_utf_8_char_width,
+ .alnum_char = pm_encoding_utf_8_alnum_char,
+ .alpha_char = pm_encoding_utf_8_alpha_char,
+ .isupper_char = pm_encoding_utf_8_isupper_char,
.multibyte = true
};