Skip to content

Commit 43cdfa3

Browse files
committed Dec 12, 2022
Optimize SJIS-Mobile#DOCOMO decoder for speed
From my microbenchmarks, the new decoder makes encoding conversion from SJIS-Mobile#DOCOMO about 15-20% faster.
1 parent 4ebfddf commit 43cdfa3

File tree

1 file changed

+45
-49
lines changed

1 file changed

+45
-49
lines changed
 

‎ext/mbstring/libmbfl/filters/mbfilter_sjis.c

+45 -49
Original file line number | Diff line number | Diff line change
@@ -409,11 +409,11 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
409409
return 0;
410410
}
411411

412-
static unsigned short sjis_decode_tbl1[] = {
413-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, -6204, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648
412+
static const unsigned short sjis_decode_tbl1[] = {
413+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, -6204, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092
414414
};
415415

416-
static unsigned short sjis_decode_tbl2[] = {
416+
static const unsigned short sjis_decode_tbl2[] = {
417417
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF
418418
};
419419

@@ -2117,59 +2117,55 @@ static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32
21172117
break;
21182118
}
21192119
unsigned char c2 = *p++;
2120+
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
21202121

2121-
if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
2122-
uint32_t w = 0;
2123-
unsigned int s1, s2;
2124-
SJIS_DECODE(c, c2, s1, s2);
2125-
unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
2126-
2127-
if (s <= 137) {
2128-
if (s == 31) {
2129-
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2130-
} else if (s == 32) {
2131-
w = 0xFF5E; /* FULLWIDTH TILDE */
2132-
} else if (s == 33) {
2133-
w = 0x2225; /* PARALLEL TO */
2134-
} else if (s == 60) {
2135-
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2136-
} else if (s == 80) {
2137-
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
2138-
} else if (s == 81) {
2139-
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
2140-
} else if (s == 137) {
2141-
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
2142-
}
2122+
if (w <= 137) {
2123+
if (w == 31) {
2124+
*out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2125+
continue;
2126+
} else if (w == 32) {
2127+
*out++ = 0xFF5E; /* FULLWIDTH TILDE */
2128+
continue;
2129+
} else if (w == 33) {
2130+
*out++ = 0x2225; /* PARALLEL TO */
2131+
continue;
2132+
} else if (w == 60) {
2133+
*out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2134+
continue;
2135+
} else if (w == 80) {
2136+
*out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */
2137+
continue;
2138+
} else if (w == 81) {
2139+
*out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */
2140+
continue;
2141+
} else if (w == 137) {
2142+
*out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */
2143+
continue;
21432144
}
2145+
}
21442146

2145-
if (!w) {
2146-
if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) {
2147-
int snd = 0;
2148-
w = mbfilter_sjis_emoji_docomo2unicode(s, &snd);
2149-
if (snd) {
2150-
*out++ = snd;
2151-
}
2152-
} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
2153-
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
2154-
} else if (s < jisx0208_ucs_table_size) {
2155-
w = jisx0208_ucs_table[s];
2156-
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
2157-
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
2158-
}
2159-
2160-
if (!w) {
2161-
if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
2162-
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
2163-
} else if (s >= (94*94) && s < (114*94)) {
2164-
w = s - (94*94) + 0xE000;
2165-
}
2166-
}
2147+
if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) {
2148+
int snd = 0;
2149+
w = mbfilter_sjis_emoji_docomo2unicode(w, &snd);
2150+
if (snd) {
2151+
*out++ = snd;
21672152
}
2168-
2169-
*out++ = w ? w : MBFL_BAD_INPUT;
2153+
} else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) {
2154+
w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min];
2155+
} else if (w < jisx0208_ucs_table_size) {
2156+
w = jisx0208_ucs_table[w];
2157+
} else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) {
2158+
w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min];
2159+
} else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) {
2160+
w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min];
2161+
} else if (w >= (94*94) && w < (114*94)) {
2162+
w = w - (94*94) + 0xE000;
21702163
} else {
21712164
*out++ = MBFL_BAD_INPUT;
2165+
continue;
21722166
}
2167+
2168+
*out++ = w ? w : MBFL_BAD_INPUT;
21732169
} else {
21742170
*out++ = MBFL_BAD_INPUT;
21752171
}

0 commit comments

Comments
 (0)