Skip to content

Commit e36c600

Browse files
committed
Optimize SJIS-Mobile#SOFTBANK decoder for speed
From my microbenchmarks, the new decoder makes encoding conversion from SJIS-Mobile#SOFTBANK about 15-40% faster.
1 parent 6bf0c44 commit e36c600

File tree

1 file changed

+47
-44
lines changed

1 file changed

+47
-44
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

+47 -44
Original file line number | Diff line number | Diff line change
@@ -2652,59 +2652,62 @@ static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
26522652
break;
26532653
}
26542654
unsigned char c2 = *p++;
2655+
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
26552656

2656-
if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
2657-
uint32_t w = 0;
2658-
unsigned int s1, s2;
2659-
SJIS_DECODE(c, c2, s1, s2);
2660-
unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
2661-
2662-
if (s <= 137) {
2663-
if (s == 31) {
2664-
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2665-
} else if (s == 32) {
2666-
w = 0xFF5E; /* FULLWIDTH TILDE */
2667-
} else if (s == 33) {
2668-
w = 0x2225; /* PARALLEL TO */
2669-
} else if (s == 60) {
2670-
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2671-
} else if (s == 80) {
2672-
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
2673-
} else if (s == 81) {
2674-
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
2675-
} else if (s == 137) {
2676-
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
2677-
}
2657+
if (w <= 137) {
2658+
if (w == 31) {
2659+
*out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2660+
continue;
2661+
} else if (w == 32) {
2662+
*out++ = 0xFF5E; /* FULLWIDTH TILDE */
2663+
continue;
2664+
} else if (w == 33) {
2665+
*out++ = 0x2225; /* PARALLEL TO */
2666+
continue;
2667+
} else if (w == 60) {
2668+
*out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2669+
continue;
2670+
} else if (w == 80) {
2671+
*out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */
2672+
continue;
2673+
} else if (w == 81) {
2674+
*out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */
2675+
continue;
2676+
} else if (w == 137) {
2677+
*out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */
2678+
continue;
26782679
}
2680+
}
26792681

2682+
if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) {
2683+
int snd = 0;
2684+
w = mbfilter_sjis_emoji_sb2unicode(w, &snd);
26802685
if (!w) {
2681-
if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) {
2682-
int snd = 0;
2683-
w = mbfilter_sjis_emoji_sb2unicode(s, &snd);
2684-
if (snd) {
2685-
*out++ = snd;
2686-
}
2687-
} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
2688-
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
2689-
} else if (s < jisx0208_ucs_table_size) {
2690-
w = jisx0208_ucs_table[s];
2691-
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
2692-
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
2693-
}
2694-
2695-
if (!w) {
2696-
if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
2697-
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
2698-
} else if (s >= (94*94) && s < (114*94)) {
2699-
w = s - (94*94) + 0xE000;
2700-
}
2686+
w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2687+
if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) {
2688+
w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min];
2689+
} else if (w >= (94*94) && w < (114*94)) {
2690+
w = w - (94*94) + 0xE000;
27012691
}
2692+
} else if (snd) {
2693+
*out++ = snd;
27022694
}
2703-
2704-
*out++ = w ? w : MBFL_BAD_INPUT;
2695+
} else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) {
2696+
w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min];
2697+
} else if (w < jisx0208_ucs_table_size) {
2698+
w = jisx0208_ucs_table[w];
2699+
} else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) {
2700+
w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min];
2701+
} else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) {
2702+
w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min];
2703+
} else if (w >= (94*94) && w < (114*94)) {
2704+
w = w - (94*94) + 0xE000;
27052705
} else {
27062706
*out++ = MBFL_BAD_INPUT;
2707+
continue;
27072708
}
2709+
2710+
*out++ = w ? w : MBFL_BAD_INPUT;
27082711
} else {
27092712
*out++ = MBFL_BAD_INPUT;
27102713
}

0 commit comments

Comments
 (0)