Skip to content

Commit 74319de

Browse files
committed
Combine uhc1_ucs_table and uhc2_ucs_table for UHC/EUC-KR/ISO-2022-KR conversion
These two tables cover contiguous ranges of the KS X 1001/KS C 5601 charset. There seems to be no reason to divide them into two tables instead of one.
1 parent ef114f9 commit 74319de

File tree

4 files changed

+28
-41
lines changed

4 files changed

+28
-41
lines changed

ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c

+12-10
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,9 @@ int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter)
118118
}
119119
if (flag > 0 && c >= 0xa1 && c <= 0xfe) {
120120
if (flag == 1) { /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */
121-
w = (c1 - 0xa1)*190 + c - 0x41;
122-
ZEND_ASSERT(w < uhc2_ucs_table_size);
123-
w = uhc2_ucs_table[w];
121+
w = (c1 - 0x81)*190 + c - 0x41;
122+
ZEND_ASSERT(w < uhc1_ucs_table_size);
123+
w = uhc1_ucs_table[w];
124124
} else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */
125125
w = (c1 - 0xc7)*94 + c - 0xa1;
126126
ZEND_ASSERT(w < uhc3_ucs_table_size);
@@ -216,23 +216,25 @@ static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *bu
216216
*out++ = c;
217217
} else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 && p < e) {
218218
unsigned char c2 = *p++;
219+
if (c2 < 0xA1 || c2 == 0xFF) {
220+
*out++ = MBFL_BAD_INPUT;
221+
continue;
222+
}
219223

220-
if (c >= 0xA1 && c <= 0xC6 && c2 >= 0xA1 && c2 <= 0xFE) {
221-
unsigned int w = (c - 0xA1)*190 + c2 - 0x41;
222-
ZEND_ASSERT(w < uhc2_ucs_table_size);
223-
w = uhc2_ucs_table[w];
224+
if (c <= 0xC6) {
225+
unsigned int w = (c - 0x81)*190 + c2 - 0x41;
226+
ZEND_ASSERT(w < uhc1_ucs_table_size);
227+
w = uhc1_ucs_table[w];
224228
if (!w)
225229
w = MBFL_BAD_INPUT;
226230
*out++ = w;
227-
} else if (c >= 0xC7 && c <= 0xFE && c != 0xC9 && c2 >= 0xA1 && c2 <= 0xFE) {
231+
} else {
228232
unsigned int w = (c - 0xC7)*94 + c2 - 0xA1;
229233
ZEND_ASSERT(w < uhc3_ucs_table_size);
230234
w = uhc3_ucs_table[w];
231235
if (!w)
232236
w = MBFL_BAD_INPUT;
233237
*out++ = w;
234-
} else {
235-
*out++ = MBFL_BAD_INPUT;
236238
}
237239
} else {
238240
*out++ = MBFL_BAD_INPUT;

ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
119119
if (flag > 0 && c > 0x20 && c < 0x7f) {
120120
if (flag == 1) {
121121
if (c1 != 0x22 || c <= 0x65) {
122-
w = (c1 - 0x21)*190 + (c - 0x41) + 0x80;
123-
ZEND_ASSERT(w < uhc2_ucs_table_size);
124-
w = uhc2_ucs_table[w];
122+
w = (c1 - 1)*190 + (c - 0x41) + 0x80;
123+
ZEND_ASSERT(w < uhc1_ucs_table_size);
124+
w = uhc1_ucs_table[w];
125125
}
126126
} else {
127127
w = (c1 - 0x47)*94 + c - 0x21;
@@ -329,9 +329,9 @@ static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t
329329

330330
if (c < 0x47) {
331331
if (c != 0x22 || c2 <= 0x65) {
332-
w = (c - 0x21)*190 + (c2 - 0x41) + 0x80;
333-
ZEND_ASSERT(w < uhc2_ucs_table_size);
334-
w = uhc2_ucs_table[w];
332+
w = (c - 1)*190 + c2 - 0x41 + 0x80;
333+
ZEND_ASSERT(w < uhc1_ucs_table_size);
334+
w = uhc1_ucs_table[w];
335335
}
336336
} else if (c != 0x49 && c <= 0x7D) {
337337
w = (c - 0x47)*94 + c2 - 0x21;

ext/mbstring/libmbfl/filters/mbfilter_uhc.c

+6-18
Original file line numberDiff line numberDiff line change
@@ -114,16 +114,11 @@ int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter)
114114
filter->status = 0;
115115
int c1 = filter->cache, w = 0;
116116

117-
if (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x41 && c <= 0xfe) {
117+
if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) {
118118
w = (c1 - 0x81)*190 + (c - 0x41);
119119
if (w >= 0 && w < uhc1_ucs_table_size) {
120120
w = uhc1_ucs_table[w];
121121
}
122-
} else if (c1 >= 0xa1 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) {
123-
w = (c1 - 0xa1)*190 + (c - 0x41);
124-
if (w >= 0 && w < uhc2_ucs_table_size) {
125-
w = uhc2_ucs_table[w];
126-
}
127122
} else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) {
128123
w = (c1 - 0xc7)*94 + (c - 0xa1);
129124
if (w >= 0 && w < uhc3_ucs_table_size) {
@@ -214,21 +209,14 @@ static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf,
214209
}
215210
unsigned int w = 0;
216211

217-
if (c <= 0xA0) {
212+
if (c <= 0xC6) {
218213
w = (c - 0x81)*190 + c2 - 0x41;
219-
if (w < uhc1_ucs_table_size) {
220-
w = uhc1_ucs_table[w];
221-
}
222-
} else if (c <= 0xC6) {
223-
w = (c - 0xA1)*190 + c2 - 0x41;
224-
if (w < uhc2_ucs_table_size) {
225-
w = uhc2_ucs_table[w];
226-
}
214+
ZEND_ASSERT(w < uhc1_ucs_table_size);
215+
w = uhc1_ucs_table[w];
227216
} else if (c2 >= 0xA1) {
228217
w = (c - 0xC7)*94 + c2 - 0xA1;
229-
if (w < uhc3_ucs_table_size) {
230-
w = uhc3_ucs_table[w];
231-
}
218+
ZEND_ASSERT(w < uhc3_ucs_table_size);
219+
w = uhc3_ucs_table[w];
232220
}
233221
if (!w) {
234222
w = MBFL_BAD_INPUT;

ext/mbstring/libmbfl/filters/unicode_table_uhc.h

+4-7
Original file line numberDiff line numberDiff line change
@@ -790,12 +790,8 @@ const unsigned short uhc1_ucs_table[] = {
790790
0xc876,0xc877,0xc879,0xc87b,0xc87c,0xc87d,0xc87e,0xc87f,
791791
0xc882,0xc884,0xc888,0xc889,0xc88a,0xc88e,0xc88f,0xc890,
792792
0xc891,0xc892,0xc893,0xc895,0xc896,0xc897,0xc898,0xc899,
793-
0xc89a,0xc89b,0xc89c,0xc89e,0xc8a0,0xc8a2,0xc8a3,0xc8a4
794-
};
795-
796-
const int uhc1_ucs_table_size = (sizeof(uhc1_ucs_table)/sizeof(unsigned short));
793+
0xc89a,0xc89b,0xc89c,0xc89e,0xc8a0,0xc8a2,0xc8a3,0xc8a4,
797794

798-
const unsigned short uhc2_ucs_table[] = {
799795
0xc8a5,0xc8a6,0xc8a7,0xc8a9,0xc8aa,0xc8ab,0xc8ac,0xc8ad,
800796
0xc8ae,0xc8af,0xc8b0,0xc8b1,0xc8b2,0xc8b3,0xc8b4,0xc8b5,
801797
0xc8b6,0xc8b7,0xc8b8,0xc8b9,0xc8ba,0xc8bb,0xc8be,0xc8bf,
@@ -1698,9 +1694,10 @@ const unsigned short uhc2_ucs_table[] = {
16981694
0xd391,0xd398,0xd399,0xd39c,0xd3a0,0xd3a8,0xd3a9,0xd3ab,
16991695
0xd3ad,0xd3b4,0xd3b8,0xd3bc,0xd3c4,0xd3c5,0xd3c8,0xd3c9,
17001696
0xd3d0,0xd3d8,0xd3e1,0xd3e3,0xd3ec,0xd3ed,0xd3f0,0xd3f4,
1701-
0xd3fc,0xd3fd,0xd3ff,0xd401};
1697+
0xd3fc,0xd3fd,0xd3ff,0xd401
1698+
};
17021699

1703-
const int uhc2_ucs_table_size = (sizeof(uhc2_ucs_table)/sizeof(unsigned short));
1700+
const int uhc1_ucs_table_size = (sizeof(uhc1_ucs_table)/sizeof(unsigned short));
17041701

17051702
const unsigned short uhc3_ucs_table[] = {
17061703
0xd408,0xd41d,0xd440,0xd444,0xd45c,0xd460,0xd464,0xd46d,

0 commit comments

Comments
 (0)