Skip to content

Major overhaul of mbstring (part 20) #8257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
1a3e686
Implement fast text conversion interface for '7bit'
alexdowad Dec 26, 2021
c9fb81e
Implement fast text conversion interface for ISO-2022-KR
alexdowad Dec 28, 2021
85e2be8
Implement fast text conversion interface for SJIS-mac
alexdowad Jan 9, 2022
41a7be2
Implement fast text conversion interface for UTF7-IMAP
alexdowad Jan 21, 2022
41d87c2
Implement fast text conversion interface for mobile SJIS variants
alexdowad Jan 23, 2022
aff9a0d
Implement fast text conversion interface for EUC-JP-2004
alexdowad Jan 24, 2022
48657d9
Implement fast text conversion interface for SJIS-2004
alexdowad Jan 28, 2022
9a0cc3a
Implement fast text conversion interface for ISO-2022-JP-2004
alexdowad Jan 28, 2022
215a23c
Implement fast text conversion interface for mobile variants of UTF-8
alexdowad Feb 5, 2022
2c6ad6e
Implement fast text conversion interface for ISO-2022-JP-MS
alexdowad Feb 13, 2022
e5b5d48
Implement fast text conversion interface for ISO-2022-JP-KDDI
alexdowad Feb 27, 2022
3a97806
Implement fast text conversion interface for '8bit'
alexdowad Mar 28, 2022
3ced851
Implement fast text conversion interface for UUENCODE
alexdowad Apr 23, 2022
5bfbdf2
Implement fast text conversion interface for Base64
alexdowad Apr 23, 2022
8804db3
Simplify code for converting UTF-8
alexdowad Apr 23, 2022
3bd69a6
Implement fast text conversion interface for HTML-ENTITIES
alexdowad Apr 24, 2022
e055b42
Implement fast text conversion interface for QPrint
alexdowad Apr 25, 2022
7d37ba6
For JIS/ISO-2022-JP, treat a truncated escape sequence as error
alexdowad May 2, 2022
0c339e8
Add more tests for mbstring encoding conversion
alexdowad Apr 26, 2022
38c4193
Add assertions to help catch buffer overflows in mbstring text conver…
alexdowad May 8, 2022
fd35531
Fix buffer overflow bugs in UTF-7 text conversion
alexdowad May 8, 2022
35e4768
Use fast text conversion filters to implement php_mb_convert_encoding_ex
alexdowad May 13, 2022
7d716b8
Fix buffer overflow bug in HZ text conversion code
alexdowad May 22, 2022
0eb000f
Fix buffer overflow bugs in CP50222 text conversion code
alexdowad May 23, 2022
df05cde
php_mb_convert_encoding{,_ex} returns zend_string
alexdowad May 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions ext/mbstring/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,12 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_gb18030.c
libmbfl/filters/mbfilter_euc_cn.c
libmbfl/filters/mbfilter_euc_jp.c
libmbfl/filters/mbfilter_euc_jp_2004.c
libmbfl/filters/mbfilter_euc_jp_win.c
libmbfl/filters/mbfilter_euc_kr.c
libmbfl/filters/mbfilter_euc_tw.c
libmbfl/filters/mbfilter_htmlent.c
libmbfl/filters/mbfilter_hz.c
libmbfl/filters/mbfilter_iso2022_jp_ms.c
libmbfl/filters/mbfilter_iso2022jp_2004.c
libmbfl/filters/mbfilter_iso2022jp_mobile.c
libmbfl/filters/mbfilter_iso2022_kr.c
libmbfl/filters/mbfilter_jis.c
Expand Down
4 changes: 2 additions & 2 deletions ext/mbstring/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ if (PHP_MBSTRING != "no") {
mbfilter_sjis_2004.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
mbfilter_utf8_mobile.c mbfilter_euc_jp_2004.c mbfilter_uuencode.c \
mbfilter_utf8_mobile.c mbfilter_uuencode.c \
mbfilter_cp5022x.c mbfilter_sjis_mobile.c \
mbfilter_sjis_mac.c mbfilter_iso2022jp_2004.c \
mbfilter_sjis_mac.c \
mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c \
mbfilter_tl_jisx0201_jisx0208.c", "mbstring");

Expand Down
41 changes: 39 additions & 2 deletions ext/mbstring/libmbfl/filters/mbfilter_7bit.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#include "mbfilter.h"
#include "mbfilter_7bit.h"

/* Forward declarations of the '7bit' conversion routines defined later in this
 * file. They are declared here so that the mbfl_encoding_7bit initializer
 * below can store pointers to them. */
static size_t mb_7bit_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_7bit(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

const mbfl_encoding mbfl_encoding_7bit = {
mbfl_no_encoding_7bit,
"7bit",
Expand All @@ -40,8 +43,8 @@ const mbfl_encoding mbfl_encoding_7bit = {
MBFL_ENCTYPE_SBCS,
NULL,
NULL,
NULL,
NULL
mb_7bit_to_wchar,
mb_wchar_to_7bit
};

const struct mbfl_convert_vtbl vtbl_8bit_7bit = {
Expand Down Expand Up @@ -82,3 +85,37 @@ int mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter)
}
return 0;
}

/* Decode '7bit' input into wide characters.
 *
 * Reads bytes starting at *in (at most *in_len of them) and stores one value
 * per byte into buf, stopping as soon as the input is exhausted or bufsize
 * values have been written. A byte below 0x80 is stored unchanged; any other
 * byte is stored as MBFL_BAD_INPUT.
 *
 * On return *in points just past the last consumed byte and *in_len holds the
 * number of bytes still unread. The return value is the number of values
 * written to buf. `state` is not touched. */
static size_t mb_7bit_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in;
	size_t remaining = *in_len;
	size_t written = 0;

	for (; remaining > 0 && written < bufsize; remaining--, written++) {
		unsigned char byte = *src++;

		if (byte < 0x80) {
			buf[written] = byte;
		} else {
			buf[written] = MBFL_BAD_INPUT;
		}
	}

	*in = src;
	*in_len = remaining;
	return written;
}

/* Encode `len` wide characters from `in` as '7bit' output appended to `buf`.
 *
 * Values up to 0x7F are appended as a single byte. Any larger value is handed
 * to MB_CONVERT_ERROR instead of being written directly. `end` is not used. */
static void mb_wchar_to_7bit(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* One output byte per input value is the normal case */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	for (; len > 0; in++) {
		/* From here on `len` counts the values still waiting after this one */
		len--;
		uint32_t codepoint = *in;

		if (codepoint > 0x7F) {
			MB_CONVERT_ERROR(buf, out, limit, codepoint, mb_wchar_to_7bit);
			/* Re-check the space for the remaining values; presumably the error
			 * handling above can consume output space itself -- TODO confirm */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
			continue;
		}

		out = mb_convert_buf_add(out, codepoint);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
126 changes: 124 additions & 2 deletions ext/mbstring/libmbfl/filters/mbfilter_base64.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
#include "mbfilter.h"
#include "mbfilter_base64.h"

/* Forward declarations of the Base64 conversion routines defined later in this
 * file. They are declared here so that the mbfl_encoding_base64 initializer
 * below can store pointers to them. */
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

const mbfl_encoding mbfl_encoding_base64 = {
mbfl_no_encoding_base64,
"BASE64",
Expand All @@ -40,8 +43,8 @@ const mbfl_encoding mbfl_encoding_base64 = {
MBFL_ENCTYPE_GL_UNSAFE,
NULL,
NULL,
NULL,
NULL
mb_base64_to_wchar,
mb_wchar_to_base64
};

const struct mbfl_convert_vtbl vtbl_8bit_b64 = {
Expand Down Expand Up @@ -212,3 +215,122 @@ int mbfl_filt_conv_base64dec_flush(mbfl_convert_filter *filter)
}
return 0;
}

/* Map a single character of the Base64 alphabet to its 6-bit value (0..63).
 * Returns -1 for every character outside the alphabet, including the '='
 * padding character and whitespace. */
static int decode_base64(char c)
{
	if (c == '+') {
		return 62;
	}
	if (c == '/') {
		return 63;
	}
	if (c >= '0' && c <= '9') {
		/* Digits occupy values 52..61 */
		return 52 + (c - '0');
	}
	if (c >= 'a' && c <= 'z') {
		/* Lowercase letters occupy values 26..51 */
		return 26 + (c - 'a');
	}
	if (c >= 'A' && c <= 'Z') {
		/* Uppercase letters occupy values 0..25 */
		return c - 'A';
	}
	return -1;
}

/* Decode Base64 text into a sequence of byte values, one per output slot.
 *
 * Reads from *in (at most *in_len bytes) and writes decoded values into buf.
 * CR, LF, space, tab and '=' are skipped. Any other byte which is not part of
 * the Base64 alphabet produces one MBFL_BAD_INPUT value. Every four alphabet
 * characters yield three output values.
 *
 * `state` carries an unfinished group between calls: the low 8 bits hold the
 * number of bits collected so far, the remaining bits hold the collected data.
 *
 * On return *in / *in_len are advanced past the consumed input, and the return
 * value is the number of values written to buf. */
static size_t mb_base64_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *src = *in, *src_end = src + *in_len;
	uint32_t *dst = buf;
	/* Keep the last two slots of the output buffer in reserve, so the trailing
	 * bytes of an incomplete group always fit once the input runs out */
	uint32_t *dst_limit = buf + bufsize - 2;

	unsigned int nbits = *state & 0xFF;
	unsigned int acc = *state >> 8;

	while (src < src_end && (dst_limit - dst) >= 3) {
		unsigned char c = *src++;

		switch (c) {
		case '\r':
		case '\n':
		case ' ':
		case '\t':
		case '=':
			/* Line breaks, blanks and padding carry no data */
			continue;
		default:
			break;
		}

		int sextet = decode_base64(c);
		if (sextet == -1) {
			*dst++ = MBFL_BAD_INPUT;
			continue;
		}

		acc = (acc << 6) | (sextet & 0x3F);
		nbits += 6;
		if (nbits != 24) {
			continue;
		}

		/* A full group of four characters: emit its three bytes */
		*dst++ = (acc >> 16) & 0xFF;
		*dst++ = (acc >> 8) & 0xFF;
		*dst++ = acc & 0xFF;
		nbits = acc = 0;
	}

	if (src != src_end) {
		/* More input remains; remember the partial group for the next call */
		*state = (acc << 8) | (nbits & 0xFF);
	} else if (nbits == 18) {
		/* Three trailing characters encode two bytes */
		*dst++ = (acc >> 10) & 0xFF;
		*dst++ = (acc >> 2) & 0xFF;
	} else if (nbits == 12) {
		/* Two trailing characters encode one byte */
		*dst++ = (acc >> 4) & 0xFF;
	}

	*in_len = src_end - src;
	*in = src;
	return dst - buf;
}

/* Encode `len` byte values from `in` as Base64 text appended to `buf`.
 *
 * Only the low 8 bits of each input value are used. Every 3 input bytes become
 * 4 output characters, and a CR+LF pair is inserted before the output line
 * would grow past 76 characters. When `end` is true and 1 or 2 bytes are left
 * over, a final 4-character group padded with '=' is emitted.
 *
 * buf->state carries the encoder state between calls:
 *   bits 0-1   number of pending input bytes (0..2)
 *   bits 2-7   number of 4-character groups already on the current line
 *   bits 8+    the pending input bytes themselves */
static void mb_wchar_to_base64(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned int bits = (buf->state & 0x3) * 8;
	unsigned int chars_output = ((buf->state >> 2) & 0x3F) * 4;
	unsigned int cache = buf->state >> 8;

	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Every 3 bytes of input converts to 4 bytes of output... but if the number of input
	 * bytes is not a multiple of 3, we still pad the output out to a multiple of 4
	 * That's `(len + 2) * 4 / 3`, to calculate the amount of space needed in the output buffer
	 *
	 * But also, we add a CR+LF line ending (2 bytes) for every 76 bytes of output
	 * That means we must multiply the above number by 78/76
	 * Use `zend_safe_address_guarded` to check that the multiplication doesn't overflow
	 *
	 * And since we may enter this function multiple times when converting a large string, and
	 * we might already be close to where a CR+LF needs to be emitted, make space for an extra
	 * CR+LF pair in the output buffer
	 *
	 * The final padded group is counted like any other 4-byte group here, so the
	 * CR+LF which may precede it is covered by the same reservation */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, (zend_safe_address_guarded(len + (bits / 8), 26, 52) / 19) + 2);

	while (len--) {
		uint32_t w = *in++;
		cache = (cache << 8) | (w & 0xFF);
		bits += 8;
		if (bits == 24) {
			/* 76 characters are already on this line; start a new one */
			if (chars_output > 72) {
				out = mb_convert_buf_add2(out, '\r', '\n');
				chars_output = 0;
			}
			out = mb_convert_buf_add4(out,
				mbfl_base64_table[(cache >> 18) & 0x3F],
				mbfl_base64_table[(cache >> 12) & 0x3F],
				mbfl_base64_table[(cache >> 6) & 0x3F],
				mbfl_base64_table[cache & 0x3F]);
			chars_output += 4;
			bits = cache = 0;
		}
	}

	if (end && bits) {
		/* The padded group is subject to the same line length limit as every
		 * other group; without this check it would be appended to a line which
		 * is already 76 characters long */
		if (chars_output > 72) {
			out = mb_convert_buf_add2(out, '\r', '\n');
		}
		if (bits == 8) {
			/* One leftover byte: 2 data characters + "==" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 2) & 0x3F], mbfl_base64_table[(cache & 0x3) << 4], '=', '=');
		} else {
			/* Two leftover bytes: 3 data characters + "=" */
			out = mb_convert_buf_add4(out, mbfl_base64_table[(cache >> 10) & 0x3F], mbfl_base64_table[(cache >> 4) & 0x3F], mbfl_base64_table[(cache & 0xF) << 2], '=');
		}
	} else {
		/* Save pending bytes, their count, and the current line position */
		buf->state = (cache << 8) | (((chars_output / 4) & 0x3F) << 2) | ((bits / 8) & 0x3);
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
4 changes: 2 additions & 2 deletions ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, b
out = mb_convert_buf_add(out, s - 0x80);
} else if (s <= 0x927E) {
/* JISX 0208 Kanji */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6);
if (buf->state == JISX_0201_KANA) {
out = mb_convert_buf_add(out, 0xF);
}
Expand All @@ -1002,7 +1002,7 @@ static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, b
out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
} else if (s >= 0x10000) {
/* JISX 0201 Latin; we 'tag' these by adding 0x10000 */
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5);
if (buf->state == JISX_0201_KANA) {
out = mb_convert_buf_add(out, 0xF);
}
Expand Down
69 changes: 0 additions & 69 deletions ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c

This file was deleted.

3 changes: 0 additions & 3 deletions ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,4 @@ extern const mbfl_encoding mbfl_encoding_eucjp2004;
extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004;

int mbfl_filt_conv_eucjp2004_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_eucjp2004(int c, mbfl_convert_filter *filter);

#endif /* MBFL_MBFILTER_EUC_JP_2004_H */
Loading