diff options
author | John Naylor | 2022-08-31 03:39:17 +0000 |
---|---|---|
committer | John Naylor | 2022-09-02 02:36:22 +0000 |
commit | 0a8de93a48ce1e7479fb75fe10a8859559ec0c09 (patch) | |
tree | 497ee333fbb408e740a0b539481f50e9401db81c /src | |
parent | 05519126a02ee39bf0957d3d85a1da5bd7e1c09c (diff) |
Speed up lexing of long JSON strings
Use optimized linear search when looking ahead for end quotes,
backslashes, and non-printable characters. This results in nearly 40%
faster JSON parsing on x86-64 when most values are long strings, and
all platforms should see some improvement.
Reviewed by Andres Freund and Nathan Bossart
Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r-- | src/common/jsonapi.c | 13 | ||||
-rw-r--r-- | src/test/regress/expected/json.out | 13 | ||||
-rw-r--r-- | src/test/regress/sql/json.sql | 5 |
3 files changed, 28 insertions, 3 deletions
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index fefd1d24d90..cfc025749cc 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -19,6 +19,7 @@ #include "common/jsonapi.h" #include "mb/pg_wchar.h" +#include "port/pg_lfind.h" #ifndef FRONTEND #include "miscadmin.h" @@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex) } else { - char *p; + char *p = s; if (hi_surrogate != -1) return JSON_UNICODE_LOW_SURROGATE; @@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex) * Skip to the first byte that requires special handling, so we * can batch calls to appendBinaryStringInfo. */ - for (p = s; p < end; p++) + while (p < end - sizeof(Vector8) && + !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) && + !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) && + !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8))) + p += sizeof(Vector8); + + for (; p < end; p++) { if (*p == '\\' || *p == '"') break; - else if ((unsigned char) *p < 32) + else if ((unsigned char) *p <= 31) { /* Per RFC4627, these characters MUST be escaped. */ /* diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index e9d6e9faf29..cb181226e9f 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json; ^ DETAIL: Escape sequence "\v" is invalid. CONTEXT: JSON data, line 1: "\v... +-- Check fast path for longer strings (at least 16 bytes long) +SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK + json +------------------- + "............abc" +(1 row) + +SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes + json +--------------------- + "............abc\n" +(1 row) + -- see json_encoding test for input with unicode escapes -- Numbers. SELECT '1'::json; -- OK diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index e366c6f51b6..589e0cea367 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -7,6 +7,11 @@ SELECT '"abc def"'::json; -- ERROR, unescaped newline in string constant SELECT '"\n\"\\"'::json; -- OK, legal escapes SELECT '"\v"'::json; -- ERROR, not a valid JSON escape + +-- Check fast path for longer strings (at least 16 bytes long) +SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK +SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes + -- see json_encoding test for input with unicode escapes -- Numbers. |