summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author John Naylor 2022-08-31 03:39:17 +0000
committer John Naylor 2022-09-02 02:36:22 +0000
commit 0a8de93a48ce1e7479fb75fe10a8859559ec0c09 (patch)
tree 497ee333fbb408e740a0b539481f50e9401db81c /src
parent 05519126a02ee39bf0957d3d85a1da5bd7e1c09c (diff)
Speed up lexing of long JSON strings
Use optimized linear search when looking ahead for end quotes, backslashes, and non-printable characters. This results in nearly 40% faster JSON parsing on x86-64 when most values are long strings, and all platforms should see some improvement. Reviewed by Andres Freund and Nathan Bossart. Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r-- src/common/jsonapi.c 13
-rw-r--r-- src/test/regress/expected/json.out 13
-rw-r--r-- src/test/regress/sql/json.sql 5
3 files changed, 28 insertions, 3 deletions
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index fefd1d24d90..cfc025749cc 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -19,6 +19,7 @@
#include "common/jsonapi.h"
#include "mb/pg_wchar.h"
+#include "port/pg_lfind.h"
#ifndef FRONTEND
#include "miscadmin.h"
@@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex)
}
else
{
- char *p;
+ char *p = s;
if (hi_surrogate != -1)
return JSON_UNICODE_LOW_SURROGATE;
@@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex)
* Skip to the first byte that requires special handling, so we
* can batch calls to appendBinaryStringInfo.
*/
- for (p = s; p < end; p++)
+ while (p < end - sizeof(Vector8) &&
+ !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
+ !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
+ !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
+ p += sizeof(Vector8);
+
+ for (; p < end; p++)
{
if (*p == '\\' || *p == '"')
break;
- else if ((unsigned char) *p < 32)
+ else if ((unsigned char) *p <= 31)
{
/* Per RFC4627, these characters MUST be escaped. */
/*
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index e9d6e9faf29..cb181226e9f 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json;
^
DETAIL: Escape sequence "\v" is invalid.
CONTEXT: JSON data, line 1: "\v...
+-- Check fast path for longer strings (at least 16 bytes long)
+SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
+ json
+-------------------
+ "............abc"
+(1 row)
+
+SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
+ json
+---------------------
+ "............abc\n"
+(1 row)
+
-- see json_encoding test for input with unicode escapes
-- Numbers.
SELECT '1'::json; -- OK
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index e366c6f51b6..589e0cea367 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -7,6 +7,11 @@ SELECT '"abc
def"'::json; -- ERROR, unescaped newline in string constant
SELECT '"\n\"\\"'::json; -- OK, legal escapes
SELECT '"\v"'::json; -- ERROR, not a valid JSON escape
+
+-- Check fast path for longer strings (at least 16 bytes long)
+SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
+SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
+
-- see json_encoding test for input with unicode escapes
-- Numbers.