|
12 | 12 | #include <string.h>
|
13 | 13 | #include <stdio.h>
|
14 | 14 |
|
| 15 | +#include "common/jsonapi.h" |
15 | 16 | #include "fe_utils/psqlscan.h"
|
16 | 17 | #include "fe_utils/string_utils.h"
|
17 | 18 | #include "getopt_long.h"
|
@@ -164,6 +165,88 @@ encoding_conflicts_ascii(int encoding)
|
164 | 165 | }
|
165 | 166 |
|
166 | 167 |
|
| 168 | +/* |
| 169 | + * Confirm escaping doesn't read past the end of an allocation. Consider the |
| 170 | + * result of malloc(4096), in the absence of freelist entries satisfying the |
| 171 | + * allocation. On OpenBSD, reading one byte past the end of that object |
| 172 | + * yields SIGSEGV. |
| 173 | + * |
| 174 | + * Run this test before the program's other tests, so freelists are minimal. |
| 175 | + * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192 |
| 176 | + * did. Use 128 KiB, to somewhat insulate the outcome from distant new free() |
| 177 | + * calls and libc changes. |
| 178 | + */ |
| 179 | +static void |
| 180 | +test_gb18030_page_multiple(pe_test_config *tc) |
| 181 | +{ |
| 182 | + PQExpBuffer testname; |
| 183 | + size_t input_len = 0x20000; |
| 184 | + char *input; |
| 185 | + |
| 186 | + /* prepare input */ |
| 187 | + input = pg_malloc(input_len); |
| 188 | + memset(input, '-', input_len - 1); |
| 189 | + input[input_len - 1] = 0xfe; |
| 190 | + |
| 191 | + /* name to describe the test */ |
| 192 | + testname = createPQExpBuffer(); |
| 193 | + appendPQExpBuffer(testname, ">repeat(%c, %zu)", input[0], input_len - 1); |
| 194 | + escapify(testname, input + input_len - 1, 1); |
| 195 | + appendPQExpBuffer(testname, "< - GB18030 - PQescapeLiteral"); |
| 196 | + |
| 197 | + /* test itself */ |
| 198 | + PQsetClientEncoding(tc->conn, "GB18030"); |
| 199 | + report_result(tc, PQescapeLiteral(tc->conn, input, input_len) == NULL, |
| 200 | + testname->data, "", |
| 201 | + "input validity vs escape success", "ok"); |
| 202 | + |
| 203 | + destroyPQExpBuffer(testname); |
| 204 | + pg_free(input); |
| 205 | +} |
| 206 | + |
| 207 | +/* |
| 208 | + * Confirm json parsing doesn't read past the end of an allocation. This |
| 209 | + * exercises wchar.c infrastructure like the true "escape" tests do, but this |
| 210 | + * isn't an "escape" test. |
| 211 | + */ |
| 212 | +static void |
| 213 | +test_gb18030_json(pe_test_config *tc) |
| 214 | +{ |
| 215 | + PQExpBuffer raw_buf; |
| 216 | + PQExpBuffer testname; |
| 217 | + const char input[] = "{\"\\u\xFE"; |
| 218 | + size_t input_len = sizeof(input) - 1; |
| 219 | + JsonLexContext *lex; |
| 220 | + JsonSemAction sem = {0}; /* no callbacks */ |
| 221 | + JsonParseErrorType json_error; |
| 222 | + |
| 223 | + /* prepare input like test_one_vector_escape() does */ |
| 224 | + raw_buf = createPQExpBuffer(); |
| 225 | + appendBinaryPQExpBuffer(raw_buf, input, input_len); |
| 226 | + appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR); |
| 227 | + VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len], |
| 228 | + raw_buf->len - input_len); |
| 229 | + |
| 230 | + /* name to describe the test */ |
| 231 | + testname = createPQExpBuffer(); |
| 232 | + appendPQExpBuffer(testname, ">"); |
| 233 | + escapify(testname, input, input_len); |
| 234 | + appendPQExpBuffer(testname, "< - GB18030 - pg_parse_json"); |
| 235 | + |
| 236 | + /* test itself */ |
| 237 | + lex = makeJsonLexContextCstringLen(NULL, raw_buf->data, input_len, |
| 238 | + PG_GB18030, false); |
| 239 | + json_error = pg_parse_json(lex, &sem); |
| 240 | + report_result(tc, json_error == JSON_UNICODE_ESCAPE_FORMAT, |
| 241 | + testname->data, "", |
| 242 | + "diagnosed", json_errdetail(json_error, lex)); |
| 243 | + |
| 244 | + freeJsonLexContext(lex); |
| 245 | + destroyPQExpBuffer(testname); |
| 246 | + destroyPQExpBuffer(raw_buf); |
| 247 | +} |
| 248 | + |
| 249 | + |
167 | 250 | static bool
|
168 | 251 | escape_literal(PGconn *conn, PQExpBuffer target,
|
169 | 252 | const char *unescaped, size_t unescaped_len,
|
@@ -451,8 +534,18 @@ static pe_test_vector pe_test_vectors[] =
|
451 | 534 | * Testcases that are not null terminated for the specified input length.
|
452 | 535 | * That's interesting to verify that escape functions don't read beyond
|
453 | 536 | * the intended input length.
|
| 537 | + * |
| 538 | + * One interesting special case is GB18030, which has the odd behaviour |
| 539 | + * of needing to read beyond the first byte to determine the length of a |
| 540 | + * multi-byte character. |
454 | 541 | */
|
455 | 542 | TV_LEN("gbk", "\x80", 1),
|
| 543 | + TV_LEN("GB18030", "\x80", 1), |
| 544 | + TV_LEN("GB18030", "\x80\0", 2), |
| 545 | + TV_LEN("GB18030", "\x80\x30", 2), |
| 546 | + TV_LEN("GB18030", "\x80\x30\0", 3), |
| 547 | + TV_LEN("GB18030", "\x80\x30\x30", 3), |
| 548 | + TV_LEN("GB18030", "\x80\x30\x30\0", 4), |
456 | 549 | TV_LEN("UTF-8", "\xC3\xb6 ", 1),
|
457 | 550 | TV_LEN("UTF-8", "\xC3\xb6 ", 2),
|
458 | 551 | };
|
@@ -861,6 +954,9 @@ main(int argc, char *argv[])
|
861 | 954 | exit(1);
|
862 | 955 | }
|
863 | 956 |
|
| 957 | + test_gb18030_page_multiple(&tc); |
| 958 | + test_gb18030_json(&tc); |
| 959 | + |
864 | 960 | for (int i = 0; i < lengthof(pe_test_vectors); i++)
|
865 | 961 | {
|
866 | 962 | test_one_vector(&tc, &pe_test_vectors[i]);
|
|
0 commit comments