Skip to content

Commit b23af45

Browse files
committed
Fix erroneous hash calculations in gin_extract_jsonb_path().
The jsonb_path_ops code calculated hash values inconsistently in some cases involving nested arrays and objects. This would result in queries possibly not finding entries that they should find, when using a jsonb_path_ops GIN index for the search. The problem cases involve JSONB values that contain both scalars and sub-objects at the same nesting level, for example an array containing both scalars and sub-arrays. To fix, reset the current stack->hash after processing each value or sub-object, not before; and don't try to be cute about the outermost level's initial hash. Correcting this means that existing jsonb_path_ops indexes may now be inconsistent with the new hash calculation code. The symptom is the same --- searches not finding entries they should find --- but the specific rows affected are likely to be different. Users will need to REINDEX jsonb_path_ops indexes to make sure that all searches work as expected. Per bug #13756 from Daniel Cheng. Back-patch to 9.4 where the faulty logic was introduced.
1 parent 8c75ad4 commit b23af45

File tree

3 files changed

+88
-33
lines changed

3 files changed

+88
-33
lines changed

src/backend/utils/adt/jsonb_gin.c

+18-33
Original file line numberDiff line numberDiff line change
@@ -375,58 +375,43 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
375375
parent = stack;
376376
stack = (PathHashStack *) palloc(sizeof(PathHashStack));
377377

378-
if (parent->parent)
379-
{
380-
/*
381-
* We pass forward hashes from previous container nesting
382-
* levels so that nested arrays with an outermost nested
383-
* object will have element hashes mixed with the
384-
* outermost key. It's also somewhat useful to have
385-
* nested objects' innermost values have hashes that are a
386-
* function of not just their own key, but outer keys too.
387-
*
388-
* Nesting an array within another array will not alter
389-
* innermost scalar element hash values, but that seems
390-
* inconsequential.
391-
*/
392-
stack->hash = parent->hash;
393-
}
394-
else
395-
{
396-
/*
397-
* At the outermost level, initialize hash with container
398-
* type proxy value. Note that this makes JB_FARRAY and
399-
* JB_FOBJECT part of the on-disk representation, but they
400-
* are that in the base jsonb object storage already.
401-
*/
402-
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
403-
}
378+
/*
379+
* We pass forward hashes from outer nesting levels so that
380+
* the hashes for nested values will include outer keys as
381+
* well as their own keys.
382+
*
383+
* Nesting an array within another array will not alter
384+
* innermost scalar element hash values, but that seems
385+
* inconsequential.
386+
*/
387+
stack->hash = parent->hash;
404388
stack->parent = parent;
405389
break;
406390
case WJB_KEY:
407-
/* initialize hash from parent */
408-
stack->hash = stack->parent->hash;
409-
/* and mix in this key */
391+
/* mix this key into the current outer hash */
410392
JsonbHashScalarValue(&v, &stack->hash);
411393
/* hash is now ready to incorporate the value */
412394
break;
413395
case WJB_ELEM:
414-
/* array elements use parent hash mixed with element's hash */
415-
stack->hash = stack->parent->hash;
416-
/* FALL THRU */
417396
case WJB_VALUE:
418397
/* mix the element or value's hash into the prepared hash */
419398
JsonbHashScalarValue(&v, &stack->hash);
420399
/* and emit an index entry */
421400
entries[i++] = UInt32GetDatum(stack->hash);
422-
/* Note: we assume we'll see KEY before another VALUE */
401+
/* reset hash for next key, value, or sub-object */
402+
stack->hash = stack->parent->hash;
423403
break;
424404
case WJB_END_ARRAY:
425405
case WJB_END_OBJECT:
426406
/* Pop the stack */
427407
parent = stack->parent;
428408
pfree(stack);
429409
stack = parent;
410+
/* reset hash for next key, value, or sub-object */
411+
if (stack->parent)
412+
stack->hash = stack->parent->hash;
413+
else
414+
stack->hash = 0;
430415
break;
431416
default:
432417
elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);

src/test/regress/expected/jsonb.out

+50
Original file line numberDiff line numberDiff line change
@@ -2420,6 +2420,56 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
24202420
t
24212421
(1 row)
24222422

2423+
-- check some corner cases for indexed nested containment (bug #13756)
2424+
create temp table nestjsonb (j jsonb);
2425+
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
2426+
insert into nestjsonb (j) values ('[[14,2,3]]');
2427+
insert into nestjsonb (j) values ('[1,[14,2,3]]');
2428+
create index on nestjsonb using gin(j jsonb_path_ops);
2429+
set enable_seqscan = on;
2430+
set enable_bitmapscan = off;
2431+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
2432+
j
2433+
---------------------------------------------------
2434+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2435+
(1 row)
2436+
2437+
select * from nestjsonb where j @> '{"c":3}';
2438+
j
2439+
---------------------------------------------------
2440+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2441+
(1 row)
2442+
2443+
select * from nestjsonb where j @> '[[14]]';
2444+
j
2445+
-----------------
2446+
[[14, 2, 3]]
2447+
[1, [14, 2, 3]]
2448+
(2 rows)
2449+
2450+
set enable_seqscan = off;
2451+
set enable_bitmapscan = on;
2452+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
2453+
j
2454+
---------------------------------------------------
2455+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2456+
(1 row)
2457+
2458+
select * from nestjsonb where j @> '{"c":3}';
2459+
j
2460+
---------------------------------------------------
2461+
{"a": [["b", {"x": 1}], ["b", {"x": 2}]], "c": 3}
2462+
(1 row)
2463+
2464+
select * from nestjsonb where j @> '[[14]]';
2465+
j
2466+
-----------------
2467+
[[14, 2, 3]]
2468+
[1, [14, 2, 3]]
2469+
(2 rows)
2470+
2471+
reset enable_seqscan;
2472+
reset enable_bitmapscan;
24232473
-- nested object field / array index lookup
24242474
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
24252475
?column?

src/test/regress/sql/jsonb.sql

+20
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,26 @@ SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4}]}';
618618
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},3]}';
619619
SELECT '{"a":[1,2,{"c":3,"x":4}],"c":"b"}'::jsonb @> '{"a":[{"x":4},1]}';
620620

621+
-- check some corner cases for indexed nested containment (bug #13756)
622+
create temp table nestjsonb (j jsonb);
623+
insert into nestjsonb (j) values ('{"a":[["b",{"x":1}],["b",{"x":2}]],"c":3}');
624+
insert into nestjsonb (j) values ('[[14,2,3]]');
625+
insert into nestjsonb (j) values ('[1,[14,2,3]]');
626+
create index on nestjsonb using gin(j jsonb_path_ops);
627+
628+
set enable_seqscan = on;
629+
set enable_bitmapscan = off;
630+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
631+
select * from nestjsonb where j @> '{"c":3}';
632+
select * from nestjsonb where j @> '[[14]]';
633+
set enable_seqscan = off;
634+
set enable_bitmapscan = on;
635+
select * from nestjsonb where j @> '{"a":[[{"x":2}]]}'::jsonb;
636+
select * from nestjsonb where j @> '{"c":3}';
637+
select * from nestjsonb where j @> '[[14]]';
638+
reset enable_seqscan;
639+
reset enable_bitmapscan;
640+
621641
-- nested object field / array index lookup
622642
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'n';
623643
SELECT '{"n":null,"a":1,"b":[1,2],"c":{"1":2},"d":{"1":[2,3]}}'::jsonb -> 'a';

0 commit comments

Comments
 (0)