From cde9f3c1af6aa1c2c33db651af31a81fd5f9a724 Mon Sep 17 00:00:00 2001 From: ChangAo Chen Date: Fri, 13 Mar 2026 14:53:21 +0800 Subject: [PATCH v4] Fix wrong result of refresh matview concurrently. During populating the diff table, we use the "*=" operator which considers two NULLs equal. This can lead to one row may match more than one row during the full join. If this happens, we will get a wrong diff table. To fix it, we add a new built-in function called record_image_eq_variant which considers two NULLs not equal and use it when populating the diff table. This makes sure that each row can match at most one row during the full join. --- src/backend/commands/matview.c | 6 +++- src/backend/utils/adt/rowtypes.c | 46 +++++++++++++++++---------- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 5 +++ src/test/regress/expected/matview.out | 18 +++++++++++ src/test/regress/sql/matview.sql | 17 ++++++++++ 6 files changed, 75 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 81a55a33ef2..fc63e86eac1 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -826,8 +826,12 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, errmsg("could not find suitable unique index on materialized view \"%s\"", RelationGetRelationName(matviewRel))); + /* + * We use record_image_eq_variant which considers two NULLs not equal + * so that each row can match at most one row during the full join. + */ appendStringInfoString(&querybuf, - " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) " + " AND pg_catalog.record_image_eq_variant(newdata.*, mv.*)) " "WHERE newdata.* IS NULL OR mv.* IS NULL " "ORDER BY tid"); diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index e4eb7111ee7..64592162fab 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -1583,16 +1583,19 @@ record_image_cmp(FunctionCallInfo fcinfo) } /* - * record_image_eq : + * record_image_eq_internal : * compares two records for identical contents, based on byte images * result : * returns true if the records are identical, false otherwise. * * Note: we do not use record_image_cmp here, since we can avoid * de-toasting for unequal lengths this way. + * + * Note: we consider two NULLs equal if null_equals_null is true, otherwise + * not equal. */ -Datum -record_image_eq(PG_FUNCTION_ARGS) +static bool +record_image_eq_internal(FunctionCallInfo fcinfo, bool null_equals_null) { HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0); HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1); @@ -1720,21 +1723,18 @@ record_image_eq(PG_FUNCTION_ARGS) j + 1))); /* - * We consider two NULLs equal; NULL > not-NULL. + * We consider two NULLs equal if null_equals_null is + * true, otherwise not equal. */ - if (!nulls1[i1] || !nulls2[i2]) - { - if (nulls1[i1] || nulls2[i2]) - { - result = false; - break; - } - - /* Compare the pair of elements */ + if (nulls1[i1] && nulls2[i2]) + result = null_equals_null; + else if (nulls1[i1] || nulls2[i2]) + result = false; + else result = datum_image_eq(values1[i1], values2[i2], att1->attbyval, att2->attlen); - if (!result) - break; - } + + if (!result) + break; /* equal, so continue to next column */ i1++, i2++, j++; @@ -1764,7 +1764,19 @@ record_image_eq(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(record1, 0); PG_FREE_IF_COPY(record2, 1); - PG_RETURN_BOOL(result); + return result; +} + +Datum +record_image_eq(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_eq_internal(fcinfo, true)); +} + +Datum +record_image_eq_variant(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(record_image_eq_internal(fcinfo, false)); } Datum diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 90f46b03502..3da6a75ff87 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202603101 +#define CATALOG_VERSION_NO 202603131 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 361e2cfffeb..42e7d5f946d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10856,6 +10856,11 @@ proname => 'btequalimage', prorettype => 'bool', proargtypes => 'oid', prosrc => 'btequalimage' }, +# used for REFRESH MATERIALIZED VIEW CONCURRENTLY +{ oid => '9145', descr => 'variant of record_image_eq', + proname => 'record_image_eq_variant', prorettype => 'bool', + proargtypes => 'record record', prosrc => 'record_image_eq_variant' }, + # Extensions { oid => '3082', descr => 'list available extensions', proname => 'pg_available_extensions', procost => '10', prorows => '100', diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 0355720dfc6..ebd9d806698 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -400,6 +400,24 @@ ERROR: new data for materialized view "mvtest_mv" contains duplicate rows witho DETAIL: Row: (1,10) DROP TABLE mvtest_foo CASCADE; NOTICE: drop cascades to materialized view mvtest_mv +-- test that two NULLs not equal when populating the diff table +CREATE TABLE mvtest_foo(a int, b int); +INSERT INTO mvtest_foo VALUES(1, NULL); +CREATE MATERIALIZED VIEW mvtest_mv AS SELECT * FROM mvtest_foo; +CREATE UNIQUE INDEX ON mvtest_mv(a); +INSERT INTO mvtest_foo SELECT * FROM mvtest_foo; +DO $$ +BEGIN + BEGIN + REFRESH MATERIALIZED VIEW CONCURRENTLY mvtest_mv; + EXCEPTION + WHEN unique_violation THEN RAISE NOTICE 'unique violation'; + END; +END; +$$; +NOTICE: unique violation +DROP TABLE mvtest_foo CASCADE; +NOTICE: drop cascades to materialized view mvtest_mv -- make sure that all columns covered by unique indexes works CREATE TABLE mvtest_foo(a, b, c) AS VALUES(1, 2, 3); CREATE MATERIALIZED VIEW mvtest_mv AS SELECT * FROM mvtest_foo; diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql index 934426b9ae8..a27f81c349d 100644 --- a/src/test/regress/sql/matview.sql +++ b/src/test/regress/sql/matview.sql @@ -135,6 +135,23 @@ REFRESH MATERIALIZED VIEW mvtest_mv; REFRESH MATERIALIZED VIEW CONCURRENTLY mvtest_mv; DROP TABLE mvtest_foo CASCADE; +-- test that two NULLs not equal when populating the diff table +CREATE TABLE mvtest_foo(a int, b int); +INSERT INTO mvtest_foo VALUES(1, NULL); +CREATE MATERIALIZED VIEW mvtest_mv AS SELECT * FROM mvtest_foo; +CREATE UNIQUE INDEX ON mvtest_mv(a); +INSERT INTO mvtest_foo SELECT * FROM mvtest_foo; +DO $$ +BEGIN + BEGIN + REFRESH MATERIALIZED VIEW CONCURRENTLY mvtest_mv; + EXCEPTION + WHEN unique_violation THEN RAISE NOTICE 'unique violation'; + END; +END; +$$; +DROP TABLE mvtest_foo CASCADE; + -- make sure that all columns covered by unique indexes works CREATE TABLE mvtest_foo(a, b, c) AS VALUES(1, 2, 3); CREATE MATERIALIZED VIEW mvtest_mv AS SELECT * FROM mvtest_foo; -- 2.34.1