diff options
author | David Rowley | 2021-11-23 21:06:59 +0000 |
---|---|---|
committer | David Rowley | 2021-11-23 21:06:59 +0000 |
commit | e502150f7d0be41e3c8784be007fa871a32d8a7f (patch) | |
tree | a6c96abbe3eae534d938d05539627b4f03d23f62 /src/backend/executor/nodeMemoize.c | |
parent | 1922d7c6e1a74178bd2f1d5aa5a6ab921b3fcd34 (diff) |
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key
types to determine if the current set of parameters were the same as some
previously cached set. Certain types such as floating points where -0.0
and +0.0 differ in their binary representation but are classed as equal by
the hash equality operator may cause problems as unless the join uses the
same operator it's possible that whichever join operator is being used
would be able to distinguish the two values. In which case we may
accidentally return in the incorrect rows out of the cache.
To fix this here we add a binary mode to Memoize to allow it to the
current set of parameters to previously cached values by comparing
bit-by-bit rather than logically using the hash equality operator. This
binary mode is always used for LATERAL joins and it's used for normal
joins when any of the join operators are not hashable.
Reported-by: Tom Lane
Author: David Rowley
Discussion: https://postgr.es/m/[email protected]
Backpatch-through: 14, where Memoize was added
Diffstat (limited to 'src/backend/executor/nodeMemoize.c')
-rw-r--r-- | src/backend/executor/nodeMemoize.c | 92 |
1 files changed, 76 insertions, 16 deletions
diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c index bec588b3a04..683502dd90e 100644 --- a/src/backend/executor/nodeMemoize.c +++ b/src/backend/executor/nodeMemoize.c @@ -71,6 +71,7 @@ #include "executor/nodeMemoize.h" #include "lib/ilist.h" #include "miscadmin.h" +#include "utils/datum.h" #include "utils/lsyscache.h" /* States of the ExecMemoize state machine */ @@ -131,7 +132,7 @@ typedef struct MemoizeEntry static uint32 MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key); -static int MemoizeHash_equal(struct memoize_hash *tb, +static bool MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *params1, const MemoizeKey *params2); @@ -140,7 +141,7 @@ static int MemoizeHash_equal(struct memoize_hash *tb, #define SH_KEY_TYPE MemoizeKey * #define SH_KEY key #define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key) -#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0) +#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b) #define SH_SCOPE static inline #define SH_STORE_HASH #define SH_GET_HASH(tb, a) a->hash @@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) TupleTableSlot *pslot = mstate->probeslot; uint32 hashkey = 0; int numkeys = mstate->nkeys; - FmgrInfo *hashfunctions = mstate->hashfunctions; - Oid *collations = mstate->collations; - for (int i = 0; i < numkeys; i++) + if (mstate->binary_mode) { - /* rotate hashkey left 1 bit at each step */ - hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + for (int i = 0; i < numkeys; i++) + { + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + FormData_pg_attribute *attr; + uint32 hkey; + + attr = &pslot->tts_tupleDescriptor->attrs[i]; + + hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen); + + hashkey ^= hkey; + } + } + } + else + { + FmgrInfo *hashfunctions = mstate->hashfunctions; + Oid *collations = mstate->collations; - if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + for (int i = 0; i < numkeys; i++) { - uint32 hkey; + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], - collations[i], pslot->tts_values[i])); - hashkey ^= hkey; + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + collations[i], pslot->tts_values[i])); + hashkey ^= hkey; + } } } @@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) * table lookup. 'key2' is never used. Instead the MemoizeState's * probeslot is always populated with details of what's being looked up. */ -static int +static bool MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, const MemoizeKey *key2) { @@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, /* probeslot should have already been prepared by prepare_probe_slot() */ ExecStoreMinimalTuple(key1->params, tslot, false); - econtext->ecxt_innertuple = tslot; - econtext->ecxt_outertuple = pslot; - return !ExecQualAndReset(mstate->cache_eq_expr, econtext); + if (mstate->binary_mode) + { + int numkeys = mstate->nkeys; + + slot_getallattrs(tslot); + slot_getallattrs(pslot); + + for (int i = 0; i < numkeys; i++) + { + FormData_pg_attribute *attr; + + if (tslot->tts_isnull[i] != pslot->tts_isnull[i]) + return false; + + /* both NULL? they're equal */ + if (tslot->tts_isnull[i]) + continue; + + /* perform binary comparison on the two datums */ + attr = &tslot->tts_tupleDescriptor->attrs[i]; + if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i], + attr->attbyval, attr->attlen)) + return false; + } + return true; + } + else + { + econtext->ecxt_innertuple = tslot; + econtext->ecxt_outertuple = pslot; + return ExecQualAndReset(mstate->cache_eq_expr, econtext); + } } /* @@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags) */ mstate->singlerow = node->singlerow; + /* + * Record if the cache keys should be compared bit by bit, or logically + * using the type's hash equality operator + */ + mstate->binary_mode = node->binary_mode; + /* Zero the statistics counters */ memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation)); |