diff options
| author | Tom Lane | 2011-01-08 00:16:24 +0000 |
|---|---|---|
| committer | Tom Lane | 2011-01-08 00:16:24 +0000 |
| commit | 73912e7fbd1b52c51d914214abbec1cda64595f2 (patch) | |
| tree | f6ae2849198dd7a17ae6a5ec174796848ec07cdb /src/backend/access/gin/ginget.c | |
| parent | 9b4271deb97270d336c9d34ac911748faa5a4892 (diff) | |
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
Diffstat (limited to 'src/backend/access/gin/ginget.c')
| -rw-r--r-- | src/backend/access/gin/ginget.c | 858 |
1 files changed, 501 insertions, 357 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index ed1e71c9d6f..aaef6efbb50 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -14,7 +14,7 @@ #include "postgres.h" -#include "access/gin.h" +#include "access/gin_private.h" #include "access/relscan.h" #include "catalog/index.h" #include "miscadmin.h" @@ -34,10 +34,43 @@ typedef struct pendingPosition /* - * Tries to refind previously taken ItemPointer on page. + * Convenience function for invoking a key's consistentFn */ static bool -findItemInPage(Page page, ItemPointer item, OffsetNumber *off) +callConsistentFn(GinState *ginstate, GinScanKey key) +{ + /* + * If we're dealing with a dummy EVERYTHING key, we don't want to call + * the consistentFn; just claim it matches. + */ + if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING) + { + key->recheckCurItem = false; + return true; + } + + /* + * Initialize recheckCurItem in case the consistentFn doesn't know it + * should set it. The safe assumption in that case is to force recheck. + */ + key->recheckCurItem = true; + + return DatumGetBool(FunctionCall8(&ginstate->consistentFn[key->attnum - 1], + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(&key->recheckCurItem), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); +} + +/* + * Tries to refind previously taken ItemPointer on a posting page. + */ +static bool +findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; @@ -79,7 +112,9 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack) return false; /* no more pages */ LockBuffer(stack->buffer, GIN_UNLOCK); - stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); + stack->buffer = ReleaseAndReadBuffer(stack->buffer, + btree->index, + stack->blkno); LockBuffer(stack->buffer, GIN_SHARE); stack->off = FirstOffsetNumber; } @@ -88,17 +123,19 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack) } /* - * Does fullscan of posting tree and saves ItemPointers - * in scanEntry->partialMatch TIDBitmap + * Scan all pages of a posting tree and save all its heap ItemPointers + * in scanEntry->matchBitmap */ static void -scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree) +scanPostingTree(Relation index, GinScanEntry scanEntry, + BlockNumber rootPostingTree) { GinPostingTreeScan *gdi; Buffer buffer; Page page; BlockNumber blkno; + /* Descend to the leftmost leaf page */ gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE); buffer = ginScanBeginPostingTree(gdi); @@ -108,51 +145,72 @@ scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree pfree(gdi); /* - * Goes through all leaves + * Loop iterates through all leaf pages of posting tree */ for (;;) { page = BufferGetPage(buffer); - if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) + if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && + GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) { - tbm_add_tuples(scanEntry->partialMatch, + tbm_add_tuples(scanEntry->matchBitmap, (ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff, false); scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; } - blkno = GinPageGetOpaque(page)->rightlink; if (GinPageRightMost(page)) - { - UnlockReleaseBuffer(buffer); - return; /* no more pages */ - } + break; /* no more pages */ + blkno = GinPageGetOpaque(page)->rightlink; LockBuffer(buffer, GIN_UNLOCK); buffer = ReleaseAndReadBuffer(buffer, index, blkno); LockBuffer(buffer, GIN_SHARE); } + + UnlockReleaseBuffer(buffer); } /* - * Collects all ItemPointer into the TIDBitmap struct - * for entries partially matched to search entry. + * Collects TIDs into scanEntry->matchBitmap for all heap tuples that + * match the search entry. This supports three different match modes: + * + * 1. Partial-match support: scan from current point until the + * comparePartialFn says we're done. + * 2. SEARCH_MODE_ALL: scan from current point (which should be first + * key for the current attnum) until we hit null items or end of attnum + * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first + * key for the current attnum) until we hit end of attnum * - * Returns true if done, false if it's needed to restart scan from scratch + * Returns true if done, false if it's necessary to restart scan from scratch */ static bool -computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry) +collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, + GinScanEntry scanEntry) { - Page page; - IndexTuple itup; - Datum idatum; - int32 cmp; + OffsetNumber attnum; + Form_pg_attribute attr; - scanEntry->partialMatch = tbm_create(work_mem * 1024L); + /* Initialize empty bitmap result */ + scanEntry->matchBitmap = tbm_create(work_mem * 1024L); + + /* Null query cannot partial-match anything */ + if (scanEntry->isPartialMatch && + scanEntry->queryCategory != GIN_CAT_NORM_KEY) + return true; + + /* Locate tupdesc entry for key column (for attbyval/attlen data) */ + attnum = scanEntry->attnum; + attr = btree->ginstate->origTupdesc->attrs[attnum - 1]; for (;;) { + Page page; + IndexTuple itup; + Datum idatum; + GinNullCategory icategory; + /* * stack->off points to the interested entry, buffer is already locked */ @@ -165,56 +223,84 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry /* * If tuple stores another attribute then stop scan */ - if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum) + if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) return true; - idatum = gin_index_getattr(btree->ginstate, itup); + /* Safe to fetch attribute value */ + idatum = gintuple_get_key(btree->ginstate, itup, &icategory); - - /*---------- - * Check of partial match. - * case cmp == 0 => match - * case cmp > 0 => not match and finish scan - * case cmp < 0 => not match and continue scan - *---------- + /* + * Check for appropriate scan stop conditions */ - cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[scanEntry->attnum - 1], - scanEntry->entry, - idatum, - UInt16GetDatum(scanEntry->strategy), - PointerGetDatum(scanEntry->extra_data))); + if (scanEntry->isPartialMatch) + { + int32 cmp; - if (cmp > 0) - return true; - else if (cmp < 0) + /* + * In partial match, stop scan at any null (including + * placeholders); partial matches never match nulls + */ + if (icategory != GIN_CAT_NORM_KEY) + return true; + + /*---------- + * Check of partial match. + * case cmp == 0 => match + * case cmp > 0 => not match and finish scan + * case cmp < 0 => not match and continue scan + *---------- + */ + cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[attnum - 1], + scanEntry->queryKey, + idatum, + UInt16GetDatum(scanEntry->strategy), + PointerGetDatum(scanEntry->extra_data))); + + if (cmp > 0) + return true; + else if (cmp < 0) + { + stack->off++; + continue; + } + } + else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL) { - stack->off++; - continue; + /* + * In ALL mode, we are not interested in null items, so we can + * stop if we get to a null-item placeholder (which will be the + * last entry for a given attnum). We do want to include NULL_KEY + * and EMPTY_ITEM entries, though. + */ + if (icategory == GIN_CAT_NULL_ITEM) + return true; } + /* + * OK, we want to return the TIDs listed in this entry. + */ if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); - Datum newDatum, - savedDatum = datumCopy( - idatum, - btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval, - btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attlen - ); /* * We should unlock current page (but not unpin) during tree scan * to prevent deadlock with vacuum processes. * - * We save current entry value (savedDatum) to be able to refind + * We save current entry value (idatum) to be able to re-find * our tuple after re-locking */ + if (icategory == GIN_CAT_NORM_KEY) + idatum = datumCopy(idatum, attr->attbyval, attr->attlen); + LockBuffer(stack->buffer, GIN_UNLOCK); - scanForItems(btree->index, scanEntry, rootPostingTree); + + /* Collect all the TIDs in this entry's posting tree */ + scanPostingTree(btree->index, scanEntry, rootPostingTree); /* * We lock again the entry page and while it was unlocked insert - * might occured, so we need to refind our position + * might have occurred, so we need to re-find our position. */ LockBuffer(stack->buffer, GIN_SHARE); page = BufferGetPage(stack->buffer); @@ -222,45 +308,49 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry { /* * Root page becomes non-leaf while we unlock it. We will - * start again, this situation doesn't cause often - root can - * became a non-leaf only one per life of index. + * start again, this situation doesn't occur often - root can + * became a non-leaf only once per life of index. */ - return false; } + /* Search forward to re-find idatum */ for (;;) { + Datum newDatum; + GinNullCategory newCategory; + if (moveRightIfItNeeded(btree, stack) == false) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ page = BufferGetPage(stack->buffer); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); - newDatum = gin_index_getattr(btree->ginstate, itup); - if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum) + if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + newDatum = gintuple_get_key(btree->ginstate, itup, + &newCategory); - if (ginCompareEntries(btree->ginstate, scanEntry->attnum, - newDatum, savedDatum) == 0) - { - /* Found! */ - if (btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval == false) - pfree(DatumGetPointer(savedDatum)); - break; - } + if (ginCompareEntries(btree->ginstate, attnum, + newDatum, newCategory, + idatum, icategory) == 0) + break; /* Found! */ stack->off++; } + + if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval) + pfree(DatumGetPointer(idatum)); } else { - tbm_add_tuples(scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false); + tbm_add_tuples(scanEntry->matchBitmap, + GinGetPosting(itup), GinGetNPosting(itup), false); scanEntry->predictNumberResult += GinGetNPosting(itup); } /* - * Ok, we save ItemPointers, go to the next entry + * Done with this entry, go to the next */ stack->off++; } @@ -272,19 +362,20 @@ computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void -startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) +startScanEntry(GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; - bool needUnlock = TRUE; + bool needUnlock; +restartScanEntry: entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; - entry->partialMatch = NULL; - entry->partialMatchResult = NULL; + entry->matchBitmap = NULL; + entry->matchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; @@ -298,46 +389,50 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) * we should find entry, and begin scan of posting tree or just store * posting list in memory */ - - ginPrepareEntryScan(&btreeEntry, index, entry->attnum, entry->entry, ginstate); + ginPrepareEntryScan(&btreeEntry, entry->attnum, + entry->queryKey, entry->queryCategory, + ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); + needUnlock = TRUE; entry->isFinished = TRUE; - if (entry->isPartialMatch) + if (entry->isPartialMatch || + entry->queryCategory == GIN_CAT_EMPTY_QUERY) { /* - * btreeEntry.findItem points to the first equal or greater value than - * needed. So we will scan further and collect all ItemPointers + * btreeEntry.findItem locates the first item >= given search key. + * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item + * because of the way the GIN_CAT_EMPTY_QUERY category code is + * assigned.) We scan forward from there and collect all TIDs needed + * for the entry type. */ btreeEntry.findItem(&btreeEntry, stackEntry); - if (computePartialMatchList(&btreeEntry, stackEntry, entry) == false) + if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false) { /* * GIN tree was seriously restructured, so we will cleanup all * found data and rescan. See comments near 'return false' in - * computePartialMatchList() + * collectMatchBitmap() */ - if (entry->partialMatch) + if (entry->matchBitmap) { - if (entry->partialMatchIterator) - tbm_end_iterate(entry->partialMatchIterator); - entry->partialMatchIterator = NULL; - tbm_free(entry->partialMatch); - entry->partialMatch = NULL; + if (entry->matchIterator) + tbm_end_iterate(entry->matchIterator); + entry->matchIterator = NULL; + tbm_free(entry->matchBitmap); + entry->matchBitmap = NULL; } LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); - - startScanEntry(index, ginstate, entry); - return; + goto restartScanEntry; } - if (entry->partialMatch && !tbm_is_empty(entry->partialMatch)) + if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) { - entry->partialMatchIterator = tbm_begin_iterate(entry->partialMatch); + entry->matchIterator = tbm_begin_iterate(entry->matchBitmap); entry->isFinished = FALSE; } } @@ -352,7 +447,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) Page page; /* - * We should unlock entry page before make deal with posting tree + * We should unlock entry page before touching posting tree * to prevent deadlocks with vacuum processes. Because entry is * never deleted from page and posting tree is never reduced to * the posting list, we can unlock page after getting BlockNumber @@ -360,7 +455,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) */ LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; - gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE); + gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE); entry->buffer = ginScanBeginPostingTree(gdi); @@ -402,7 +497,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) } static void -startScanKey(Relation index, GinState *ginstate, GinScanKey key) +startScanKey(GinState *ginstate, GinScanKey key) { uint32 i; @@ -410,7 +505,7 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) return; for (i = 0; i < key->nentries; i++) - startScanEntry(index, ginstate, key->scanEntry + i); + startScanEntry(ginstate, key->scanEntry + i); key->isFinished = FALSE; key->firstCall = FALSE; @@ -444,7 +539,7 @@ startScan(IndexScanDesc scan) GinScanOpaque so = (GinScanOpaque) scan->opaque; for (i = 0; i < so->nkeys; i++) - startScanKey(scan->indexRelation, &so->ginstate, so->keys + i); + startScanKey(&so->ginstate, so->keys + i); } /* @@ -453,7 +548,7 @@ startScan(IndexScanDesc scan) * to prevent interference with vacuum */ static void -entryGetNextItem(Relation index, GinScanEntry entry) +entryGetNextItem(GinState *ginstate, GinScanEntry entry) { Page page; BlockNumber blkno; @@ -487,13 +582,15 @@ entryGetNextItem(Relation index, GinScanEntry entry) return; } - entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); + entry->buffer = ReleaseAndReadBuffer(entry->buffer, + ginstate->index, + blkno); LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; if (!ItemPointerIsValid(&entry->curItem) || - findItemInPage(page, &entry->curItem, &entry->offset)) + findItemInPostingPage(page, &entry->curItem, &entry->offset)) { /* * Found position equal to or greater than stored @@ -512,7 +609,6 @@ entryGetNextItem(Relation index, GinScanEntry entry) * First pages are deleted or empty, or we found exact * position, so break inner loop and continue outer one. */ - break; } @@ -527,25 +623,6 @@ entryGetNextItem(Relation index, GinScanEntry entry) } } -/* convenience function for invoking a key's consistentFn */ -static inline bool -callConsistentFn(GinState *ginstate, GinScanKey key) -{ - /* - * Initialize recheckCurItem in case the consistentFn doesn't know it - * should set it. The safe assumption in that case is to force recheck. - */ - key->recheckCurItem = true; - - return DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1], - PointerGetDatum(key->entryRes), - UInt16GetDatum(key->strategy), - key->query, - UInt32GetDatum(key->nentries), - PointerGetDatum(key->extra_data), - PointerGetDatum(&key->recheckCurItem))); -} - #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE)) #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) @@ -563,7 +640,7 @@ callConsistentFn(GinState *ginstate, GinScanKey key) * current implementation this is guaranteed by the behavior of tidbitmaps. */ static void -entryGetItem(Relation index, GinScanEntry entry) +entryGetItem(GinState *ginstate, GinScanEntry entry) { Assert(!entry->isFinished); @@ -572,41 +649,40 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } - else if (entry->partialMatch) + else if (entry->matchBitmap) { do { - if (entry->partialMatchResult == NULL || - entry->offset >= entry->partialMatchResult->ntuples) + if (entry->matchResult == NULL || + entry->offset >= entry->matchResult->ntuples) { - entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator); + entry->matchResult = tbm_iterate(entry->matchIterator); - if (entry->partialMatchResult == NULL) + if (entry->matchResult == NULL) { ItemPointerSetInvalid(&entry->curItem); - tbm_end_iterate(entry->partialMatchIterator); - entry->partialMatchIterator = NULL; + tbm_end_iterate(entry->matchIterator); + entry->matchIterator = NULL; entry->isFinished = TRUE; break; } /* - * reset counter to the beginning of - * entry->partialMatchResult. Note: entry->offset is still - * greater than partialMatchResult->ntuples if - * partialMatchResult is lossy. So, on next call we will get - * next result from TIDBitmap. + * Reset counter to the beginning of entry->matchResult. + * Note: entry->offset is still greater than + * matchResult->ntuples if matchResult is lossy. So, on next + * call we will get next result from TIDBitmap. */ entry->offset = 0; } - if (entry->partialMatchResult->ntuples < 0) + if (entry->matchResult->ntuples < 0) { /* * lossy result, so we need to check the whole page */ ItemPointerSetLossyPage(&entry->curItem, - entry->partialMatchResult->blockno); + entry->matchResult->blockno); /* * We might as well fall out of the loop; we could not @@ -617,8 +693,8 @@ entryGetItem(Relation index, GinScanEntry entry) } ItemPointerSet(&entry->curItem, - entry->partialMatchResult->blockno, - entry->partialMatchResult->offsets[entry->offset]); + entry->matchResult->blockno, + entry->matchResult->offsets[entry->offset]); entry->offset++; } while (entry->reduceResult == TRUE && dropItem(entry)); } @@ -637,7 +713,7 @@ entryGetItem(Relation index, GinScanEntry entry) { do { - entryGetNextItem(index, entry); + entryGetNextItem(ginstate, entry); } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); @@ -662,7 +738,7 @@ entryGetItem(Relation index, GinScanEntry entry) * logic in scanGetItem.) */ static void -keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, +keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointer advancePast) { ItemPointerData myAdvancePast = *advancePast; @@ -701,7 +777,7 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, while (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &myAdvancePast) <= 0) - entryGetItem(index, entry); + entryGetItem(ginstate, entry); if (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &key->curItem) < 0) @@ -800,12 +876,12 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, * item pointers, possibly in combination with a lossy pointer. Our * strategy if there's a lossy pointer is to try the consistentFn both * ways and return a hit if it accepts either one (forcing the hit to - * be marked lossy so it will be rechecked). + * be marked lossy so it will be rechecked). An exception is that + * we don't need to try it both ways if the lossy pointer is in a + * "hidden" entry, because the consistentFn's result can't depend on + * that. * * Prepare entryRes array to be passed to consistentFn. - * - * (If key->nentries == 1 then the consistentFn should always succeed, - * but we must call it anyway to find out the recheck status.) */ for (i = 0; i < key->nentries; i++) { @@ -821,7 +897,7 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, res = callConsistentFn(ginstate, key); - if (!res && haveLossyEntry) + if (!res && haveLossyEntry && lossyEntry < key->nuserentries) { /* try the other way for the lossy item */ key->entryRes[lossyEntry] = FALSE; @@ -839,13 +915,137 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, } while (!res); } +/* + * Get next heap item pointer (after advancePast) from scan. + * Returns true if anything found. + * On success, *item and *recheck are set. + * + * Note: this is very nearly the same logic as in keyGetItem(), except + * that we know the keys are to be combined with AND logic, whereas in + * keyGetItem() the combination logic is known only to the consistentFn. + */ +static bool +scanGetItem(IndexScanDesc scan, ItemPointer advancePast, + ItemPointerData *item, bool *recheck) +{ + GinScanOpaque so = (GinScanOpaque) scan->opaque; + ItemPointerData myAdvancePast = *advancePast; + uint32 i; + bool match; + + for (;;) + { + /* + * Advance any keys that are <= myAdvancePast. In particular, + * since key->curItem was initialized with ItemPointerSetMin, this + * ensures we fetch the first item for each key on the first call. + * Then set *item to the minimum of the key curItems. + * + * Note: a lossy-page entry is encoded by a ItemPointer with max value + * for offset (0xffff), so that it will sort after any exact entries + * for the same page. So we'll prefer to return exact pointers not + * lossy pointers, which is good. Also, when we advance past an exact + * entry after processing it, we will not advance past lossy entries + * for the same page in other keys, which is NECESSARY for correct + * results (since we might have additional entries for the same page + * in the first key). + */ + ItemPointerSetMax(item); + + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + while (key->isFinished == FALSE && + ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0) + keyGetItem(&so->ginstate, so->tempCtx, + key, &myAdvancePast); + + if (key->isFinished) + return FALSE; /* finished one of keys */ + + if (ginCompareItemPointers(&key->curItem, item) < 0) + *item = key->curItem; + } + + Assert(!ItemPointerIsMax(item)); + + /*---------- + * Now *item contains first ItemPointer after previous result. + * + * The item is a valid hit only if all the keys returned either + * that exact TID, or a lossy reference to the same page. + * + * This logic works only if a keyGetItem stream can never contain both + * exact and lossy pointers for the same page. Else we could have a + * case like + * + * stream 1 stream 2 + * ... ... + * 42/6 42/7 + * 50/1 42/0xffff + * ... ... + * + * We would conclude that 42/6 is not a match and advance stream 1, + * thus never detecting the match to the lossy pointer in stream 2. + * (keyGetItem has a similar problem versus entryGetItem.) + *---------- + */ + match = true; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (ginCompareItemPointers(item, &key->curItem) == 0) + continue; + if (ItemPointerIsLossyPage(&key->curItem) && + GinItemPointerGetBlockNumber(&key->curItem) == + GinItemPointerGetBlockNumber(item)) + continue; + match = false; + break; + } + + if (match) + break; + + /* + * No hit. Update myAdvancePast to this TID, so that on the next + * pass we'll move to the next possible entry. + */ + myAdvancePast = *item; + } + + /* + * We must return recheck = true if any of the keys are marked recheck. + */ + *recheck = false; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (key->recheckCurItem) + { + *recheck = true; + break; + } + } + + return TRUE; +} + + +/* + * Functions for scanning the pending list + */ + /* * Get ItemPointer of next heap row to be checked from pending list. - * Returns false if there are no more. On pages with several rows + * Returns false if there are no more. On pages with several heap rows * it returns each row separately, on page with part of heap row returns - * per page data. pos->firstOffset and pos->lastOffset points - * fraction of tuples for current heap row. + * per page data. pos->firstOffset and pos->lastOffset are set to identify + * the range of pending-list tuples belonging to this heap row. * * The pendingBuffer is presumed pinned and share-locked on entry, and is * pinned and share-locked on success exit. On failure exit it's released. @@ -917,10 +1117,9 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) /* * Now pos->firstOffset points to the first tuple of current heap - * row, pos->lastOffset points to the first tuple of second heap + * row, pos->lastOffset points to the first tuple of next heap * row (or to the end of page) */ - break; } } @@ -929,35 +1128,47 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) } /* - * Scan page from current tuple (off) up till the first of: + * Scan pending-list page from current tuple (off) up till the first of: * - match is found (then returns true) * - no later match is possible * - tuple's attribute number is not equal to entry's attrnum * - reach end of page + * + * datum[]/category[]/datumExtracted[] arrays are used to cache the results + * of gintuple_get_key() on the current page. */ static bool matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, OffsetNumber maxoff, - Datum value, OffsetNumber attrnum, - Datum *datum, bool *datumExtracted, - StrategyNumber strategy, - Pointer extra_data) + GinScanEntry entry, + Datum *datum, GinNullCategory *category, + bool *datumExtracted) { IndexTuple itup; int32 cmp; + /* Partial match to a null is not possible */ + if (entry->queryCategory != GIN_CAT_NORM_KEY) + return false; + while (off < maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); - if (attrnum != gintuple_get_attrnum(ginstate, itup)) + + if (gintuple_get_attrnum(ginstate, itup) != entry->attnum) return false; if (datumExtracted[off - 1] == false) { - datum[off - 1] = gin_index_getattr(ginstate, itup); + datum[off - 1] = gintuple_get_key(ginstate, itup, + &category[off - 1]); datumExtracted[off - 1] = true; } + /* Once we hit nulls, no further match is possible */ + if (category[off - 1] != GIN_CAT_NORM_KEY) + return false; + /*---------- * Check partial match. * case cmp == 0 => match @@ -965,11 +1176,11 @@ matchPartialInPendingList(GinState *ginstate, Page page, * case cmp < 0 => not match and continue scan *---------- */ - cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[attrnum - 1], - value, + cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[entry->attnum - 1], + entry->queryKey, datum[off - 1], - UInt16GetDatum(strategy), - PointerGetDatum(extra_data))); + UInt16GetDatum(entry->strategy), + PointerGetDatum(entry->extra_data))); if (cmp == 0) return true; else if (cmp > 0) @@ -981,27 +1192,20 @@ matchPartialInPendingList(GinState *ginstate, Page page, return false; } -static bool -hasAllMatchingKeys(GinScanOpaque so, pendingPosition *pos) -{ - int i; - - for (i = 0; i < so->nkeys; i++) - if (pos->hasMatchKey[i] == false) - return false; - - return true; -} - /* - * Sets entryRes array for each key by looking at - * every entry per indexed value (heap's row) in pending list. - * returns true if at least one of datum was matched by key's entry + * Set up the entryRes array for each key by looking at + * every entry for current heap row in pending list. + * + * Returns true if each scan key has at least one entryRes match. + * This corresponds to the situations where the normal index search will + * try to apply the key's consistentFn. (A tuple not meeting that requirement + * cannot be returned by the normal search since no entry stream will + * source its TID.) * * The pendingBuffer is presumed pinned and share-locked on entry. */ static bool -collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) +collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) { GinScanOpaque so = (GinScanOpaque) scan->opaque; OffsetNumber attrnum; @@ -1011,7 +1215,7 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) j; /* - * Reset entryRes + * Reset all entryRes and hasMatchKey flags */ for (i = 0; i < so->nkeys; i++) { @@ -1021,13 +1225,19 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) } memset(pos->hasMatchKey, FALSE, so->nkeys); + /* + * Outer loop iterates over multiple pending-list pages when a single + * heap row has entries spanning those pages. + */ for (;;) { Datum datum[BLCKSZ / sizeof(IndexTupleData)]; + GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)]; bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)]; Assert(pos->lastOffset > pos->firstOffset); - memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset)); + memset(datumExtracted + pos->firstOffset - 1, 0, + sizeof(bool) * (pos->lastOffset - pos->firstOffset)); page = BufferGetPage(pos->pendingBuffer); @@ -1037,128 +1247,175 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) for (j = 0; j < key->nentries; j++) { + GinScanEntry entry = key->scanEntry + j; OffsetNumber StopLow = pos->firstOffset, StopHigh = pos->lastOffset, StopMiddle; - GinScanEntry entry = key->scanEntry + j; - /* already true - do not extra work */ + /* If already matched on earlier page, do no extra work */ if (key->entryRes[j]) continue; /* - * Interested tuples are from pos->firstOffset to + * Interesting tuples are from pos->firstOffset to * pos->lastOffset and they are ordered by (attnum, Datum) as - * it's done in entry tree So we could use binary search to - * prevent linear scanning + * it's done in entry tree. So we can use binary search to + * avoid linear scanning. */ while (StopLow < StopHigh) { + int res; + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); + attrnum = gintuple_get_attrnum(&so->ginstate, itup); if (key->attnum < attrnum) + { StopHigh = StopMiddle; - else if (key->attnum > attrnum) + continue; + } + if (key->attnum > attrnum) + { StopLow = StopMiddle + 1; - else + continue; + } + + if (datumExtracted[StopMiddle - 1] == false) { - int res; + datum[StopMiddle - 1] = + gintuple_get_key(&so->ginstate, itup, + &category[StopMiddle - 1]); + datumExtracted[StopMiddle - 1] = true; + } - if (datumExtracted[StopMiddle - 1] == false) + if (entry->queryCategory == GIN_CAT_EMPTY_QUERY) + { + /* special behavior depending on searchMode */ + if (entry->searchMode == GIN_SEARCH_MODE_ALL) { - datum[StopMiddle - 1] = gin_index_getattr(&so->ginstate, itup); - datumExtracted[StopMiddle - 1] = true; + /* match anything except NULL_ITEM */ + if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM) + res = -1; + else + res = 0; + } + else + { + /* match everything */ + res = 0; } + } + else + { res = ginCompareEntries(&so->ginstate, entry->attnum, - entry->entry, - datum[StopMiddle - 1]); + entry->queryKey, + entry->queryCategory, + datum[StopMiddle - 1], + category[StopMiddle - 1]); + } - if (res == 0) - { - /* - * The exact match causes, so we just scan from - * current position to find a partial match. See - * comment above about tuple's ordering. - */ - if (entry->isPartialMatch) - key->entryRes[j] = - matchPartialInPendingList(&so->ginstate, - page, StopMiddle, - pos->lastOffset, - entry->entry, - entry->attnum, - datum, - datumExtracted, - entry->strategy, - entry->extra_data); - else - key->entryRes[j] = true; - break; - } - else if (res < 0) - StopHigh = StopMiddle; + if (res == 0) + { + /* + * Found exact match (there can be only one, except + * in EMPTY_QUERY mode). + * + * If doing partial match, scan forward from + * here to end of page to check for matches. + * + * See comment above about tuple's ordering. + */ + if (entry->isPartialMatch) + key->entryRes[j] = + matchPartialInPendingList(&so->ginstate, + page, + StopMiddle, + pos->lastOffset, + entry, + datum, + category, + datumExtracted); else - StopLow = StopMiddle + 1; + key->entryRes[j] = true; + + /* done with binary search */ + break; } + else if (res < 0) + StopHigh = StopMiddle; + else + StopLow = StopMiddle + 1; } if (StopLow >= StopHigh && entry->isPartialMatch) { /* - * The exact match wasn't found, so we need to start scan - * from first tuple greater then current entry See comment - * above about tuple's ordering. + * No exact match on this page. If doing partial + * match, scan from the first tuple greater than + * target value to end of page. Note that since we + * don't remember whether the comparePartialFn told us + * to stop early on a previous page, we will uselessly + * apply comparePartialFn to the first tuple on each + * subsequent page. */ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, - page, StopHigh, + page, + StopHigh, pos->lastOffset, - entry->entry, - entry->attnum, + entry, datum, - datumExtracted, - entry->strategy, - entry->extra_data); + category, + datumExtracted); } pos->hasMatchKey[i] |= key->entryRes[j]; } } + /* Advance firstOffset over the scanned tuples */ pos->firstOffset = pos->lastOffset; if (GinPageHasFullRow(page)) { /* - * We scan all values from one tuple, go to next one + * We have examined all pending entries for the current heap row. + * Break out of loop over pages. */ - - return hasAllMatchingKeys(so, pos); + break; } else { - ItemPointerData item = pos->item; - /* - * need to get next portion of tuples of row containing on several - * pages + * Advance to next page of pending entries for the current heap + * row. Complain if there isn't one. */ + ItemPointerData item = pos->item; - if (scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item)) - elog(ERROR, "Could not process tuple"); /* XXX should not be - * here ! */ + if (scanGetCandidate(scan, pos) == false || + !ItemPointerEquals(&pos->item, &item)) + elog(ERROR, "could not find additional pending pages for same heap tuple"); } } - return hasAllMatchingKeys(so, pos); + /* + * Now return "true" if all scan keys have at least one matching datum + */ + for (i = 0; i < so->nkeys; i++) + { + if (pos->hasMatchKey[i] == false) + return false; + } + + return true; } /* - * Collect all matched rows from pending list in bitmap + * Collect all matched rows from pending list into bitmap */ static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) @@ -1201,11 +1458,13 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) while (scanGetCandidate(scan, &pos)) { /* - * Check entries in tuple and setup entryRes array If tuples of heap's - * row are placed on several pages collectDatumForItem will read all - * of that pages. + * Check entries in tuple and set up entryRes array. + * + * If pending tuples belonging to the current heap row are spread + * across several pages, collectMatchesForHeapRow will read all of + * those pages. */ - if (!collectDatumForItem(scan, &pos)) + if (!collectMatchesForHeapRow(scan, &pos)) continue; /* @@ -1241,124 +1500,6 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) pfree(pos.hasMatchKey); } -/* - * Get next heap item pointer (after advancePast) from scan. - * Returns true if anything found. - * On success, *item and *recheck are set. - * - * Note: this is very nearly the same logic as in keyGetItem(), except - * that we know the keys are to be combined with AND logic, whereas in - * keyGetItem() the combination logic is known only to the consistentFn. - */ -static bool -scanGetItem(IndexScanDesc scan, ItemPointer advancePast, - ItemPointerData *item, bool *recheck) -{ - GinScanOpaque so = (GinScanOpaque) scan->opaque; - ItemPointerData myAdvancePast = *advancePast; - uint32 i; - bool match; - - for (;;) - { - /* - * Advance any keys that are <= myAdvancePast. In particular, - * since key->curItem was initialized with ItemPointerSetMin, this - * ensures we fetch the first item for each key on the first call. - * Then set *item to the minimum of the key curItems. - * - * Note: a lossy-page entry is encoded by a ItemPointer with max value - * for offset (0xffff), so that it will sort after any exact entries - * for the same page. So we'll prefer to return exact pointers not - * lossy pointers, which is good. Also, when we advance past an exact - * entry after processing it, we will not advance past lossy entries - * for the same page in other keys, which is NECESSARY for correct - * results (since we might have additional entries for the same page - * in the first key). - */ - ItemPointerSetMax(item); - - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - while (key->isFinished == FALSE && - ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0) - keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, - key, &myAdvancePast); - - if (key->isFinished) - return FALSE; /* finished one of keys */ - - if (ginCompareItemPointers(&key->curItem, item) < 0) - *item = key->curItem; - } - - Assert(!ItemPointerIsMax(item)); - - /*---------- - * Now *item contains first ItemPointer after previous result. - * - * The item is a valid hit only if all the keys returned either - * that exact TID, or a lossy reference to the same page. - * - * This logic works only if a keyGetItem stream can never contain both - * exact and lossy pointers for the same page. Else we could have a - * case like - * - * stream 1 stream 2 - * ... ... - * 42/6 42/7 - * 50/1 42/0xffff - * ... ... - * - * We would conclude that 42/6 is not a match and advance stream 1, - * thus never detecting the match to the lossy pointer in stream 2. - * (keyGetItem has a similar problem versus entryGetItem.) - *---------- - */ - match = true; - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - if (ginCompareItemPointers(item, &key->curItem) == 0) - continue; - if (ItemPointerIsLossyPage(&key->curItem) && - GinItemPointerGetBlockNumber(&key->curItem) == - GinItemPointerGetBlockNumber(item)) - continue; - match = false; - break; - } - - if (match) - break; - - /* - * No hit. Update myAdvancePast to this TID, so that on the next - * pass we'll move to the next possible entry. - */ - myAdvancePast = *item; - } - - /* - * We must return recheck = true if any of the keys are marked recheck. - */ - *recheck = false; - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - if (key->recheckCurItem) - { - *recheck = true; - break; - } - } - - return TRUE; -} #define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL ) #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes ) @@ -1372,6 +1513,9 @@ gingetbitmap(PG_FUNCTION_ARGS) ItemPointerData iptr; bool recheck; + /* + * Set up the scan keys, and check for unsatisfiable query. + */ if (GinIsNewKey(scan)) ginNewScanKey(scan); |
