From 6a780e5fe167b0f767011c282e9939d539215fac Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Mon, 2 Dec 2024 15:38:14 +0100 Subject: [PATCH 1/2] Remove HeapBitmapScan's skip_fetch optimization The optimization doesn't take concurrent vacuum's removal of TIDs into account, which can remove dead TIDs and make ALL_VISIBLE pages for which we have pointers to those dead TIDs in the bitmap. The optimization itself isn't impossible, but would require more work figuring out that vacuum started removing TIDs and then stop using the optimization. However, we currently don't have such a system in place, making the optimization unsound to keep around. Reported-By: Konstantin Knizhnik Backpatch-through: 13 --- src/backend/access/heap/heapam.c | 62 +++-------------------- src/backend/access/heap/heapam_handler.c | 44 +--------------- src/backend/executor/nodeBitmapHeapscan.c | 15 +----- src/include/access/heapam.h | 12 +---- src/include/access/tableam.h | 12 +---- 5 files changed, 13 insertions(+), 132 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index cedaa195cb6f..c13288d83b7c 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -314,31 +314,6 @@ bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, tbmres->blockno >= hscan->rs_nblocks) continue; - /* - * We can skip fetching the heap page if we don't need any fields from - * the heap, the bitmap entries don't need rechecking, and all tuples - * on the page are visible to our transaction. - */ - if (!(sscan->rs_flags & SO_NEED_TUPLES) && - !tbmres->recheck && - VM_ALL_VISIBLE(sscan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer)) - { - OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE]; - int noffsets; - - /* can't be lossy in the skip_fetch case */ - Assert(!tbmres->lossy); - Assert(bscan->rs_empty_tuples_pending >= 0); - - /* - * We throw away the offsets, but this is the easiest way to get a - * count of tuples. 
- */ - noffsets = tbm_extract_page_tuple(tbmres, offsets, TBM_MAX_TUPLES_PER_PAGE); - bscan->rs_empty_tuples_pending += noffsets; - continue; - } - return tbmres->blockno; } @@ -1123,9 +1098,10 @@ heap_beginscan(Relation relation, Snapshot snapshot, if (flags & SO_TYPE_BITMAPSCAN) { BitmapHeapScanDesc bscan = palloc(sizeof(BitmapHeapScanDescData)); - - bscan->rs_vmbuffer = InvalidBuffer; - bscan->rs_empty_tuples_pending = 0; + /* + * Bitmap Heap scans do not have any new fields that a normal Heap + * Scan does not have, so no special initializations required here. + */ scan = (HeapScanDesc) bscan; } else @@ -1280,23 +1256,10 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, scan->rs_cbuf = InvalidBuffer; } - if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN) - { - BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan; - - /* - * Reset empty_tuples_pending, a field only used by bitmap heap scan, - * to avoid incorrectly emitting NULL-filled tuples from a previous - * scan on rescan. - */ - bscan->rs_empty_tuples_pending = 0; - - if (BufferIsValid(bscan->rs_vmbuffer)) - { - ReleaseBuffer(bscan->rs_vmbuffer); - bscan->rs_vmbuffer = InvalidBuffer; - } - } + /* + * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any + * additional data vs a normal HeapScan + */ /* * The read stream is reset on rescan. This must be done before @@ -1325,15 +1288,6 @@ heap_endscan(TableScanDesc sscan) if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); - if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN) - { - BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) sscan; - - bscan->rs_empty_tuples_pending = 0; - if (BufferIsValid(bscan->rs_vmbuffer)) - ReleaseBuffer(bscan->rs_vmbuffer); - } - /* * Must free the read stream before freeing the BufferAccessStrategy. 
*/ diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 24d3765aa201..453470e5aee3 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2137,32 +2137,6 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, */ while (hscan->rs_cindex >= hscan->rs_ntuples) { - /* - * Emit empty tuples before advancing to the next block - */ - if (bscan->rs_empty_tuples_pending > 0) - { - /* - * If we don't have to fetch the tuple, just return nulls. - */ - ExecStoreAllNullTuple(slot); - bscan->rs_empty_tuples_pending--; - - /* - * We do not recheck all NULL tuples. Because the streaming read - * API only yields TBMIterateResults for blocks actually fetched - * from the heap, we must unset `recheck` ourselves here to ensure - * correct results. - * - * Our read stream callback accrues a count of empty tuples to - * emit and then emits them after emitting tuples from the next - * fetched block. If no blocks need fetching, we'll emit the - * accrued count at the end of the scan. - */ - *recheck = false; - return true; - } - /* * Returns false if the bitmap is exhausted and there are no further * blocks we need to scan. @@ -2516,24 +2490,10 @@ BitmapHeapScanNextBlock(TableScanDesc scan, if (BufferIsInvalid(hscan->rs_cbuf)) { - if (BufferIsValid(bscan->rs_vmbuffer)) - { - ReleaseBuffer(bscan->rs_vmbuffer); - bscan->rs_vmbuffer = InvalidBuffer; - } - /* - * The bitmap is exhausted. Now emit any remaining empty tuples. The - * read stream API only returns TBMIterateResults for blocks actually - * fetched from the heap. Our callback will accrue a count of empty - * tuples to emit for all blocks we skipped fetching. So, if we skip - * fetching heap blocks at the end of the relation (or no heap blocks - * are fetched) we need to ensure we emit empty tuples before ending - * the scan. We don't recheck empty tuples so ensure `recheck` is - * unset. + * The bitmap is exhausted. 
*/ - *recheck = false; - return bscan->rs_empty_tuples_pending > 0; + return false; } Assert(per_buffer_data); diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 3e33360c0fce..bf24f3d7fe0a 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -105,24 +105,11 @@ BitmapTableScanSetup(BitmapHeapScanState *node) */ if (!node->ss.ss_currentScanDesc) { - bool need_tuples = false; - - /* - * We can potentially skip fetching heap pages if we do not need any - * columns of the table, either for checking non-indexable quals or - * for returning data. This test is a bit simplistic, as it checks - * the stronger condition that there's no qual or return tlist at all. - * But in most cases it's probably not worth working harder than that. - */ - need_tuples = (node->ss.ps.plan->qual != NIL || - node->ss.ps.plan->targetlist != NIL); - node->ss.ss_currentScanDesc = table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, 0, - NULL, - need_tuples); + NULL); } node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 1640d9c32f7c..e48fe434cd39 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -103,17 +103,7 @@ typedef struct BitmapHeapScanDescData { HeapScanDescData rs_heap_base; - /* - * These fields are only used for bitmap scans for the "skip fetch" - * optimization. Bitmap scans needing no fields from the heap may skip - * fetching an all visible block, instead using the number of tuples per - * block reported by the bitmap to determine how many NULL-filled tuples - * to return. 
They are common to parallel and serial BitmapHeapScans - */ - - /* page of VM containing info for current block */ - Buffer rs_vmbuffer; - int rs_empty_tuples_pending; + /* Holds no data */ } BitmapHeapScanDescData; typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index b8cb1e744ad1..8713e12cbfb9 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -62,13 +62,6 @@ typedef enum ScanOptions /* unregister snapshot at scan end? */ SO_TEMP_SNAPSHOT = 1 << 9, - - /* - * At the discretion of the table AM, bitmap table scans may be able to - * skip fetching a block from the table if none of the table data is - * needed. If table data may be needed, set SO_NEED_TUPLES. - */ - SO_NEED_TUPLES = 1 << 10, } ScanOptions; /* @@ -920,13 +913,10 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, */ static inline TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, - int nkeys, struct ScanKeyData *key, bool need_tuple) + int nkeys, struct ScanKeyData *key) { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; - if (need_tuple) - flags |= SO_NEED_TUPLES; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); } From a9276a4282b98c06f60f568441ede05a38d9d68a Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 2 Dec 2024 14:38:11 -0500 Subject: [PATCH 2/2] isolationtester showing broken index-only bitmap heap scan --- .../expected/index-only-bitmapscan.out | 28 ++++++ src/test/isolation/isolation_schedule | 1 + .../specs/index-only-bitmapscan.spec | 85 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 src/test/isolation/expected/index-only-bitmapscan.out create mode 100644 src/test/isolation/specs/index-only-bitmapscan.spec diff --git a/src/test/isolation/expected/index-only-bitmapscan.out b/src/test/isolation/expected/index-only-bitmapscan.out new file mode 100644 index 000000000000..132ff1bda701 --- 
/dev/null +++ b/src/test/isolation/expected/index-only-bitmapscan.out @@ -0,0 +1,28 @@ +Parsed test spec with 2 sessions + +starting permutation: s2_vacuum s2_mod s1_begin s1_prepare s1_fetch_1 s2_vacuum s1_fetch_all s1_commit +step s2_vacuum: VACUUM (TRUNCATE false) ios_bitmap; +step s2_mod: + DELETE FROM ios_bitmap WHERE a > 1; + +step s1_begin: BEGIN; +step s1_prepare: + DECLARE foo NO SCROLL CURSOR FOR SELECT row_number() OVER () FROM ios_bitmap WHERE a > 0 or b > 0; + +step s1_fetch_1: + FETCH FROM foo; + +row_number +---------- + 1 +(1 row) + +step s2_vacuum: VACUUM (TRUNCATE false) ios_bitmap; +step s1_fetch_all: + FETCH ALL FROM foo; + +row_number +---------- +(0 rows) + +step s1_commit: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 143109aa4da9..e3c669a29c7a 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -17,6 +17,7 @@ test: partial-index test: two-ids test: multiple-row-versions test: index-only-scan +test: index-only-bitmapscan test: predicate-lock-hot-tuple test: update-conflict-out test: deadlock-simple diff --git a/src/test/isolation/specs/index-only-bitmapscan.spec b/src/test/isolation/specs/index-only-bitmapscan.spec new file mode 100644 index 000000000000..9962b8dc5312 --- /dev/null +++ b/src/test/isolation/specs/index-only-bitmapscan.spec @@ -0,0 +1,85 @@ +# index-only-bitmapscan test showing wrong results +# +setup +{ + -- by using a low fillfactor and a wide tuple we can get multiple blocks + -- with just few rows + CREATE TABLE ios_bitmap (a int NOT NULL, b int not null, pad char(1024) default '') + WITH (AUTOVACUUM_ENABLED = false, FILLFACTOR = 10); + + INSERT INTO ios_bitmap SELECT g.i, g.i FROM generate_series(1, 10) g(i); + + CREATE INDEX ios_bitmap_a ON ios_bitmap(a); + CREATE INDEX ios_bitmap_b ON ios_bitmap(b); +} + +teardown +{ + DROP TABLE ios_bitmap; +} + + +session s1 + +setup { + SET enable_seqscan = false; +} + +step 
s1_begin { BEGIN; }
step s1_commit { COMMIT; }


# The test query uses an OR between two indexes to make it more likely
# that a bitmap index scan is used.
#
# The row_number() hack is a way to have something returned (isolationtester
# doesn't display empty rows) while still allowing for the index-only scan
# optimization in bitmap heap scans, which requires an empty targetlist.
step s1_prepare {
	DECLARE foo NO SCROLL CURSOR FOR SELECT row_number() OVER () FROM ios_bitmap WHERE a > 0 or b > 0;
}

step s1_fetch_1 {
	FETCH FROM foo;
}

step s1_fetch_all {
	FETCH ALL FROM foo;
}


session s2

# Don't delete row 1 so we have a row for the cursor to "rest" on.
step s2_mod
{
	DELETE FROM ios_bitmap WHERE a > 1;
}

# Disable truncation, as otherwise we'll just wait for a timeout while trying
# to acquire the lock.
step s2_vacuum { VACUUM (TRUNCATE false) ios_bitmap; }

permutation
  # Vacuum first, to ensure the VM exists, as otherwise the bitmapscan will
  # consider the VM to be size 0 due to caching; presumably why setup can't.
  s2_vacuum

  # Delete nearly all rows, to make the issue visible.
  s2_mod
  # Create a cursor.
  s1_begin
  s1_prepare

  # Fetch one row from the cursor, which ensures the index scan portion is
  # done before the vacuum in the next step.
  s1_fetch_1

  # With the bug, this vacuum will mark pages as all-visible that the scan in
  # the next step then considers all-visible, despite all rows from those
  # pages having been removed.
  s2_vacuum
  # If this returns any rows, we're busted.
  s1_fetch_all

  s1_commit