diff options
| author | Melanie Plageman | 2025-03-15 14:34:42 +0000 |
|---|---|---|
| committer | Melanie Plageman | 2025-03-15 14:34:42 +0000 |
| commit | 2b73a8cd33b745c5b8a7f44322f86642519e3a40 (patch) | |
| tree | 43c4cb7aa83247b634ff15d4df76a75f16221230 /src/backend/access | |
| parent | 944e81bf99db2b5b70b8a389d4f273534da73f74 (diff) | |
BitmapHeapScan uses the read stream API
Make Bitmap Heap Scan use the read stream API instead of invoking
ReadBuffer() for each block indicated by the bitmap.
The read stream API handles prefetching, so remove all of the explicit
prefetching from bitmap heap scan code.
Now, the heap table AM implements a read stream callback which uses the
bitmap iterator to return the next required block to the read stream
code.
Tomas Vondra conducted extensive regression testing of this feature.
Andres Freund, Thomas Munro, and I analyzed regressions and Thomas Munro
patched the read stream API.
Author: Melanie Plageman <[email protected]>
Reviewed-by: Tomas Vondra <[email protected]>
Tested-by: Tomas Vondra <[email protected]>
Tested-by: Andres Freund <[email protected]>
Tested-by: Thomas Munro <[email protected]>
Tested-by: Nazir Bilal Yavuz <[email protected]>
Discussion: https://postgr.es/m/flat/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com
Diffstat (limited to 'src/backend/access')
| -rw-r--r-- | src/backend/access/heap/heapam.c | 80 | ||||
| -rw-r--r-- | src/backend/access/heap/heapam_handler.c | 90 |
2 files changed, 120 insertions(+), 50 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 21575a8ffef..b12b583c4d9 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -280,6 +280,72 @@ heap_scan_stream_read_next_serial(ReadStream *stream, return scan->rs_prefetch_block; } +/* + * Read stream API callback for bitmap heap scans. + * Returns the next block the caller wants from the read stream or + * InvalidBlockNumber when done. + */ +static BlockNumber +bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, + void *per_buffer_data) +{ + TBMIterateResult *tbmres = per_buffer_data; + BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) private_data; + HeapScanDesc hscan = (HeapScanDesc) bscan; + TableScanDesc sscan = &hscan->rs_base; + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + + /* no more entries in the bitmap */ + if (!tbm_iterate(&sscan->st.rs_tbmiterator, tbmres)) + return InvalidBlockNumber; + + /* + * Ignore any claimed entries past what we think is the end of the + * relation. It may have been extended after the start of our scan (we + * only hold an AccessShareLock, and it could be inserts from this + * backend). We don't take this optimization in SERIALIZABLE + * isolation though, as we need to examine all invisible tuples + * reachable by the index. + */ + if (!IsolationIsSerializable() && + tbmres->blockno >= hscan->rs_nblocks) + continue; + + /* + * We can skip fetching the heap page if we don't need any fields from + * the heap, the bitmap entries don't need rechecking, and all tuples + * on the page are visible to our transaction. 
+ */ + if (!(sscan->rs_flags & SO_NEED_TUPLES) && + !tbmres->recheck && + VM_ALL_VISIBLE(sscan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer)) + { + OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE]; + int noffsets; + + /* can't be lossy in the skip_fetch case */ + Assert(!tbmres->lossy); + Assert(bscan->rs_empty_tuples_pending >= 0); + + /* + * We throw away the offsets, but this is the easiest way to get a + * count of tuples. + */ + noffsets = tbm_extract_page_tuple(tbmres, offsets, TBM_MAX_TUPLES_PER_PAGE); + bscan->rs_empty_tuples_pending += noffsets; + continue; + } + + return tbmres->blockno; + } + + /* not reachable */ + Assert(false); +} + /* ---------------- * initscan - scan code common to heap_beginscan and heap_rescan * ---------------- @@ -1068,6 +1134,7 @@ heap_beginscan(Relation relation, Snapshot snapshot, scan->rs_base.rs_flags = flags; scan->rs_base.rs_parallel = parallel_scan; scan->rs_strategy = NULL; /* set in initscan */ + scan->rs_cbuf = InvalidBuffer; /* * Disable page-at-a-time mode if it's not a MVCC-safe snapshot. 
@@ -1147,6 +1214,16 @@ heap_beginscan(Relation relation, Snapshot snapshot, scan, 0); } + else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN) + { + scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_DEFAULT, + scan->rs_strategy, + scan->rs_base.rs_rd, + MAIN_FORKNUM, + bitmapheap_stream_read_next, + scan, + sizeof(TBMIterateResult)); + } return (TableScanDesc) scan; @@ -1181,7 +1258,10 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, * unpin scan buffers */ if (BufferIsValid(scan->rs_cbuf)) + { ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + } if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN) { diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 25d26409e2c..3035adacade 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2117,82 +2117,72 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths, static bool heapam_scan_bitmap_next_block(TableScanDesc scan, - BlockNumber *blockno, bool *recheck, + bool *recheck, uint64 *lossy_pages, uint64 *exact_pages) { BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan; HeapScanDesc hscan = (HeapScanDesc) bscan; BlockNumber block; + void *per_buffer_data; Buffer buffer; Snapshot snapshot; int ntup; - TBMIterateResult tbmres; + TBMIterateResult *tbmres; OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE]; int noffsets = -1; Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN); + Assert(hscan->rs_read_stream); hscan->rs_cindex = 0; hscan->rs_ntuples = 0; - *blockno = InvalidBlockNumber; - *recheck = true; - - do + /* Release buffer containing previous block. 
*/ + if (BufferIsValid(hscan->rs_cbuf)) { - CHECK_FOR_INTERRUPTS(); + ReleaseBuffer(hscan->rs_cbuf); + hscan->rs_cbuf = InvalidBuffer; + } - if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres)) - return false; + hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream, + &per_buffer_data); - /* Exact pages need their tuple offsets extracted. */ - if (!tbmres.lossy) - noffsets = tbm_extract_page_tuple(&tbmres, offsets, - TBM_MAX_TUPLES_PER_PAGE); + if (BufferIsInvalid(hscan->rs_cbuf)) + { + if (BufferIsValid(bscan->rs_vmbuffer)) + { + ReleaseBuffer(bscan->rs_vmbuffer); + bscan->rs_vmbuffer = InvalidBuffer; + } /* - * Ignore any claimed entries past what we think is the end of the - * relation. It may have been extended after the start of our scan (we - * only hold an AccessShareLock, and it could be inserts from this - * backend). We don't take this optimization in SERIALIZABLE - * isolation though, as we need to examine all invisible tuples - * reachable by the index. + * Bitmap is exhausted. Time to emit empty tuples if relevant. We emit + * all empty tuples at the end instead of emitting them per block we + * skip fetching. This is necessary because the streaming read API + * will only return TBMIterateResults for blocks actually fetched. + * When we skip fetching a block, we keep track of how many empty + * tuples to emit at the end of the BitmapHeapScan. We do not recheck + * all NULL tuples. */ - } while (!IsolationIsSerializable() && - tbmres.blockno >= hscan->rs_nblocks); + *recheck = false; + return bscan->rs_empty_tuples_pending > 0; + } - /* Got a valid block */ - *blockno = tbmres.blockno; - *recheck = tbmres.recheck; + Assert(per_buffer_data); - /* - * We can skip fetching the heap page if we don't need any fields from the - * heap, the bitmap entries don't need rechecking, and all tuples on the - * page are visible to our transaction. 
- */ - if (!(scan->rs_flags & SO_NEED_TUPLES) && - !tbmres.recheck && - VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer)) - { - /* can't be lossy in the skip_fetch case */ - Assert(!tbmres.lossy); - Assert(bscan->rs_empty_tuples_pending >= 0); - Assert(noffsets > -1); + tbmres = per_buffer_data; - bscan->rs_empty_tuples_pending += noffsets; + Assert(BlockNumberIsValid(tbmres->blockno)); + Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno); - return true; - } + /* Exact pages need their tuple offsets extracted. */ + if (!tbmres->lossy) + noffsets = tbm_extract_page_tuple(tbmres, offsets, + TBM_MAX_TUPLES_PER_PAGE); - block = tbmres.blockno; + *recheck = tbmres->recheck; - /* - * Acquire pin on the target heap page, trading in any pin we held before. - */ - hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf, - scan->rs_rd, - block); - hscan->rs_cblock = block; + block = hscan->rs_cblock = tbmres->blockno; buffer = hscan->rs_cbuf; snapshot = scan->rs_snapshot; @@ -2213,7 +2203,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, /* * We need two separate strategies for lossy and non-lossy cases. */ - if (!tbmres.lossy) + if (!tbmres->lossy) { /* * Bitmap is non-lossy, so we just look through the offsets listed in @@ -2277,7 +2267,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, Assert(ntup <= MaxHeapTuplesPerPage); hscan->rs_ntuples = ntup; - if (tbmres.lossy) + if (tbmres->lossy) (*lossy_pages)++; else (*exact_pages)++; |
