@@ -101,11 +101,37 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
101
101
uint16 infomask , Relation rel , int * remaining ,
102
102
bool logLockFailure );
103
103
static void index_delete_sort (TM_IndexDeleteOp * delstate );
104
+ static inline int heap_ivc_process_block (Relation rel , Buffer * vmbuf ,
105
+ TM_VisCheck * checks , int nchecks );
106
+ static void heap_ivc_process_all (Relation rel , Buffer * vmbuf ,
107
+ TM_VisCheck * checks , int nchecks );
104
108
static int bottomup_sort_and_shrink (TM_IndexDeleteOp * delstate );
105
109
static XLogRecPtr log_heap_new_cid (Relation relation , HeapTuple tup );
106
110
static HeapTuple ExtractReplicaIdentity (Relation relation , HeapTuple tp , bool key_required ,
107
111
bool * copy );
108
112
113
/*
 * Sort-template instantiations for index visibility checks.
 *
 * Two specialized sort routines over TM_VisCheck arrays are generated from
 * lib/sort_template.h:
 *
 *   heap_ivc_sortby_tidheapblk - orders checks by heap block number, so a
 *       batch of visibility checks visits each VM page at most once.
 *   heap_ivc_sortby_idx - restores the caller's original index order (by
 *       idxoffnum) after the heap-order pass.
 *
 * NOTE(review): sort_template.h's ST_COMPARE receives pointers to elements,
 * hence the a->/b-> dereferences below.
 */
#define ST_SORT heap_ivc_sortby_tidheapblk
#define ST_ELEMENT_TYPE TM_VisCheck
#define ST_DECLARE
#define ST_DEFINE
#define ST_SCOPE static inline
#define ST_COMPARE(a, b) ( \
	a->tidblkno < b->tidblkno ? -1 : ( \
		a->tidblkno > b->tidblkno ? 1 : 0 \
	) \
)

#include "lib/sort_template.h"

/* second instantiation: sort back into index (idxoffnum) order */
#define ST_SORT heap_ivc_sortby_idx
#define ST_ELEMENT_TYPE TM_VisCheck
#define ST_DECLARE
#define ST_DEFINE
#define ST_SCOPE static inline
#define ST_COMPARE(a, b) (((int) a->idxoffnum) - ((int) b->idxoffnum))
#include "lib/sort_template.h"
109
135
110
136
/*
111
137
* Each tuple lock mode has a corresponding heavyweight lock, and one or two
@@ -8750,6 +8776,157 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
8750
8776
return nblocksfavorable ;
8751
8777
}
8752
8778
8779
/*
 * heapam implementation of tableam's index_vischeck_tuples interface.
 *
 * This helper function is called by index AMs during index-only scans,
 * to do VM-based visibility checks on individual tuples, so that the AM
 * can hold the tuple in memory for e.g. reordering for extended periods of
 * time without holding thousands of pins to conflict with VACUUM.
 *
 * It's possible for this to generate a fair amount of I/O, since we may be
 * checking hundreds of tuples from a single index block, but that is
 * preferred over holding thousands of pins.
 *
 * We use heuristics to balance the costs of sorting TIDs with VM page
 * lookups: TIDs are only sorted into heap order when the unsorted order
 * would cause more VM buffer replacements than about log2(ntids).
 */
void
heap_index_vischeck_tuples(Relation rel, TM_IndexVisibilityCheckOp *checkop)
{
	Buffer		vmbuf = *checkop->vmbuf;
	Buffer		storvmbuf = vmbuf;	/* remembers the buffer we started with,
									 * so we can detect a VM page change */
	TM_VisCheck *checks = checkop->checktids;
	int			checkntids = checkop->checkntids;
	int			upcomingvmbufchanges = 0;

	/*
	 * The first index scan will have to pin the VM buffer, and that first
	 * change in the vm buffer shouldn't put us into the expensive VM page &
	 * sort path; so we special-case this operation.
	 */
	if (!BufferIsValid(vmbuf))
	{
		int			processed;

		processed = heap_ivc_process_block(rel, &vmbuf, checks, checkntids);
		checkntids -= processed;
		checks += processed;
		storvmbuf = vmbuf;
		Assert(processed > 0);
	}

	/*
	 * Fast path: keep consuming runs of same-block checks for as long as
	 * they resolve against the currently pinned VM page.  We fall out as
	 * soon as a check needs a different VM page (vmbuf != storvmbuf).
	 */
	while (vmbuf == storvmbuf && checkntids > 0)
	{
		int			processed;

		processed = heap_ivc_process_block(rel, &vmbuf, checks, checkntids);

		Assert(processed <= checkntids);

		checkntids -= processed;
		checks += processed;
	}

	/* hand the (possibly new) pinned VM buffer back to the caller */
	*checkop->vmbuf = vmbuf;

	if (checkntids == 0)
	{
		return;
	}

	/*
	 * Some checks remain and they span more than one VM page.  Count how
	 * many VM page switches the remaining, unsorted order would cost.
	 */
	upcomingvmbufchanges = 0;

	for (int i = 1; i < checkntids; i++)
	{
		/*
		 * Instead of storing the previous iteration's result, we only match
		 * the block numbers
		 */
		BlockNumber lastblkno = checks[i - 1].tidblkno;
		BlockNumber newblkno = checks[i].tidblkno;

		/*
		 * divide-by-constant can be faster than BufferGetBlockNumber()
		 */
		BlockNumber lastvmblkno = HEAPBLK_TO_VMBLOCK(lastblkno);
		BlockNumber newvmblkno = HEAPBLK_TO_VMBLOCK(newblkno);

		if (lastvmblkno != newvmblkno)
			upcomingvmbufchanges++;
	}

	if (upcomingvmbufchanges <= pg_ceil_log2_32(checkntids))
	{
		/*
		 * No big amount of VM buf changes, so do all visibility checks
		 * without sorting.
		 */
		heap_ivc_process_all(rel, checkop->vmbuf, checks, checkntids);

		return;
	}

	/*
	 * Order the TIDs to heap order, so that we will only need to visit every
	 * VM page at most once.
	 */
	heap_ivc_sortby_tidheapblk(checks, checkntids);

	/* do all visibility checks */
	heap_ivc_process_all(rel, checkop->vmbuf, checks, checkntids);

	/* put the checks back in index order */
	heap_ivc_sortby_idx(checks, checkntids);
}
8880
+
8881
+
8882
+ static inline int
8883
+ heap_ivc_process_block (Relation rel , Buffer * vmbuf , TM_VisCheck * checks ,
8884
+ int nchecks )
8885
+ {
8886
+ BlockNumber blkno ;
8887
+ BlockNumber prevblkno = blkno = checks -> tidblkno ;
8888
+ TMVC_Result result ;
8889
+ int processed = 0 ;
8890
+
8891
+ if (VM_ALL_VISIBLE (rel , blkno , vmbuf ))
8892
+ result = TMVC_Visible ;
8893
+ else
8894
+ result = TMVC_MaybeVisible ;
8895
+
8896
+ do
8897
+ {
8898
+ checks -> vischeckresult = result ;
8899
+
8900
+ nchecks -- ;
8901
+ processed ++ ;
8902
+ checks ++ ;
8903
+
8904
+ if (nchecks <= 0 )
8905
+ return processed ;
8906
+
8907
+ blkno = checks -> tidblkno ;
8908
+ } while (blkno == prevblkno );
8909
+
8910
+ return processed ;
8911
+ }
8912
+
8913
+ static void
8914
+ heap_ivc_process_all (Relation rel , Buffer * vmbuf ,
8915
+ TM_VisCheck * checks , int nchecks )
8916
+ {
8917
+ while (nchecks > 0 )
8918
+ {
8919
+ int processed ;
8920
+
8921
+ processed = heap_ivc_process_block (rel , vmbuf , checks , nchecks );
8922
+
8923
+ Assert (processed <= nchecks );
8924
+
8925
+ nchecks -= processed ;
8926
+ checks += processed ;
8927
+ }
8928
+ }
8929
+
8753
8930
/*
8754
8931
* Perform XLogInsert for a heap-visible operation. 'block' is the block
8755
8932
* being marked all-visible, and vm_buffer is the buffer containing the
0 commit comments