postgresql-cfbot
diff --git a/‎doc/src/sgml/btree.sgml
+30-1 b/‎doc/src/sgml/btree.sgml
+30-1
diff --git a/‎doc/src/sgml/indexam.sgml
+2-1 b/‎doc/src/sgml/indexam.sgml
+2-1
diff --git a/‎doc/src/sgml/indices.sgml
+54-14 b/‎doc/src/sgml/indices.sgml
+54-14
diff --git a/‎doc/src/sgml/monitoring.sgml
+4-1 b/‎doc/src/sgml/monitoring.sgml
+4-1
diff --git a/‎doc/src/sgml/perform.sgml
+32 b/‎doc/src/sgml/perform.sgml
+32
diff --git a/‎doc/src/sgml/xindex.sgml
+13-3 b/‎doc/src/sgml/xindex.sgml
+13-3
diff --git a/‎src/backend/access/index/indexam.c
+2-1 b/‎src/backend/access/index/indexam.c
+2-1
@@ -207,7 +207,7 @@
 
  <para>
   As shown in <xref linkend="xindex-btree-support-table"/>, btree defines
-  one required and four optional support functions.  The five
+  one required and five optional support functions.  The six
   user-defined methods are:
  </para>
  <variablelist>
@@ -583,6 +583,35 @@ options(<replaceable>relopts</replaceable> <type>local_relopts *</type>) returns
     </para>
    </listitem>
   </varlistentry>
+  <varlistentry>
+   <term><function>skipsupport</function></term>
+   <listitem>
+    <para>
+     Optionally, a btree operator family may provide a <firstterm>skip
+      support</firstterm> function, registered under support function number 6.
+     These functions give the B-tree code a way to iterate through every
+     possible value that can be represented by an operator class's underlying
+     input type, in key space order.  This is used by the core code when it
+     applies the skip scan optimization.  The APIs involved in this are
+     defined in <filename>src/include/utils/skipsupport.h</filename>.
+    </para>
+    <para>
+     Operator classes that do not provide a skip support function are still
+     eligible to use skip scan.  The core code can still use its fallback
+     strategy, though that might be suboptimal for some discrete types.  It
+     usually doesn't make sense (and may not even be feasible) for operator
+     classes on continuous types to provide a skip support function.
+    </para>
+    <para>
+     It is not sensible for an operator family to register a cross-type
+     <function>skipsupport</function> function, and attempting to do so will
+     result in an error.  This is because determining the next indexable value
+     must happen by incrementing a value copied from an index tuple.  The
+     values generated must all be of the same underlying data type (the
+     <quote>skipped</quote> index column's opclass input type).
+    </para>
+   </listitem>
+  </varlistentry>
  </variablelist>
 
 </sect2>
 
@@ -835,7 +835,8 @@ amrestrpos (IndexScanDesc scan);
   <para>
 <programlisting>
 Size
-amestimateparallelscan (int nkeys,
+amestimateparallelscan (Relation indexRelation,
+                        int nkeys,
                         int norderbys);
 </programlisting>
    Estimate and return the number of bytes of dynamic shared memory which
 
@@ -460,20 +460,56 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
    efficient when there are constraints on the leading (leftmost) columns.
    The exact rule is that equality constraints on leading columns, plus
    any inequality constraints on the first column that does not have an
-   equality constraint, will be used to limit the portion of the index
+   equality constraint, will always be used to limit the portion of the index
    that is scanned.  Constraints on columns to the right of these columns
-   are checked in the index, so they save visits to the table proper, but
-   they do not reduce the portion of the index that has to be scanned.
+   are checked in the index, so they'll always save visits to the table
+   proper, but they do not necessarily reduce the portion of the index that
+   has to be scanned.  If a B-tree index scan can apply the skip scan
+   optimization effectively, it will apply every column constraint when
+   navigating through the index via repeated index searches.  This can reduce
+   the portion of the index that has to be read, even though one or more
+   columns (prior to the least significant index column from the query
+   predicate) lacks a conventional equality constraint.  Skip scan works by
+   generating a dynamic equality constraint internally, that matches every
+   possible value in an index column (though only given a column that lacks
+   an equality constraint that comes from the query predicate, and only when
+   the generated constraint can be used in conjunction with a later column
+   constraint from the query predicate).
+  </para>
+
+  <para>
+   For example, given an index on <literal>(x, y)</literal>, and a query
+   condition <literal>WHERE y = 7700</literal>, a B-tree index scan might be
+   able to apply the skip scan optimization.  This generally happens when the
+   query planner expects that repeated <literal>WHERE x = N AND y = 7700</literal>
+   searches for every possible value of <literal>N</literal> (or for every
+   <literal>x</literal> value that is actually stored in the index) is the
+   fastest possible approach, given the available indexes on the table.  This
+   approach is generally only taken when there are so few distinct
+   <literal>x</literal> values that the planner expects the scan to skip over
+   most of the index (because most of its leaf pages cannot possibly contain
+   relevant tuples).  If there are many distinct <literal>x</literal> values,
+   then the entire index will have to be scanned, so in most cases the planner
+   will prefer a sequential table scan over using the index.
+  </para>
+
+  <para>
+   The skip scan optimization can also be applied selectively, during B-tree
+   scans that have at least some useful constraints from the query predicate.
    For example, given an index on <literal>(a, b, c)</literal> and a
    query condition <literal>WHERE a = 5 AND b &gt;= 42 AND c &lt; 77</literal>,
-   the index would have to be scanned from the first entry with
-   <literal>a</literal> = 5 and <literal>b</literal> = 42 up through the last entry with
-   <literal>a</literal> = 5.  Index entries with <literal>c</literal> &gt;= 77 would be
-   skipped, but they'd still have to be scanned through.
-   This index could in principle be used for queries that have constraints
-   on <literal>b</literal> and/or <literal>c</literal> with no constraint on <literal>a</literal>
-   &mdash; but the entire index would have to be scanned, so in most cases
-   the planner would prefer a sequential table scan over using the index.
+   the index might have to be scanned from the first entry with
+   <literal>a</literal> = 5 and <literal>b</literal> = 42 up through the last
+   entry with <literal>a</literal> = 5.  Index entries with
+   <literal>c</literal> &gt;= 77 will never need to be filtered at the table
+   level, but it may or may not be profitable to skip over them within the
+   index.  When skipping takes place, the scan starts a new index search to
+   reposition itself from the end of the current <literal>a</literal> = 5 and
+   <literal>b</literal> = N grouping (i.e. from the position in the index
+   where the first tuple <literal>a = 5 AND b = N AND c &gt;= 77</literal>
+   appears), to the start of the next such grouping (i.e. the position in the
+   index where the first tuple <literal>a = 5 AND b = N + 1</literal>
+   appears).
   </para>
 
   <para>
@@ -669,9 +705,13 @@ CREATE INDEX test3_desc_index ON test3 (id DESC NULLS LAST);
    multicolumn index on <literal>(x, y)</literal>.  This index would typically be
    more efficient than index combination for queries involving both
    columns, but as discussed in <xref linkend="indexes-multicolumn"/>, it
-   would be almost useless for queries involving only <literal>y</literal>, so it
-   should not be the only index.  A combination of the multicolumn index
-   and a separate index on <literal>y</literal> would serve reasonably well.  For
+   would be less useful for queries involving only <literal>y</literal>.  Just
+   how useful will depend on how effective the B-tree index skip scan
+   optimization is; if <literal>x</literal> has no more than several hundred
+   distinct values, skip scan will make searches for specific
+   <literal>y</literal> values execute reasonably efficiently.  A combination
+   of a multicolumn index on <literal>(x, y)</literal> and a separate index on
+   <literal>y</literal> might also serve reasonably well.  For
    queries involving only <literal>x</literal>, the multicolumn index could be
    used, though it would be larger and hence slower than an index on
    <literal>x</literal> alone.  The last alternative is to create all three
 
@@ -4263,7 +4263,10 @@ description | Waiting for a newly initialized WAL file to reach durable storage
      <replaceable>column_name</replaceable> =
      <replaceable>value2</replaceable> ...</literal> construct, though only
     when the optimizer transforms the construct into an equivalent
-    multi-valued array representation.
+    multi-valued array representation.  Similarly, when B-tree index scans use
+    the skip scan optimization, an index search is performed each time the
+    scan is repositioned to the next index leaf page that might have matching
+    tuples (see <xref linkend="indexes-multicolumn"/>).
    </para>
   </note>
   <tip>
 
@@ -860,6 +860,38 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE thousand IN (1, 2, 3, 4);
     <structname>tenk1_thous_tenthous</structname> index leaf page.
    </para>
 
+   <para>
+    The <quote>Index Searches</quote> line is also useful with B-tree index
+    scans that apply the <firstterm>skip scan</firstterm> optimization to
+    more efficiently traverse through an index:
+<screen>
+EXPLAIN ANALYZE SELECT four, unique1 FROM tenk1 WHERE four BETWEEN 1 AND 3 AND unique1 = 42;
+                                                              QUERY PLAN
+-------------------------------------------------------------------&zwsp;---------------------------------------------------------------
+ Index Only Scan using tenk1_four_unique1_idx on tenk1  (cost=0.29..6.90 rows=1 width=8) (actual time=0.006..0.007 rows=1.00 loops=1)
+   Index Cond: ((four &gt;= 1) AND (four &lt;= 3) AND (unique1 = 42))
+   Heap Fetches: 0
+   Index Searches: 3
+   Buffers: shared hit=7
+ Planning Time: 0.029 ms
+ Execution Time: 0.012 ms
+</screen>
+
+    Here we see an Index-Only Scan node using
+    <structname>tenk1_four_unique1_idx</structname>, a multi-column index on the
+    <structname>tenk1</structname> table's <structfield>four</structfield> and
+    <structfield>unique1</structfield> columns.  The scan performs 3 searches
+    that each read a single index leaf page:
+    <quote><literal>four = 1 AND unique1 = 42</literal></quote>,
+    <quote><literal>four = 2 AND unique1 = 42</literal></quote>, and
+    <quote><literal>four = 3 AND unique1 = 42</literal></quote>.  This index
+    is generally a good target for skip scan, since, as discussed in
+    <xref linkend="indexes-multicolumn"/>, its leading column (the
+    <structfield>four</structfield> column) contains only 4 distinct values,
+    while its second/final column (the <structfield>unique1</structfield>
+    column) contains many distinct values.
+   </para>
+
    <para>
     Another type of extra information is the number of rows removed by a
     filter condition:
 
@@ -461,6 +461,13 @@
        </entry>
        <entry>5</entry>
       </row>
+      <row>
+       <entry>
+        Return the addresses of C-callable skip support function(s)
+        (optional)
+       </entry>
+       <entry>6</entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -1062,7 +1069,8 @@ DEFAULT FOR TYPE int8 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint8cmp(int8, int8) ,
   FUNCTION 2 btint8sortsupport(internal) ,
   FUNCTION 3 in_range(int8, int8, int8, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint8skipsupport(internal) ;
 
 CREATE OPERATOR CLASS int4_ops
 DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS
@@ -1075,7 +1083,8 @@ DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint4cmp(int4, int4) ,
   FUNCTION 2 btint4sortsupport(internal) ,
   FUNCTION 3 in_range(int4, int4, int4, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint4skipsupport(internal) ;
 
 CREATE OPERATOR CLASS int2_ops
 DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS
@@ -1088,7 +1097,8 @@ DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint2cmp(int2, int2) ,
   FUNCTION 2 btint2sortsupport(internal) ,
   FUNCTION 3 in_range(int2, int2, int2, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint2skipsupport(internal) ;
 
 ALTER OPERATOR FAMILY integer_ops USING btree ADD
   -- cross-type comparisons int8 vs int2
 
@@ -489,7 +489,8 @@ index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys,
 	if (parallel_aware &&
 		indexRelation->rd_indam->amestimateparallelscan != NULL)
 		nbytes = add_size(nbytes,
-						  indexRelation->rd_indam->amestimateparallelscan(nkeys,
+						  indexRelation->rd_indam->amestimateparallelscan(indexRelation,
+																		  nkeys,
 																		  norderbys));
 
 	return nbytes;