summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/Makefile3
-rw-r--r--src/backend/access/heap/heapam.c41
-rw-r--r--src/backend/access/tablesample/Makefile17
-rw-r--r--src/backend/access/tablesample/bernoulli.c235
-rw-r--r--src/backend/access/tablesample/system.c186
-rw-r--r--src/backend/access/tablesample/tablesample.c368
-rw-r--r--src/backend/catalog/Makefile5
-rw-r--r--src/backend/commands/analyze.c2
-rw-r--r--src/backend/commands/explain.c21
-rw-r--r--src/backend/executor/Makefile2
-rw-r--r--src/backend/executor/execAmi.c8
-rw-r--r--src/backend/executor/execCurrent.c1
-rw-r--r--src/backend/executor/execProcnode.c14
-rw-r--r--src/backend/executor/nodeSamplescan.c256
-rw-r--r--src/backend/nodes/copyfuncs.c60
-rw-r--r--src/backend/nodes/equalfuncs.c37
-rw-r--r--src/backend/nodes/nodeFuncs.c28
-rw-r--r--src/backend/nodes/outfuncs.c48
-rw-r--r--src/backend/nodes/readfuncs.c45
-rw-r--r--src/backend/optimizer/path/allpaths.c49
-rw-r--r--src/backend/optimizer/path/costsize.c67
-rw-r--r--src/backend/optimizer/plan/createplan.c69
-rw-r--r--src/backend/optimizer/plan/planner.c15
-rw-r--r--src/backend/optimizer/plan/setrefs.c11
-rw-r--r--src/backend/optimizer/plan/subselect.c1
-rw-r--r--src/backend/optimizer/util/pathnode.c22
-rw-r--r--src/backend/parser/gram.y36
-rw-r--r--src/backend/parser/parse_clause.c56
-rw-r--r--src/backend/parser/parse_func.c143
-rw-r--r--src/backend/rewrite/rewriteHandler.c3
-rw-r--r--src/backend/utils/adt/ruleutils.c50
-rw-r--r--src/backend/utils/cache/lsyscache.c27
-rw-r--r--src/backend/utils/cache/syscache.c23
-rw-r--r--src/backend/utils/misc/sampling.c33
-rw-r--r--src/include/access/heapam.h4
-rw-r--r--src/include/access/relscan.h1
-rw-r--r--src/include/access/tablesample.h60
-rw-r--r--src/include/catalog/indexing.h5
-rw-r--r--src/include/catalog/pg_proc.h27
-rw-r--r--src/include/catalog/pg_tablesample_method.h78
-rw-r--r--src/include/executor/nodeSamplescan.h24
-rw-r--r--src/include/nodes/execnodes.h9
-rw-r--r--src/include/nodes/nodes.h4
-rw-r--r--src/include/nodes/parsenodes.h37
-rw-r--r--src/include/nodes/plannodes.h6
-rw-r--r--src/include/optimizer/cost.h1
-rw-r--r--src/include/optimizer/pathnode.h2
-rw-r--r--src/include/parser/kwlist.h1
-rw-r--r--src/include/parser/parse_func.h5
-rw-r--r--src/include/port.h4
-rw-r--r--src/include/utils/lsyscache.h1
-rw-r--r--src/include/utils/rel.h1
-rw-r--r--src/include/utils/sampling.h15
-rw-r--r--src/include/utils/syscache.h2
-rw-r--r--src/port/erand48.c3
-rw-r--r--src/test/regress/expected/rowsecurity.out26
-rw-r--r--src/test/regress/expected/sanity_check.out1
-rw-r--r--src/test/regress/expected/tablesample.out231
-rw-r--r--src/test/regress/parallel_schedule2
-rw-r--r--src/test/regress/serial_schedule1
-rw-r--r--src/test/regress/sql/rowsecurity.sql4
-rw-r--r--src/test/regress/sql/tablesample.sql74
62 files changed, 2574 insertions, 37 deletions
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile
index 21721b48f04..bd93a6a8d1e 100644
--- a/src/backend/access/Makefile
+++ b/src/backend/access/Makefile
@@ -8,6 +8,7 @@ subdir = src/backend/access
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist transam
+SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \
+ tablesample transam
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 1a8d2f2d0b5..f0c2394e600 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,9 @@ bool synchronize_seqscans = true;
static HeapScanDesc heap_beginscan_internal(Relation relation,
Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap);
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan,
+ bool temp_snap);
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -294,9 +295,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
/*
* Currently, we don't have a stats counter for bitmap heap scans (but the
- * underlying bitmap index scans will be counted).
+ * underlying bitmap index scans will be counted) or sample scans (we only
+ * update stats for tuple fetches there)
*/
- if (!scan->rs_bitmapscan)
+ if (!scan->rs_bitmapscan && !scan->rs_samplescan)
pgstat_count_heap_scan(scan->rs_rd);
}
@@ -315,7 +317,7 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
* In page-at-a-time mode it performs additional work, namely determining
* which tuples on the page are visible.
*/
-static void
+void
heapgetpage(HeapScanDesc scan, BlockNumber page)
{
Buffer buffer;
@@ -1310,6 +1312,9 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
* HeapScanDesc for a bitmap heap scan. Although that scan technology is
* really quite unlike a standard seqscan, there is just enough commonality
* to make it worth using the same data structure.
+ *
+ * heap_beginscan_samplingscan is alternate entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.
* ----------------
*/
HeapScanDesc
@@ -1317,7 +1322,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, false);
+ true, true, true, false, false, false);
}
HeapScanDesc
@@ -1327,7 +1332,7 @@ heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, true);
+ true, true, true, false, false, true);
}
HeapScanDesc
@@ -1336,7 +1341,8 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot,
bool allow_strat, bool allow_sync)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- allow_strat, allow_sync, false, false);
+ allow_strat, allow_sync, true,
+ false, false, false);
}
HeapScanDesc
@@ -1344,14 +1350,24 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- false, false, true, false);
+ false, false, true, true, false, false);
+}
+
+HeapScanDesc
+heap_beginscan_sampling(Relation relation, Snapshot snapshot,
+ int nkeys, ScanKey key,
+ bool allow_strat, bool allow_pagemode)
+{
+ return heap_beginscan_internal(relation, snapshot, nkeys, key,
+ allow_strat, false, allow_pagemode,
+ false, true, false);
}
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap)
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan, bool temp_snap)
{
HeapScanDesc scan;
@@ -1373,6 +1389,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
scan->rs_snapshot = snapshot;
scan->rs_nkeys = nkeys;
scan->rs_bitmapscan = is_bitmapscan;
+ scan->rs_samplescan = is_samplescan;
scan->rs_strategy = NULL; /* set in initscan */
scan->rs_allow_strat = allow_strat;
scan->rs_allow_sync = allow_sync;
@@ -1381,7 +1398,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
/*
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
*/
- scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
+ scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
/*
* For a seqscan in a serializable transaction, acquire a predicate lock
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
new file mode 100644
index 00000000000..46eeb59f9c4
--- /dev/null
+++ b/src/backend/access/tablesample/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for utils/tablesample
+#
+# IDENTIFICATION
+# src/backend/utils/tablesample/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/tablesample
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tablesample.o system.o bernoulli.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
new file mode 100644
index 00000000000..c91f3f593e5
--- /dev/null
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -0,0 +1,235 @@
+/*-------------------------------------------------------------------------
+ *
+ * bernoulli.c
+ * interface routines for BERNOULLI tablesample method
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/bernoulli.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/* tsdesc */
+typedef struct
+{
+ uint32 seed; /* random seed */
+ BlockNumber startblock; /* starting block, we use ths for syncscan support */
+ BlockNumber nblocks; /* number of blocks */
+ BlockNumber blockno; /* current block */
+ float4 probability; /* probabilty that tuple will be returned (0.0-1.0) */
+ OffsetNumber lt; /* last tuple returned from current block */
+ SamplerRandomState randstate; /* random generator tsdesc */
+} BernoulliSamplerData;
+
+/*
+ * Initialize the state.
+ */
+Datum
+tsm_bernoulli_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ BernoulliSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(BernoulliSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->startblock = scan->rs_startblock;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->blockno = InvalidBlockNumber;
+ sampler->probability = percent / 100;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number to read or InvalidBlockNumber if we are at the
+ * end of the relation.
+ */
+Datum
+tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ /*
+ * Bernoulli sampling scans all blocks on the table and supports
+ * syncscan so loop from startblock to startblock instead of
+ * from 0 to nblocks.
+ */
+ if (sampler->blockno == InvalidBlockNumber)
+ sampler->blockno = sampler->startblock;
+ else
+ {
+ sampler->blockno++;
+
+ if (sampler->blockno >= sampler->nblocks)
+ sampler->blockno = 0;
+
+ if (sampler->blockno == sampler->startblock)
+ PG_RETURN_UINT32(InvalidBlockNumber);
+ }
+
+ PG_RETURN_UINT32(sampler->blockno);
+}
+
+/*
+ * Get next tuple from current block.
+ *
+ * This method implements the main logic in bernoulli sampling.
+ * The algorithm simply generates new random number (in 0.0-1.0 range) and if
+ * it falls within user specified probability (in the same range) return the
+ * tuple offset.
+ *
+ * It is ok here to return tuple offset without knowing if tuple is visible
+ * and not check it via examinetuple. The reason for that is that we do the
+ * coinflip (random number generation) for every tuple in the table. Since all
+ * tuples have same probability of being returned the visible and invisible
+ * tuples will be returned in same ratio as they have in the actual table.
+ * This means that there is no skew towards either visible or invisible tuples
+ * and the number returned visible tuples to from the executor node is the
+ * fraction of visible tuples which was specified in input.
+ *
+ * This is faster than doing the coinflip in the examinetuple because we don't
+ * have to do visibility checks on uninteresting tuples.
+ *
+ * If we reach end of the block return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+Datum
+tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+ float4 probability = sampler->probability;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /*
+ * Loop over tuple offsets until the random generator returns value that
+ * is within the probability of returning the tuple or until we reach
+ * end of the block.
+ *
+ * (This is our implementation of bernoulli trial)
+ */
+ while (sampler_random_fract(sampler->randstate) > probability)
+ {
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ break;
+ }
+
+ if (tupoffset > maxoffset)
+ /* Tell SampleScan that we want next block. */
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_bernoulli_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset tsdesc (called by ReScan).
+ */
+Datum
+tsm_bernoulli_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ sampler->blockno = InvalidBlockNumber;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_bernoulli_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ *pages = baserel->pages;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
new file mode 100644
index 00000000000..1412e511faf
--- /dev/null
+++ b/src/backend/access/tablesample/system.c
@@ -0,0 +1,186 @@
+/*-------------------------------------------------------------------------
+ *
+ * system.c
+ * interface routines for system tablesample method
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/system.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/*
+ * State
+ */
+typedef struct
+{
+ BlockSamplerData bs;
+ uint32 seed; /* random seed */
+ BlockNumber nblocks; /* number of block in relation */
+ int samplesize; /* number of blocks to return */
+ OffsetNumber lt; /* last tuple returned from current block */
+} SystemSamplerData;
+
+
+/*
+ * Initializes the state.
+ */
+Datum
+tsm_system_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ SystemSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(SystemSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
+ sampler->lt = InvalidOffsetNumber;
+
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number or InvalidBlockNumber when we're done.
+ *
+ * Uses the same logic as ANALYZE for picking the random blocks.
+ */
+Datum
+tsm_system_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ BlockNumber blockno;
+
+ if (!BlockSampler_HasMore(&sampler->bs))
+ PG_RETURN_UINT32(InvalidBlockNumber);
+
+ blockno = BlockSampler_Next(&sampler->bs);
+
+ PG_RETURN_UINT32(blockno);
+}
+
+/*
+ * Get next tuple offset in current block or InvalidOffsetNumber if we are done
+ * with this block.
+ */
+Datum
+tsm_system_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_system_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset state (called by ReScan).
+ */
+Datum
+tsm_system_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+
+ sampler->lt = InvalidOffsetNumber;
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_system_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *pages = baserel->pages * samplesize;
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
new file mode 100644
index 00000000000..ef55d062e75
--- /dev/null
+++ b/src/backend/access/tablesample/tablesample.c
@@ -0,0 +1,368 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.c
+ * TABLESAMPLE internal API
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/tablesample.c
+ *
+ * TABLESAMPLE is the SQL standard clause for sampling the relations.
+ *
+ * The API is interface between the Executor and the TABLESAMPLE Methods.
+ *
+ * TABLESAMPLE Methods are implementations of actual sampling algorithms which
+ * can be used for returning a sample of the source relation.
+ * Methods don't read the table directly but are asked for block number and
+ * tuple offset which they want to examine (or return) and the tablesample
+ * interface implemented here does the reading for them.
+ *
+ * We currently only support sampling of the physical relations, but in the
+ * future we might extend the API to support subqueries as well.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tablesample.h"
+
+#include "catalog/pg_tablesample_method.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+
+
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
+
+
+/*
+ * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
+ */
+TableSampleDesc *
+tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
+{
+ FunctionCallInfoData fcinfo;
+ int i;
+ List *args = tablesample->args;
+ ListCell *arg;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
+
+ /* Load functions */
+ fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
+ fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
+ fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
+ if (OidIsValid(tablesample->tsmexaminetuple))
+ fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
+ else
+ tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
+ fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
+ fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
+
+ InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
+ list_length(args) + 2,
+ InvalidOid, NULL, NULL);
+
+ tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
+ tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
+
+ /* First argument for init function is always TableSampleDesc */
+ fcinfo.arg[0] = PointerGetDatum(tsdesc);
+ fcinfo.argnull[0] = false;
+
+ /*
+ * Second arg for init function is always REPEATABLE
+ * When tablesample->repeatable is NULL then REPEATABLE clause was not
+ * specified.
+ * When specified, the expression cannot evaluate to NULL.
+ */
+ if (tablesample->repeatable)
+ {
+ ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
+ (PlanState *) scanstate);
+ fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[1], NULL);
+ if (fcinfo.argnull[1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value")));
+ }
+ else
+ {
+ fcinfo.arg[1] = UInt32GetDatum(random());
+ fcinfo.argnull[1] = false;
+ }
+
+ /* Rest of the arguments come from user. */
+ i = 2;
+ foreach(arg, args)
+ {
+ Expr *argexpr = (Expr *) lfirst(arg);
+ ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
+
+ if (argstate == NULL)
+ {
+ fcinfo.argnull[i] = true;
+ fcinfo.arg[i] = (Datum) 0;;
+ }
+
+ fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[i], NULL);
+ i++;
+ }
+ Assert(i == fcinfo.nargs);
+
+ (void) FunctionCallInvoke(&fcinfo);
+
+ return tsdesc;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE Method.
+ */
+HeapTuple
+tablesample_getnext(TableSampleDesc *desc)
+{
+ HeapScanDesc scan = desc->heapScan;
+ HeapTuple tuple = &(scan->rs_ctup);
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool page_all_visible;
+ ItemId itemid;
+ OffsetNumber tupoffset,
+ maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
+
+ /*
+ * When pagemode is disabled, the scan will do visibility checks for each
+ * tuple it finds so the buffer needs to be locked.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ UInt16GetDatum(maxoffset)));
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ bool visible;
+ bool found;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (page_all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /*
+ * Let the sampling method examine the actual tuple and decide if we
+ * should return it.
+ *
+ * Note that we let it examine even invisible tuples for
+ * statistical purposes, but not return them since user should
+ * never see invisible tuples.
+ */
+ if (OidIsValid(desc->tsmexaminetuple.fn_oid))
+ {
+ found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ PointerGetDatum(tuple),
+ BoolGetDatum(visible)));
+ /* Should not happen if sampling method is well written. */
+ if (found && !visible)
+ elog(ERROR, "Sampling method wanted to return invisible tuple");
+ }
+ else
+ found = visible;
+
+ /* Found visible tuple, return it. */
+ if (found)
+ {
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+
+ /*
+ * Report our new scan position for synchronization purposes. We
+ * don't do that when moving backwards, however. That would just
+ * mess up any other forward-moving scanners.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
+ blockno : scan->rs_startblock);
+
+ /*
+ * Reached end of scan.
+ */
+ if (!BlockNumberIsValid(blockno))
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
+
+/*
+ * Reset the sampling to starting state
+ */
+void
+tablesample_reset(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
+}
+
+/*
+ * Signal the sampling method that the scan has finished.
+ */
+void
+tablesample_end(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
+}
+
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ /*
+ * If this scan is reading whole pages at a time, there is already
+ * visibility info present in rs_vistuples so we can just search it
+ * for the tupoffset.
+ */
+ if (scan->rs_pageatatime)
+ {
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ /*
+ * Do the binary search over rs_vistuples, it's already sorted by
+ * OffsetNumber so we don't need to do any sorting ourselves here.
+ *
+ * We could use bsearch() here but it's slower for integers because
+ * of the function call overhead and because it needs boiler plate code
+ * it would not save us anything code-wise anyway.
+ */
+ while (start <= end)
+ {
+ int mid = start + (end - start) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (curoffset == tupoffset)
+ return true;
+ else if (curoffset > tupoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* No pagemode, we have to check the tuple itself. */
+ Snapshot snapshot = scan->rs_snapshot;
+ Buffer buffer = scan->rs_cbuf;
+
+ bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
+ snapshot);
+
+ return visible;
+ }
+}
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 37d05d1acc6..3d1139b5ba0 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,9 +40,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_ts_parser.h pg_ts_template.h pg_extension.h \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
- pg_default_acl.h pg_seclabel.h pg_shseclabel.h pg_collation.h pg_range.h \
- pg_transform.h \
- toasting.h indexing.h \
+ pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+ pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
)
# location of Catalog.pm
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 952cf204a0a..65e329eab07 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1150,7 +1150,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one
* old tuple at random
*/
- int k = (int) (targrows * sampler_random_fract());
+ int k = (int) (targrows * sampler_random_fract(rstate.randstate));
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index eeb8f19017e..478771c6ba1 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -731,6 +731,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+ case T_SampleScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
@@ -967,6 +968,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
else
pname = sname;
break;
+ case T_SampleScan:
+ {
+ /*
+ * Fetch the tablesample method name from RTE.
+ *
+ * It would be nice to also show parameters, but since we
+ * support arbitrary expressions as parameter it might get
+ * quite messy.
+ */
+ RangeTblEntry *rte;
+ rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
+ custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
+ pname = psprintf("Sample Scan (%s)", custom_name);
+ }
+ break;
case T_Material:
pname = sname = "Materialize";
break;
@@ -1089,6 +1105,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
+ case T_SampleScan:
+ ExplainScanTarget((Scan *) plan, es);
+ break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
@@ -1339,6 +1358,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_CteScan:
case T_WorkTableScan:
case T_SubqueryScan:
+ case T_SampleScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2238,6 +2258,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_TidScan:
case T_ForeignScan:
case T_CustomScan:
+ case T_SampleScan:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index bc5d373d68a..08cba6fa2b5 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -21,7 +21,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
nodeLimit.o nodeLockRows.o \
nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \
nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \
- nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
+ nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \
nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \
nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o spi.o
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 6ebad2f03f0..4948a265cb2 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -39,6 +39,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -155,6 +156,10 @@ ExecReScan(PlanState *node)
ExecReScanSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecReScanSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecReScanIndexScan((IndexScanState *) node);
break;
@@ -480,6 +485,9 @@ ExecSupportsBackwardScan(Plan *node)
}
return false;
+ case T_SampleScan:
+ return false;
+
case T_Material:
case T_Sort:
/* these don't evaluate tlist */
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index d87be963a95..bcd287f8742 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -261,6 +261,7 @@ search_plan_tree(PlanState *node, Oid table_oid)
* Relation scan nodes can all be treated alike
*/
case T_SeqScanState:
+ case T_SampleScanState:
case T_IndexScanState:
case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 9892499fb7a..03c2febc3e1 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -102,6 +102,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -190,6 +191,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_SampleScan:
+ result = (PlanState *) ExecInitSampleScan((SampleScan *) node,
+ estate, eflags);
+ break;
+
case T_IndexScan:
result = (PlanState *) ExecInitIndexScan((IndexScan *) node,
estate, eflags);
@@ -406,6 +412,10 @@ ExecProcNode(PlanState *node)
result = ExecSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ result = ExecSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
result = ExecIndexScan((IndexScanState *) node);
break;
@@ -644,6 +654,10 @@ ExecEndNode(PlanState *node)
ExecEndSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecEndSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecEndIndexScan((IndexScanState *) node);
break;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
new file mode 100644
index 00000000000..fc89d1dca03
--- /dev/null
+++ b/src/backend/executor/nodeSamplescan.c
@@ -0,0 +1,256 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSamplescan.c
+ * Support routines for sample scans of relations (table sampling).
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeSamplescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tablesample.h"
+#include "executor/executor.h"
+#include "executor/nodeSamplescan.h"
+#include "miscadmin.h"
+#include "parser/parsetree.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+
+static void InitScanRelation(SampleScanState *node, EState *estate,
+ int eflags, TableSampleClause *tablesample);
+static TupleTableSlot *SampleNext(SampleScanState *node);
+
+
+/* ----------------------------------------------------------------
+ * Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * SampleNext
+ *
+ * This is a workhorse for ExecSampleScan
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+SampleNext(SampleScanState *node)
+{
+ TupleTableSlot *slot;
+ TableSampleDesc *tsdesc;
+ HeapTuple tuple;
+
+ /*
+ * get information from the scan state
+ */
+ slot = node->ss.ss_ScanTupleSlot;
+ tsdesc = node->tsdesc;
+
+ tuple = tablesample_getnext(tsdesc);
+
+ if (tuple)
+ ExecStoreTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ tsdesc->heapScan->rs_cbuf, /* buffer associated with this tuple */
+ false); /* don't pfree this pointer */
+ else
+ ExecClearTuple(slot);
+
+ return slot;
+}
+
+/*
+ * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
+{
+ /* No need to recheck for SampleScan */
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecSampleScan(node)
+ *
+ * Scans the relation using the sampling method and returns
+ * the next qualifying tuple.
+ * We call the ExecScan() routine and pass it the appropriate
+ * access method functions.
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+ExecSampleScan(SampleScanState *node)
+{
+ return ExecScan((ScanState *) node,
+ (ExecScanAccessMtd) SampleNext,
+ (ExecScanRecheckMtd) SampleRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * InitScanRelation
+ *
+ * Set up to access the scan relation.
+ * ----------------------------------------------------------------
+ */
+static void
+InitScanRelation(SampleScanState *node, EState *estate, int eflags,
+ TableSampleClause *tablesample)
+{
+ Relation currentRelation;
+
+ /*
+ * get the relation object id from the relid'th entry in the range table,
+ * open that relation and acquire appropriate lock on it.
+ */
+ currentRelation = ExecOpenScanRelation(estate,
+ ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ eflags);
+
+ node->ss.ss_currentRelation = currentRelation;
+
+ /*
+ * Even though we aren't going to do a conventional seqscan, it is useful
+ * to create a HeapScanDesc --- many of the fields in it are usable.
+ */
+ node->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
+ tablesample->tsmseqscan,
+ tablesample->tsmpagemode);
+
+ /* and report the scan tuple slot's rowtype */
+ ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
+}
+
+
+/* ----------------------------------------------------------------
+ * ExecInitSampleScan
+ * ----------------------------------------------------------------
+ */
+SampleScanState *
+ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
+{
+ SampleScanState *scanstate;
+ RangeTblEntry *rte = rt_fetch(node->scanrelid,
+ estate->es_range_table);
+
+ Assert(outerPlan(node) == NULL);
+ Assert(innerPlan(node) == NULL);
+ Assert(rte->tablesample != NULL);
+
+ /*
+ * create state structure
+ */
+ scanstate = makeNode(SampleScanState);
+ scanstate->ss.ps.plan = (Plan *) node;
+ scanstate->ss.ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &scanstate->ss.ps);
+
+ /*
+ * initialize child expressions
+ */
+ scanstate->ss.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->plan.targetlist,
+ (PlanState *) scanstate);
+ scanstate->ss.ps.qual = (List *)
+ ExecInitExpr((Expr *) node->plan.qual,
+ (PlanState *) scanstate);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
+ ExecInitScanTupleSlot(estate, &scanstate->ss);
+
+ /*
+ * initialize scan relation
+ */
+ InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+
+ scanstate->ss.ps.ps_TupFromTlist = false;
+
+ /*
+ * Initialize result tuple type and projection info.
+ */
+ ExecAssignResultTypeFromTL(&scanstate->ss.ps);
+ ExecAssignScanProjectionInfo(&scanstate->ss);
+
+ scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+
+ return scanstate;
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndSampleScan
+ *
+ * frees any storage allocated through C routines.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSampleScan(SampleScanState *node)
+{
+ /*
+ * Tell sampling function that we finished the scan.
+ */
+ tablesample_end(node->tsdesc);
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ss.ps);
+
+ /*
+ * clean out the tuple table
+ */
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+
+ /*
+ * close heap scan
+ */
+ heap_endscan(node->ss.ss_currentScanDesc);
+
+ /*
+ * close the heap relation.
+ */
+ ExecCloseScanRelation(node->ss.ss_currentRelation);
+}
+
+/* ----------------------------------------------------------------
+ * Join Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * ExecReScanSampleScan
+ *
+ * Rescans the relation.
+ *
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanSampleScan(SampleScanState *node)
+{
+ heap_rescan(node->ss.ss_currentScanDesc, NULL);
+
+ /*
+ * Tell sampling function to reset its state for rescan.
+ */
+ tablesample_reset(node->tsdesc);
+
+ ExecScanReScan(&node->ss);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 25839eed945..bdc7e61935c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -642,6 +642,22 @@ _copyCustomScan(const CustomScan *from)
}
/*
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+ SampleScan *newnode = makeNode(SampleScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ return newnode;
+}
+
+/*
* CopyJoinFields
*
* This function copies the fields of the Join node. It is used by
@@ -2063,6 +2079,7 @@ _copyRangeTblEntry(const RangeTblEntry *from)
COPY_SCALAR_FIELD(rtekind);
COPY_SCALAR_FIELD(relid);
COPY_SCALAR_FIELD(relkind);
+ COPY_NODE_FIELD(tablesample);
COPY_NODE_FIELD(subquery);
COPY_SCALAR_FIELD(security_barrier);
COPY_SCALAR_FIELD(jointype);
@@ -2224,6 +2241,40 @@ _copyCommonTableExpr(const CommonTableExpr *from)
return newnode;
}
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+ RangeTableSample *newnode = makeNode(RangeTableSample);
+
+ COPY_NODE_FIELD(relation);
+ COPY_STRING_FIELD(method);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+ TableSampleClause *newnode = makeNode(TableSampleClause);
+
+ COPY_SCALAR_FIELD(tsmid);
+ COPY_SCALAR_FIELD(tsmseqscan);
+ COPY_SCALAR_FIELD(tsmpagemode);
+ COPY_SCALAR_FIELD(tsminit);
+ COPY_SCALAR_FIELD(tsmnextblock);
+ COPY_SCALAR_FIELD(tsmnexttuple);
+ COPY_SCALAR_FIELD(tsmexaminetuple);
+ COPY_SCALAR_FIELD(tsmend);
+ COPY_SCALAR_FIELD(tsmreset);
+ COPY_SCALAR_FIELD(tsmcost);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
static A_Expr *
_copyAExpr(const A_Expr *from)
{
@@ -4179,6 +4230,9 @@ copyObject(const void *from)
case T_CustomScan:
retval = _copyCustomScan(from);
break;
+ case T_SampleScan:
+ retval = _copySampleScan(from);
+ break;
case T_Join:
retval = _copyJoin(from);
break;
@@ -4842,6 +4896,12 @@ copyObject(const void *from)
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
+ case T_RangeTableSample:
+ retval = _copyRangeTableSample(from);
+ break;
+ case T_TableSampleClause:
+ retval = _copyTableSampleClause(from);
+ break;
case T_FuncWithArgs:
retval = _copyFuncWithArgs(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index c4b3615caf3..d483221fb7a 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2359,6 +2359,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b)
COMPARE_SCALAR_FIELD(rtekind);
COMPARE_SCALAR_FIELD(relid);
COMPARE_SCALAR_FIELD(relkind);
+ COMPARE_NODE_FIELD(tablesample);
COMPARE_NODE_FIELD(subquery);
COMPARE_SCALAR_FIELD(security_barrier);
COMPARE_SCALAR_FIELD(jointype);
@@ -2503,6 +2504,36 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
}
static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+ COMPARE_NODE_FIELD(relation);
+ COMPARE_STRING_FIELD(method);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+ COMPARE_SCALAR_FIELD(tsmid);
+ COMPARE_SCALAR_FIELD(tsmseqscan);
+ COMPARE_SCALAR_FIELD(tsmpagemode);
+ COMPARE_SCALAR_FIELD(tsminit);
+ COMPARE_SCALAR_FIELD(tsmnextblock);
+ COMPARE_SCALAR_FIELD(tsmnexttuple);
+ COMPARE_SCALAR_FIELD(tsmexaminetuple);
+ COMPARE_SCALAR_FIELD(tsmend);
+ COMPARE_SCALAR_FIELD(tsmreset);
+ COMPARE_SCALAR_FIELD(tsmcost);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
+static bool
_equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
{
COMPARE_SCALAR_FIELD(xmloption);
@@ -3236,6 +3267,12 @@ equal(const void *a, const void *b)
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
+ case T_RangeTableSample:
+ retval = _equalRangeTableSample(a, b);
+ break;
+ case T_TableSampleClause:
+ retval = _equalTableSampleClause(a, b);
+ break;
case T_FuncWithArgs:
retval = _equalFuncWithArgs(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index eac02159236..42d62d32d93 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2058,6 +2058,14 @@ range_table_walker(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ if (walker(rte->tablesample->args, context))
+ return true;
+ if (walker(rte->tablesample->repeatable, context))
+ return true;
+ }
+ break;
case RTE_CTE:
/* nothing to do */
break;
@@ -2813,6 +2821,14 @@ range_table_mutator(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ MUTATE(rte->tablesample->args, rte->tablesample->args,
+ List *);
+ MUTATE(rte->tablesample->repeatable,
+ rte->tablesample->repeatable, Node *);
+ }
+ break;
case RTE_CTE:
/* we don't bother to copy eref, aliases, etc; OK? */
break;
@@ -3309,6 +3325,18 @@ raw_expression_tree_walker(Node *node,
break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
+ case T_RangeTableSample:
+ {
+ RangeTableSample *rts = (RangeTableSample *) node;
+
+ if (walker(rts->relation, context))
+ return true;
+ if (walker(rts->repeatable, context))
+ return true;
+ if (walker(rts->args, context))
+ return true;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index fe868b889d9..7918553da0a 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -592,6 +592,14 @@ _outCustomScan(StringInfo str, const CustomScan *node)
}
static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+ WRITE_NODE_TYPE("SAMPLESCAN");
+
+ _outScanInfo(str, (const Scan *) node);
+}
+
+static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_TYPE("JOIN");
@@ -2445,6 +2453,36 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
}
static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+ WRITE_NODE_TYPE("RANGETABLESAMPLE");
+
+ WRITE_NODE_FIELD(relation);
+ WRITE_STRING_FIELD(method);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+ WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
+
+ WRITE_OID_FIELD(tsmid);
+ WRITE_BOOL_FIELD(tsmseqscan);
+ WRITE_BOOL_FIELD(tsmpagemode);
+ WRITE_OID_FIELD(tsminit);
+ WRITE_OID_FIELD(tsmnextblock);
+ WRITE_OID_FIELD(tsmnexttuple);
+ WRITE_OID_FIELD(tsmexaminetuple);
+ WRITE_OID_FIELD(tsmend);
+ WRITE_OID_FIELD(tsmreset);
+ WRITE_OID_FIELD(tsmcost);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
+static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_NODE_TYPE("SETOPERATIONSTMT");
@@ -2474,6 +2512,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
case RTE_RELATION:
WRITE_OID_FIELD(relid);
WRITE_CHAR_FIELD(relkind);
+ WRITE_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
WRITE_NODE_FIELD(subquery);
@@ -2973,6 +3012,9 @@ _outNode(StringInfo str, const void *obj)
case T_CustomScan:
_outCustomScan(str, obj);
break;
+ case T_SampleScan:
+ _outSampleScan(str, obj);
+ break;
case T_Join:
_outJoin(str, obj);
break;
@@ -3319,6 +3361,12 @@ _outNode(StringInfo str, const void *obj)
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
+ case T_RangeTableSample:
+ _outRangeTableSample(str, obj);
+ break;
+ case T_TableSampleClause:
+ _outTableSampleClause(str, obj);
+ break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 8136306e1e5..c8fb894a75a 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -352,6 +352,46 @@ _readCommonTableExpr(void)
}
/*
+ * _readRangeTableSample
+ */
+static RangeTableSample *
+_readRangeTableSample(void)
+{
+ READ_LOCALS(RangeTableSample);
+
+ READ_NODE_FIELD(relation);
+ READ_STRING_FIELD(method);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
+/*
+ * _readTableSampleClause
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+ READ_LOCALS(TableSampleClause);
+
+ READ_OID_FIELD(tsmid);
+ READ_BOOL_FIELD(tsmseqscan);
+ READ_BOOL_FIELD(tsmpagemode);
+ READ_OID_FIELD(tsminit);
+ READ_OID_FIELD(tsmnextblock);
+ READ_OID_FIELD(tsmnexttuple);
+ READ_OID_FIELD(tsmexaminetuple);
+ READ_OID_FIELD(tsmend);
+ READ_OID_FIELD(tsmreset);
+ READ_OID_FIELD(tsmcost);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
+/*
* _readSetOperationStmt
*/
static SetOperationStmt *
@@ -1255,6 +1295,7 @@ _readRangeTblEntry(void)
case RTE_RELATION:
READ_OID_FIELD(relid);
READ_CHAR_FIELD(relkind);
+ READ_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
READ_NODE_FIELD(subquery);
@@ -1351,6 +1392,10 @@ parseNodeString(void)
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
+ else if (MATCH("RANGETABLESAMPLE", 16))
+ return_value = _readRangeTableSample();
+ else if (MATCH("TABLESAMPLECLAUSE", 17))
+ return_value = _readTableSampleClause();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 9caca94f64b..4cd1bf65e74 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -71,6 +71,10 @@ static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -265,6 +269,11 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_size(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Sampled relation */
+ set_tablesample_rel_size(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -332,6 +341,11 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Build sample scan on relation */
+ set_tablesample_rel_pathlist(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -418,6 +432,41 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
}
/*
+ * set_tablesample_rel_size
+ * Set size estimates for a sampled relation.
+ */
+static void
+set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablesample_rel_pathlist
+ * Build access paths for a sampled relation
+ *
+ * There is only one possible path - sampling scan
+ */
+static void
+set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+ Path *path;
+
+ /*
+ * We don't support pushing join clauses into the quals of a seqscan, but
+ * it could still have required parameterization due to LATERAL refs in
+ * its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* We only do sample scan if it was requested */
+ path = create_samplescan_path(root, rel, required_outer);
+ rel->pathlist = list_make1(path);
+}
+
+/*
* set_foreign_size
* Set size estimates for a foreign table RTE
*/
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1a0d358c5f7..c2b2b7622a6 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -220,6 +220,73 @@ cost_seqscan(Path *path, PlannerInfo *root,
}
/*
+ * cost_samplescan
+ * Determines and returns the cost of scanning a relation using sampling.
+ *
+ * From planner/optimizer perspective, we don't care all that much about cost
+ * itself since there is always only one scan path to consider when sampling
+ * scan is present, but number of rows estimation is still important.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double spc_seq_page_cost,
+ spc_random_page_cost,
+ spc_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ BlockNumber pages;
+ double tuples;
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ TableSampleClause *tablesample = rte->tablesample;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (path->param_info)
+ path->rows = path->param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Call the sampling method's costing function. */
+ OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
+ PointerGetDatum(path), PointerGetDatum(baserel),
+ PointerGetDatum(tablesample->args),
+ PointerGetDatum(&pages), PointerGetDatum(&tuples));
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+
+ spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
+ spc_random_page_cost;
+
+ /*
+ * disk costs
+ */
+ run_cost += spc_page_cost * pages;
+
+ /* CPU costs */
+ get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_index
* Determines and returns the cost of scanning a relation using an index.
*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 783e34b4fbc..c6095167e80 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -59,6 +59,8 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path);
static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
+static SampleScan *create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses);
static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path,
List *tlist, List *scan_clauses, bool indexonly);
static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
@@ -101,6 +103,7 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_path_costsize(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
Oid indexid, List *indexqual, List *indexqualorig,
List *indexorderby, List *indexorderbyorig, Oid *indexorderbyops,
@@ -229,6 +232,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path)
switch (best_path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -344,6 +348,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
scan_clauses);
break;
+ case T_SampleScan:
+ plan = (Plan *) create_samplescan_plan(root,
+ best_path,
+ tlist,
+ scan_clauses);
+ break;
+
case T_IndexScan:
plan = (Plan *) create_indexscan_plan(root,
(IndexPath *) best_path,
@@ -547,6 +558,7 @@ disuse_physical_tlist(PlannerInfo *root, Plan *plan, Path *path)
switch (path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -1134,6 +1146,45 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
}
/*
+ * create_samplescan_plan
+ * Returns a samplecan plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static SampleScan *
+create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses)
+{
+ SampleScan *scan_plan;
+ Index scan_relid = best_path->parent->relid;
+
+ /* it should be a base rel with tablesample clause... */
+ Assert(scan_relid > 0);
+ Assert(best_path->parent->rtekind == RTE_RELATION);
+ Assert(best_path->pathtype == T_SampleScan);
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ /* Replace any outer-relation variables with nestloop params */
+ if (best_path->param_info)
+ {
+ scan_clauses = (List *)
+ replace_nestloop_params(root, (Node *) scan_clauses);
+ }
+
+ scan_plan = make_samplescan(tlist,
+ scan_clauses,
+ scan_relid);
+
+ copy_path_costsize(&scan_plan->plan, best_path);
+
+ return scan_plan;
+}
+
+/*
* create_indexscan_plan
* Returns an indexscan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
@@ -3378,6 +3429,24 @@ make_seqscan(List *qptlist,
return node;
}
+static SampleScan *
+make_samplescan(List *qptlist,
+ List *qpqual,
+ Index scanrelid)
+{
+ SampleScan *node = makeNode(SampleScan);
+ Plan *plan = &node->plan;
+
+ /* cost should be inserted by caller */
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scanrelid = scanrelid;
+
+ return node;
+}
+
static IndexScan *
make_indexscan(List *qptlist,
List *qpqual,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8de57c8e6bb..9ba10516bb2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -60,6 +60,7 @@ planner_hook_type planner_hook = NULL;
#define EXPRKIND_LIMIT 6
#define EXPRKIND_APPINFO 7
#define EXPRKIND_PHV 8
+#define EXPRKIND_TABLESAMPLE 9
/* Passthrough data for standard_qp_callback */
typedef struct
@@ -486,7 +487,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
int kind;
- if (rte->rtekind == RTE_SUBQUERY)
+ if (rte->rtekind == RTE_RELATION)
+ {
+ if (rte->tablesample)
+ {
+ rte->tablesample->args = (List *)
+ preprocess_expression(root, (Node *) rte->tablesample->args,
+ EXPRKIND_TABLESAMPLE);
+ rte->tablesample->repeatable = (Node *)
+ preprocess_expression(root, rte->tablesample->repeatable,
+ EXPRKIND_TABLESAMPLE);
+ }
+ }
+ else if (rte->rtekind == RTE_SUBQUERY)
{
/*
* We don't want to do all preprocessing yet on the subquery's
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 517409d28a0..73b19888369 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -451,6 +451,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
fix_scan_list(root, splan->plan.qual, rtoffset);
}
break;
+ case T_SampleScan:
+ {
+ SampleScan *splan = (SampleScan *) plan;
+
+ splan->scanrelid += rtoffset;
+ splan->plan.targetlist =
+ fix_scan_list(root, splan->plan.targetlist, rtoffset);
+ splan->plan.qual =
+ fix_scan_list(root, splan->plan.qual, rtoffset);
+ }
+ break;
case T_IndexScan:
{
IndexScan *splan = (IndexScan *) plan;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index afccee53acf..2f7f5c0df0e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2167,6 +2167,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_SeqScan:
+ case T_SampleScan:
context.paramids = bms_add_members(context.paramids, scan_params);
break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index faca30b322d..ea7a47bdf45 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -706,6 +706,26 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
}
/*
+ * create_samplescan_path
+ * Like seqscan but uses sampling function while scanning.
+ */
+Path *
+create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_SampleScan;
+ pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* samplescan has unordered result */
+
+ cost_samplescan(pathnode, root, rel);
+
+ return pathnode;
+}
+
+/*
* create_index_path
* Creates a path node for an index scan.
*
@@ -1778,6 +1798,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys,
required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
default:
break;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2dce87879e7..14397830686 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -454,6 +454,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <range> relation_expr
%type <range> relation_expr_opt_alias
%type <target> target_el single_set_clause set_target insert_column_item
+%type <node> relation_expr_tablesample tablesample_clause opt_repeatable_clause
%type <str> generic_option_name
%type <node> generic_option_arg
@@ -625,8 +626,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
SYMMETRIC SYSID SYSTEM_P
- TABLE TABLES TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP
- TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
+ TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN
+ TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
TRUNCATE TRUSTED TYPE_P TYPES_P
UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNLOGGED
@@ -10386,6 +10387,10 @@ table_ref: relation_expr opt_alias_clause
$1->alias = $2;
$$ = (Node *) $1;
}
+ | relation_expr_tablesample
+ {
+ $$ = (Node *) $1;
+ }
| func_table func_alias_clause
{
RangeFunction *n = (RangeFunction *) $1;
@@ -10711,6 +10716,32 @@ relation_expr_opt_alias: relation_expr %prec UMINUS
}
;
+
+relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
+ {
+ RangeTableSample *n = (RangeTableSample *) $3;
+ n->relation = $1;
+ n->relation->alias = $2;
+ $$ = (Node *) n;
+ }
+ ;
+
+tablesample_clause:
+ TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+ {
+ RangeTableSample *n = makeNode(RangeTableSample);
+ n->method = $2;
+ n->args = $4;
+ n->repeatable = $6;
+ $$ = (Node *) n;
+ }
+ ;
+
+opt_repeatable_clause:
+ REPEATABLE '(' a_expr ')' { $$ = (Node *) $3; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
/*
* func_table represents a function invocation in a FROM list. It can be
* a plain function call, like "foo(...)", or a ROWS FROM expression with
@@ -13804,6 +13835,7 @@ type_func_name_keyword:
| OVERLAPS
| RIGHT
| SIMILAR
+ | TABLESAMPLE
| VERBOSE
;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 73c505ed85b..6b1bbe57d0e 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -17,6 +17,7 @@
#include "access/heapam.h"
#include "catalog/catalog.h"
+#include "access/htup_details.h"
#include "catalog/heap.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
@@ -31,6 +32,7 @@
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
+#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
@@ -39,6 +41,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/syscache.h"
/* Convenience macro for the most common makeNamespaceItem() case */
@@ -419,6 +422,39 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
return result;
}
+static RangeTblEntry *
+transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
+{
+ RangeTblEntry *rte = NULL;
+ CommonTableExpr *cte = NULL;
+ TableSampleClause *tablesample = NULL;
+
+ /* if relation has an unqualified name, it might be a CTE reference */
+ if (!rv->relation->schemaname)
+ {
+ Index levelsup;
+ cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
+ }
+
+ /* We first need to build a range table entry */
+ if (!cte)
+ rte = transformTableEntry(pstate, rv->relation);
+
+ if (!rte ||
+ (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
+ parser_errposition(pstate, rv->relation->location)));
+
+ tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
+ rv->args, rv->relation->location);
+ rte->tablesample = tablesample;
+
+ return rte;
+}
+
/*
* transformTableEntry --- transform a RangeVar (simple relation reference)
*/
@@ -1127,6 +1163,26 @@ transformFromClauseItem(ParseState *pstate, Node *n,
return (Node *) j;
}
+ else if (IsA(n, RangeTableSample))
+ {
+ /* Tablesample reference */
+ RangeTableSample *rv = (RangeTableSample *) n;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte = NULL;
+ int rtindex;
+
+ rte = transformTableSampleEntry(pstate, rv);
+
+ /* assume new rte is at end */
+ rtindex = list_length(pstate->p_rtable);
+ Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
+ *top_rte = rte;
+ *top_rti = rtindex;
+ *namespace = list_make1(makeDefaultNSItem(rte));
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = rtindex;
+ return (Node *) rtr;
+ }
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
return NULL; /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index f7affebf84e..fa50f92d8dd 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,6 +18,7 @@
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
+#include "catalog/pg_tablesample_method.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
@@ -26,6 +27,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
+#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
@@ -767,6 +769,147 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
}
+/*
+ * ParseTableSample
+ *
+ * Parse TABLESAMPLE clause and process the arguments
+ */
+TableSampleClause *
+ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
+ List *sampleargs, int location)
+{
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ Form_pg_proc procform;
+ TableSampleClause *tablesample;
+ List *fargs;
+ ListCell *larg;
+ int nargs, initnargs;
+ Oid init_arg_types[FUNC_MAX_ARGS];
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablesample method \"%s\" does not exist",
+ samplemethod),
+ parser_errposition(pstate, location)));
+
+ tablesample = makeNode(TableSampleClause);
+ tablesample->tsmid = HeapTupleGetOid(tuple);
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+
+ tablesample->tsmseqscan = tsm->tsmseqscan;
+ tablesample->tsmpagemode = tsm->tsmpagemode;
+ tablesample->tsminit = tsm->tsminit;
+ tablesample->tsmnextblock = tsm->tsmnextblock;
+ tablesample->tsmnexttuple = tsm->tsmnexttuple;
+ tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
+ tablesample->tsmend = tsm->tsmend;
+ tablesample->tsmreset = tsm->tsmreset;
+ tablesample->tsmcost = tsm->tsmcost;
+
+ ReleaseSysCache(tuple);
+
+ /* Validate the parameters against init function definition. */
+ tuple = SearchSysCache1(PROCOID,
+ ObjectIdGetDatum(tablesample->tsminit));
+
+ if (!HeapTupleIsValid(tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for function %u",
+ tablesample->tsminit);
+
+ procform = (Form_pg_proc) GETSTRUCT(tuple);
+ initnargs = procform->pronargs;
+ Assert(initnargs >= 3);
+
+ /*
+ * First parameter is used to pass the SampleScanState, second is
+ * seed (REPEATABLE), skip the processing for them here, just assert
+ * that the types are correct.
+ */
+ Assert(procform->proargtypes.values[0] == INTERNALOID);
+ Assert(procform->proargtypes.values[1] == INT4OID);
+ initnargs -= 2;
+ memcpy(init_arg_types, procform->proargtypes.values + 2,
+ initnargs * sizeof(Oid));
+
+ /* Now we are done with the catalog */
+ ReleaseSysCache(tuple);
+
+ /* Process repeatable (seed) */
+ if (repeatable != NULL)
+ {
+ Node *arg = repeatable;
+
+ if (arg && IsA(arg, A_Const))
+ {
+ A_Const *con = (A_Const *) arg;
+
+ if (con->val.type == T_Null)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value"),
+ parser_errposition(pstate, con->location)));
+
+ }
+
+ arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
+ tablesample->repeatable = arg;
+ }
+ else
+ tablesample->repeatable = NULL;
+
+ /* Check user provided expected number of arguments. */
+ if (list_length(sampleargs) != initnargs)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
+ "tablesample method \"%s\" expects %d arguments got %d",
+ initnargs,
+ samplemethod,
+ initnargs, list_length(sampleargs)),
+ parser_errposition(pstate, location)));
+
+ /* Transform the arguments, typecasting them as needed. */
+ fargs = NIL;
+ nargs = 0;
+ foreach(larg, sampleargs)
+ {
+ Node *inarg = (Node *) lfirst(larg);
+ Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
+ Oid argtype = exprType(arg);
+
+ if (argtype != init_arg_types[nargs])
+ {
+ if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
+ COERCION_IMPLICIT))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong parameter %d for tablesample method \"%s\"",
+ nargs + 1, samplemethod),
+ errdetail("Expected type %s got %s.",
+ format_type_be(init_arg_types[nargs]),
+ format_type_be(argtype)),
+ parser_errposition(pstate, exprLocation(inarg))));
+
+ arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
+ COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
+ }
+
+ fargs = lappend(fargs, arg);
+ nargs++;
+ }
+
+ /* Pass the arguments down */
+ tablesample->args = fargs;
+
+ return tablesample;
+}
+
/* func_match_argtypes()
*
* Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 39302a410b8..e27afd1a3e0 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -2209,6 +2209,9 @@ view_query_is_auto_updatable(Query *viewquery, bool check_cols)
base_rte->relkind != RELKIND_VIEW))
return gettext_noop("Views that do not select from a single table or view are not automatically updatable.");
+ if (base_rte->tablesample)
+ return gettext_noop("Views containing TABLESAMPLE are not automatically updatable.");
+
/*
* Check that the view has at least one updatable column. This is required
* for INSERT/UPDATE but not for DELETE.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 903e80aea35..298eebf5e67 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,6 +32,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -345,6 +346,8 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags);
static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags, int wrapColumn);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
TupleDesc resultDesc,
int prettyFlags, int wrapColumn, int startIndent);
@@ -4220,6 +4223,50 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
heap_close(ev_relation, AccessShareLock);
}
+/* ----------
+ * get_tablesample_def - Convert TableSampleClause back to SQL
+ * ----------
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ char *tsmname;
+ int nargs;
+ ListCell *l;
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("cache lookup failed for tablesample method %u",
+ tablesample->tsmid)));
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ tsmname = NameStr(tsm->tsmname);
+ appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
+
+ ReleaseSysCache(tuple);
+
+ nargs = 0;
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, true);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr(tablesample->repeatable, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+}
/* ----------
* get_query_def - Parse back one query parsetree
@@ -8529,6 +8576,9 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
only_marker(rte),
generate_relation_name(rte->relid,
context->namespaces));
+
+ if (rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
break;
case RTE_SUBQUERY:
/* Subquery RTE */
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 1dc293297d9..f259751e157 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,6 +32,7 @@
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_transform.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -2996,3 +2997,29 @@ get_range_subtype(Oid rangeOid)
else
return InvalidOid;
}
+
+/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */
+
+/*
+ * get_tablesample_method_name - given a tablesample method OID,
+ * look up the name or NULL if not found
+ */
+char *
+get_tablesample_method_name(Oid tsmid)
+{
+ HeapTuple tuple;
+
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
+ if (HeapTupleIsValid(tuple))
+ {
+ Form_pg_tablesample_method tup =
+ (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ char *result;
+
+ result = pstrdup(NameStr(tup->tsmname));
+ ReleaseSysCache(tuple);
+ return result;
+ }
+ else
+ return NULL;
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index f58e1cebf2a..7def1be32ae 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,6 +56,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
@@ -666,6 +667,28 @@ static const struct cachedesc cacheinfo[] = {
},
128
},
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */
+ TableSampleMethodNameIndexId,
+ 1,
+ {
+ Anum_pg_tablesample_method_tsmname,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */
+ TableSampleMethodOidIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 1eeabaf1588..9becc63bf82 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -46,6 +46,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
bs->n = samplesize;
bs->t = 0; /* blocks scanned so far */
bs->m = 0; /* blocks selected so far */
+
+ sampler_random_init_state(randseed, bs->randstate);
}
bool
@@ -92,7 +94,7 @@ BlockSampler_Next(BlockSampler bs)
* less than k, which means that we cannot fail to select enough blocks.
*----------
*/
- V = sampler_random_fract();
+ V = sampler_random_fract(bs->randstate);
p = 1.0 - (double) k / (double) K;
while (V < p)
{
@@ -126,8 +128,14 @@ BlockSampler_Next(BlockSampler bs)
void
reservoir_init_selection_state(ReservoirState rs, int n)
{
+ /*
+ * Reservoir sampling is not used anywhere where it would need to return
+ * repeatable results so we can initialize it randomly.
+ */
+ sampler_random_init_state(random(), rs->randstate);
+
/* Initial value of W (for use when Algorithm Z is first applied) */
- *rs = exp(-log(sampler_random_fract()) / n);
+ rs->W = exp(-log(sampler_random_fract(rs->randstate)) / n);
}
double
@@ -142,7 +150,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
double V,
quot;
- V = sampler_random_fract(); /* Generate V */
+ V = sampler_random_fract(rs->randstate); /* Generate V */
S = 0;
t += 1;
/* Note: "num" in Vitter's code is always equal to t - n */
@@ -158,7 +166,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
else
{
/* Now apply Algorithm Z */
- double W = *rs;
+ double W = rs->W;
double term = t - (double) n + 1;
for (;;)
@@ -174,7 +182,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
tmp;
/* Generate U and X */
- U = sampler_random_fract();
+ U = sampler_random_fract(rs->randstate);
X = t * (W - 1.0);
S = floor(X); /* S is tentatively set to floor(X) */
/* Test if U <= h(S)/cg(X) in the manner of (6.3) */
@@ -203,11 +211,11 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
y *= numer / denom;
denom -= 1;
}
- W = exp(-log(sampler_random_fract()) / n); /* Generate W in advance */
+ W = exp(-log(sampler_random_fract(rs->randstate)) / n); /* Generate W in advance */
if (exp(log(y) / n) <= (t + X) / t)
break;
}
- *rs = W;
+ rs->W = W;
}
return S;
}
@@ -217,10 +225,17 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
* Random number generator used by sampling
*----------
*/
+void
+sampler_random_init_state(long seed, SamplerRandomState randstate)
+{
+ randstate[0] = RAND48_SEED_0;
+ randstate[1] = (unsigned short) seed;
+ randstate[2] = (unsigned short) (seed >> 16);
+}
/* Select a random value R uniformly distributed in (0 - 1) */
double
-sampler_random_fract()
+sampler_random_fract(SamplerRandomState randstate)
{
- return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+ return pg_erand48(randstate);
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 49c8ca4d66e..eec7c95b218 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -114,8 +114,12 @@ extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
bool allow_strat, bool allow_sync);
extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key);
+extern HeapScanDesc heap_beginscan_sampling(Relation relation,
+ Snapshot snapshot, int nkeys, ScanKey key,
+ bool allow_strat, bool allow_pagemode);
extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
BlockNumber endBlk);
+extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
extern void heap_endscan(HeapScanDesc scan);
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 5a0d724aca5..1b9b2993957 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -29,6 +29,7 @@ typedef struct HeapScanDescData
int rs_nkeys; /* number of scan keys */
ScanKey rs_key; /* array of scan key descriptors */
bool rs_bitmapscan; /* true if this is really a bitmap scan */
+ bool rs_samplescan; /* true if this is really a sample scan */
bool rs_pageatatime; /* verify visibility page-at-a-time? */
bool rs_allow_strat; /* allow or disallow use of access strategy */
bool rs_allow_sync; /* allow or disallow use of syncscan */
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h
new file mode 100644
index 00000000000..222fa8d5561
--- /dev/null
+++ b/src/include/access/tablesample.h
@@ -0,0 +1,60 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.h
+ * Public header file for TABLESAMPLE clause interface
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/tablesample.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TABLESAMPLE_H
+#define TABLESAMPLE_H
+
+#include "access/relscan.h"
+#include "executor/executor.h"
+
+typedef struct TableSampleDesc {
+ HeapScanDesc heapScan;
+ TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */
+
+ void *tsmdata; /* private method data */
+
+ /* These point to he function of the TABLESAMPLE Method. */
+ FmgrInfo tsminit;
+ FmgrInfo tsmnextblock;
+ FmgrInfo tsmnexttuple;
+ FmgrInfo tsmexaminetuple;
+ FmgrInfo tsmreset;
+ FmgrInfo tsmend;
+} TableSampleDesc;
+
+
+extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
+ TableSampleClause *tablesample);
+extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
+extern void tablesample_reset(TableSampleDesc *desc);
+extern void tablesample_end(TableSampleDesc *desc);
+extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
+extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
+ bool *visible);
+
+extern Datum tsm_system_init(PG_FUNCTION_ARGS);
+extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
+extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
+extern Datum tsm_system_end(PG_FUNCTION_ARGS);
+extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
+extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
+
+extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
+
+
+#endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 71e0010a6f8..f20567ed5f5 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,6 +316,11 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname varchar_pattern_ops));
#define ReplicationOriginNameIndex 6002
+DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
+#define TableSampleMethodNameIndexId 3331
+DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
+#define TableSampleMethodOidIndexId 3332
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1c9edbc3b37..c2185bd9add 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5297,6 +5297,33 @@ DESCR("get an individual replication origin's replication progress");
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
+/* tablesample */
+DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
+DESCR("tsm_system_init(internal)");
+DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
+DESCR("tsm_system_nextblock(internal)");
+DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
+DESCR("tsm_system_nexttuple(internal)");
+DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
+DESCR("tsm_system_end(internal)");
+DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
+DESCR("tsm_system_reset(internal)");
+DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
+DESCR("tsm_system_cost(internal)");
+
+DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_init(internal)");
+DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_nextblock(internal)");
+DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_nexttuple(internal)");
+DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_end(internal)");
+DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_reset(internal)");
+DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_cost(internal)");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h
new file mode 100644
index 00000000000..968d1e696ab
--- /dev/null
+++ b/src/include/catalog/pg_tablesample_method.h
@@ -0,0 +1,78 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_tablesample_method.h
+ * definition of the table scan methods.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_tablesample_method.h
+ *
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_TABLESAMPLE_METHOD_H
+#define PG_TABLESAMPLE_METHOD_H
+
+#include "catalog/genbki.h"
+#include "catalog/objectaddress.h"
+
+/* ----------------
+ * pg_tablesample_method definition. cpp turns this into
+ * typedef struct FormData_pg_tablesample_method
+ * ----------------
+ */
+#define TableSampleMethodRelationId 3330
+
+CATALOG(pg_tablesample_method,3330)
+{
+ NameData tsmname; /* tablesample method name */
+ bool tsmseqscan; /* does this method scan whole table sequentially? */
+ bool tsmpagemode; /* does this method scan page at a time? */
+ regproc tsminit; /* init scan function */
+ regproc tsmnextblock; /* function returning next block to sample
+ or InvalidBlockOffset if finished */
+ regproc tsmnexttuple; /* function returning next tuple offset from current block
+ or InvalidOffsetNumber if end of the block was reacher */
+ regproc tsmexaminetuple; /* optional function which can examine tuple contents and
+ decide if tuple should be returned or not */
+ regproc tsmend; /* end scan function*/
+ regproc tsmreset; /* reset state - used by rescan */
+ regproc tsmcost; /* costing function */
+} FormData_pg_tablesample_method;
+
+/* ----------------
+ * Form_pg_tablesample_method corresponds to a pointer to a tuple with
+ * the format of pg_tablesample_method relation.
+ * ----------------
+ */
+typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
+
+/* ----------------
+ * compiler constants for pg_tablesample_method
+ * ----------------
+ */
+#define Natts_pg_tablesample_method 10
+#define Anum_pg_tablesample_method_tsmname 1
+#define Anum_pg_tablesample_method_tsmseqscan 2
+#define Anum_pg_tablesample_method_tsmpagemode 3
+#define Anum_pg_tablesample_method_tsminit 4
+#define Anum_pg_tablesample_method_tsmnextblock 5
+#define Anum_pg_tablesample_method_tsmnexttuple 6
+#define Anum_pg_tablesample_method_tsmexaminetuple 7
+#define Anum_pg_tablesample_method_tsmend 8
+#define Anum_pg_tablesample_method_tsmreset 9
+#define Anum_pg_tablesample_method_tsmcost 10
+
+/* ----------------
+ * initial contents of pg_tablesample_method
+ * ----------------
+ */
+
+DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
+DESCR("SYSTEM table sampling method");
+DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
+DESCR("BERNOULLI table sampling method");
+
+#endif /* PG_TABLESAMPLE_METHOD_H */
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h
new file mode 100644
index 00000000000..4b769daec8b
--- /dev/null
+++ b/src/include/executor/nodeSamplescan.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSamplescan.h
+ *
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeSamplescan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODESAMPLESCAN_H
+#define NODESAMPLESCAN_H
+
+#include "nodes/execnodes.h"
+
+extern SampleScanState *ExecInitSampleScan(SampleScan *node, EState *estate, int eflags);
+extern TupleTableSlot *ExecSampleScan(SampleScanState *node);
+extern void ExecEndSampleScan(SampleScanState *node);
+extern void ExecReScanSampleScan(SampleScanState *node);
+
+#endif /* NODESAMPLESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index fcfe1107f92..972368019a9 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1238,6 +1238,15 @@ typedef struct ScanState
typedef ScanState SeqScanState;
/*
+ * SampleScan
+ */
+typedef struct SampleScanState
+{
+ ScanState ss;
+ struct TableSampleDesc *tsdesc;
+} SampleScanState;
+
+/*
* These structs store information about index quals that don't have simple
* constant right-hand sides. See comments for ExecIndexBuildScanKeys()
* for discussion.
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 768f413a451..8b275f6e263 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -61,6 +61,7 @@ typedef enum NodeTag
T_ValuesScan,
T_CteScan,
T_WorkTableScan,
+ T_SampleScan,
T_ForeignScan,
T_CustomScan,
T_Join,
@@ -97,6 +98,7 @@ typedef enum NodeTag
T_BitmapOrState,
T_ScanState,
T_SeqScanState,
+ T_SampleScanState,
T_IndexScanState,
T_IndexOnlyScanState,
T_BitmapIndexScanState,
@@ -419,6 +421,8 @@ typedef enum NodeTag
T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
+ T_RangeTableSample,
+ T_TableSampleClause,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 053f1b01213..6723f46f3f1 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -336,6 +336,26 @@ typedef struct FuncCall
} FuncCall;
/*
+ * TableSampleClause - a sampling method information
+ */
+typedef struct TableSampleClause
+{
+ NodeTag type;
+ Oid tsmid;
+ bool tsmseqscan;
+ bool tsmpagemode;
+ Oid tsminit;
+ Oid tsmnextblock;
+ Oid tsmnexttuple;
+ Oid tsmexaminetuple;
+ Oid tsmend;
+ Oid tsmreset;
+ Oid tsmcost;
+ Node *repeatable;
+ List *args;
+} TableSampleClause;
+
+/*
* A_Star - '*' representing all columns of a table or compound field
*
* This can appear within ColumnRef.fields, A_Indirection.indirection, and
@@ -536,6 +556,22 @@ typedef struct RangeFunction
} RangeFunction;
/*
+ * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ *
+ * SQL Standard specifies only one parameter which is percentage. But we allow
+ * custom tablesample methods which may need different input arguments so we
+ * accept list of arguments.
+ */
+typedef struct RangeTableSample
+{
+ NodeTag type;
+ RangeVar *relation;
+ char *method; /* sampling method */
+ Node *repeatable;
+ List *args; /* arguments for sampling method */
+} RangeTableSample;
+
+/*
* ColumnDef - column definition (used in various creates)
*
* If the column has a default value, we may have the value expression
@@ -772,6 +808,7 @@ typedef struct RangeTblEntry
*/
Oid relid; /* OID of the relation */
char relkind; /* relation kind (see pg_class.relkind) */
+ TableSampleClause *tablesample; /* sampling method and parameters */
/*
* Fields valid for a subquery RTE (else NULL):
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 65f71d81705..4e655b0e6c1 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -287,6 +287,12 @@ typedef struct Scan
typedef Scan SeqScan;
/* ----------------
+ * table sample scan node
+ * ----------------
+ */
+typedef Scan SampleScan;
+
+/* ----------------
* index scan node
*
* indexqualorig is an implicitly-ANDed list of index qual expressions, each
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9c2000b15aa..24003ae3591 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -68,6 +68,7 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root,
double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 9923f0eb3ea..89c8deda95c 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -32,6 +32,8 @@ extern bool add_path_precheck(RelOptInfo *parent_rel,
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer);
+extern Path *create_samplescan_path(PlannerInfo *root, RelOptInfo *rel,
+ Relids required_outer);
extern IndexPath *create_index_path(PlannerInfo *root,
IndexOptInfo *index,
List *indexclauses,
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index faea99108c4..7d5f857ae59 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -368,6 +368,7 @@ PG_KEYWORD("sysid", SYSID, UNRESERVED_KEYWORD)
PG_KEYWORD("system", SYSTEM_P, UNRESERVED_KEYWORD)
PG_KEYWORD("table", TABLE, RESERVED_KEYWORD)
PG_KEYWORD("tables", TABLES, UNRESERVED_KEYWORD)
+PG_KEYWORD("tablesample", TABLESAMPLE, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("tablespace", TABLESPACE, UNRESERVED_KEYWORD)
PG_KEYWORD("temp", TEMP, UNRESERVED_KEYWORD)
PG_KEYWORD("template", TEMPLATE, UNRESERVED_KEYWORD)
diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h
index 32646918e20..40c007c35fc 100644
--- a/src/include/parser/parse_func.h
+++ b/src/include/parser/parse_func.h
@@ -33,6 +33,11 @@ typedef enum
extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
FuncCall *fn, int location);
+extern TableSampleClause *ParseTableSample(ParseState *pstate,
+ char *samplemethod,
+ Node *repeatable, List *args,
+ int location);
+
extern FuncDetailCode func_get_detail(List *funcname,
List *fargs, List *fargnames,
int nargs, Oid *argtypes,
diff --git a/src/include/port.h b/src/include/port.h
index 3787cbfb761..71113c03944 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -357,6 +357,10 @@ extern off_t ftello(FILE *stream);
#endif
#endif
+#define RAND48_SEED_0 (0x330e)
+#define RAND48_SEED_1 (0xabcd)
+#define RAND48_SEED_2 (0x1234)
+
extern double pg_erand48(unsigned short xseed[3]);
extern long pg_lrand48(void);
extern void pg_srand48(long seed);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index ffbaa61e5e2..e2e5734ea7b 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -156,6 +156,7 @@ extern void free_attstatsslot(Oid atttype,
extern char *get_namespace_name(Oid nspid);
extern char *get_namespace_name_or_temp(Oid nspid);
extern Oid get_range_subtype(Oid rangeOid);
+extern char *get_tablesample_method_name(Oid tsmid);
#define type_is_array(typid) (get_element_type(typid) != InvalidOid)
/* type_is_array_domain accepts both plain arrays and domains over arrays */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 9e17d87413d..fd40366bcda 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -63,7 +63,6 @@ typedef struct RelationAmInfo
FmgrInfo amcanreturn;
} RelationAmInfo;
-
/*
* Here are the contents of a relation cache entry.
*/
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index e3e7f9cf6ae..4ac208dc364 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -15,7 +15,12 @@
#include "storage/bufmgr.h"
-extern double sampler_random_fract(void);
+/* Random generator for sampling code */
+typedef unsigned short SamplerRandomState[3];
+
+extern void sampler_random_init_state(long seed,
+ SamplerRandomState randstate);
+extern double sampler_random_fract(SamplerRandomState randstate);
/* Block sampling methods */
/* Data structure for Algorithm S from Knuth 3.4.2 */
@@ -25,6 +30,7 @@ typedef struct
int n; /* desired sample size */
BlockNumber t; /* current block number */
int m; /* blocks selected so far */
+ SamplerRandomState randstate; /* random generator state */
} BlockSamplerData;
typedef BlockSamplerData *BlockSampler;
@@ -35,7 +41,12 @@ extern bool BlockSampler_HasMore(BlockSampler bs);
extern BlockNumber BlockSampler_Next(BlockSampler bs);
/* Reservoid sampling methods */
-typedef double ReservoirStateData;
+typedef struct
+{
+ double W;
+ SamplerRandomState randstate; /* random generator state */
+} ReservoirStateData;
+
typedef ReservoirStateData *ReservoirState;
extern void reservoir_init_selection_state(ReservoirState rs, int n);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 6634099cbe2..2dbd38488b6 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,6 +81,8 @@ enum SysCacheIdentifier
REPLORIGNAME,
RULERELNAME,
STATRELATTINH,
+ TABLESAMPLEMETHODNAME,
+ TABLESAMPLEMETHODOID,
TABLESPACEOID,
TRFOID,
TRFTYPELANG,
diff --git a/src/port/erand48.c b/src/port/erand48.c
index 9d471197c35..12efd8193c4 100644
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -33,9 +33,6 @@
#include <math.h>
-#define RAND48_SEED_0 (0x330e)
-#define RAND48_SEED_1 (0xabcd)
-#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 82bc47895a8..0ae555783bd 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -101,6 +101,17 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(4 rows)
+SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+NOTICE: f_leak => my first novel
+NOTICE: f_leak => my first manga
+NOTICE: f_leak => great science fiction
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-----------------------
+ 1 | 11 | 1 | rls_regress_user1 | my first novel
+ 4 | 44 | 1 | rls_regress_user1 | my first manga
+ 6 | 22 | 1 | rls_regress_user2 | great science fiction
+(3 rows)
+
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
@@ -145,6 +156,21 @@ NOTICE: f_leak => great manga
44 | 8 | 1 | rls_regress_user2 | great manga | manga
(8 rows)
+SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+NOTICE: f_leak => my first novel
+NOTICE: f_leak => my second novel
+NOTICE: f_leak => my first manga
+NOTICE: f_leak => great science fiction
+NOTICE: f_leak => great technology book
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-----------------------
+ 1 | 11 | 1 | rls_regress_user1 | my first novel
+ 2 | 11 | 2 | rls_regress_user1 | my second novel
+ 4 | 44 | 1 | rls_regress_user1 | my first manga
+ 6 | 22 | 1 | rls_regress_user2 | great science fiction
+ 7 | 33 | 2 | rls_regress_user2 | great technology book
+(5 rows)
+
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
QUERY PLAN
----------------------------------------------------------
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index eb0bc88ef1f..14acd16da3b 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,6 +128,7 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
+pg_tablesample_method|t
pg_tablespace|t
pg_transform|t
pg_trigger|t
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out
new file mode 100644
index 00000000000..04e5eb8b807
--- /dev/null
+++ b/src/test/regress/expected/tablesample.out
@@ -0,0 +1,231 @@
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+ id
+----
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 9
+(7 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+ id
+----
+ 6
+ 7
+ 8
+(3 rows)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count
+-------
+ 10
+(1 row)
+
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+ id
+----
+ 0
+ 1
+ 2
+ 6
+ 7
+ 8
+ 9
+(7 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+ id
+----
+ 0
+ 1
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+ id
+----
+ 0
+ 5
+(2 rows)
+
+CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
+ pg_get_viewdef
+--------------------------------------------------------------------------------
+ SELECT test_tablesample.id +
+ FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+(1 row)
+
+SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+ pg_get_viewdef
+-----------------------------------------------------------
+ SELECT test_tablesample.id +
+ FROM test_tablesample TABLESAMPLE system ((99)::real);
+(1 row)
+
+BEGIN;
+DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+FETCH FIRST FROM tablesample_cur;
+ id
+----
+ 0
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 1
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 2
+(1 row)
+
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+ id
+----
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 9
+(7 rows)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 6
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 7
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 8
+(1 row)
+
+FETCH FIRST FROM tablesample_cur;
+ id
+----
+ 0
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 1
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 2
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 6
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 7
+(1 row)
+
+FETCH NEXT FROM tablesample_cur;
+ id
+----
+ 8
+(1 row)
+
+CLOSE tablesample_cur;
+END;
+EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Sample Scan (system) on test_tablesample (cost=0.00..26.35 rows=635 width=4)
+(1 row)
+
+EXPLAIN SELECT * FROM test_tablesample_v1;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Sample Scan (system) on test_tablesample (cost=0.00..10.54 rows=254 width=4)
+(1 row)
+
+-- errors
+SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
+ERROR: tablesample method "foobar" does not exist
+LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
+ ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
+ERROR: REPEATABLE clause must be NOT NULL numeric value
+LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
+ ^
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
+ERROR: invalid sample size
+HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
+ERROR: invalid sample size
+HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
+ERROR: invalid sample size
+HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
+ERROR: invalid sample size
+HINT: Sample size must be numeric value between 0 and 100 (inclusive).
+SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
+ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
+ ^
+INSERT INTO test_tablesample_v1 VALUES(1);
+ERROR: cannot insert into view "test_tablesample_v1"
+DETAIL: Views containing TABLESAMPLE are not automatically updatable.
+HINT: To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
+WITH query_select AS (SELECT * FROM test_tablesample)
+SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+ERROR: TABLESAMPLE clause can only be used on tables and materialized views
+LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
+ ^
+SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
+ERROR: syntax error at or near "TABLESAMPLE"
+LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
+ ^
+-- catalog sanity
+SELECT *
+FROM pg_tablesample_method
+WHERE tsminit IS NULL
+ OR tsmseqscan IS NULL
+ OR tsmpagemode IS NULL
+ OR tsmnextblock IS NULL
+ OR tsmnexttuple IS NULL
+ OR tsmend IS NULL
+ OR tsmreset IS NULL
+ OR tsmcost IS NULL;
+ tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost
+---------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
+(0 rows)
+
+-- done
+DROP TABLE test_tablesample CASCADE;
+NOTICE: drop cascades to 2 other objects
+DETAIL: drop cascades to view test_tablesample_v1
+drop cascades to view test_tablesample_v2
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b0ebb6b3f4c..f39b73abc2a 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -84,7 +84,7 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi
# ----------
# Another group of parallel tests
# ----------
-test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address
+test: brin gin gist spgist privileges security_label collate matview lock replica_identity rowsecurity object_address tablesample
# ----------
# Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 8409c0f3ef2..9441b97e3a6 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -154,3 +154,4 @@ test: with
test: xml
test: event_trigger
test: stats
+test: tablesample
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index e8c5932b201..fdadf99fd62 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -94,11 +94,15 @@ SET row_security TO ON;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+
-- viewpoint from rls_regress_user2
SET SESSION AUTHORIZATION rls_regress_user2;
SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql
new file mode 100644
index 00000000000..7b3eb9bedf7
--- /dev/null
+++ b/src/test/regress/sql/tablesample.sql
@@ -0,0 +1,74 @@
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+
+CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
+SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+
+BEGIN;
+DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+FETCH FIRST FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+
+FETCH FIRST FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+FETCH NEXT FROM tablesample_cur;
+
+CLOSE tablesample_cur;
+END;
+
+EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+EXPLAIN SELECT * FROM test_tablesample_v1;
+
+-- errors
+SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
+
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
+
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
+
+SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
+INSERT INTO test_tablesample_v1 VALUES(1);
+
+WITH query_select AS (SELECT * FROM test_tablesample)
+SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+
+SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
+
+-- catalog sanity
+
+SELECT *
+FROM pg_tablesample_method
+WHERE tsminit IS NULL
+ OR tsmseqscan IS NULL
+ OR tsmpagemode IS NULL
+ OR tsmnextblock IS NULL
+ OR tsmnexttuple IS NULL
+ OR tsmend IS NULL
+ OR tsmreset IS NULL
+ OR tsmcost IS NULL;
+
+-- done
+DROP TABLE test_tablesample CASCADE;