diff options
| author | Alvaro Herrera | 2017-03-24 17:06:10 +0000 |
|---|---|---|
| committer | Alvaro Herrera | 2017-03-24 17:06:10 +0000 |
| commit | 7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b (patch) | |
| tree | 4b12f53c5bd25a03f1016f1daa0809606b47df3a /src/test | |
| parent | f120b614e070aed39586d1443193738a149a90d4 (diff) | |
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/[email protected]
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/[email protected]
https://postgr.es/m/[email protected]
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/alter_generic.out | 45 | ||||
| -rw-r--r-- | src/test/regress/expected/object_address.out | 7 | ||||
| -rw-r--r-- | src/test/regress/expected/opr_sanity.out | 3 | ||||
| -rw-r--r-- | src/test/regress/expected/rules.out | 8 | ||||
| -rw-r--r-- | src/test/regress/expected/sanity_check.out | 1 | ||||
| -rw-r--r-- | src/test/regress/expected/stats_ext.out | 155 | ||||
| -rw-r--r-- | src/test/regress/expected/type_sanity.out | 13 | ||||
| -rw-r--r-- | src/test/regress/parallel_schedule | 2 | ||||
| -rw-r--r-- | src/test/regress/serial_schedule | 1 | ||||
| -rw-r--r-- | src/test/regress/sql/alter_generic.sql | 31 | ||||
| -rw-r--r-- | src/test/regress/sql/object_address.sql | 4 | ||||
| -rw-r--r-- | src/test/regress/sql/stats_ext.sql | 102 | ||||
| -rw-r--r-- | src/test/regress/sql/type_sanity.sql | 2 |
13 files changed, 361 insertions, 13 deletions
diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index b01be59bbbd..ce581bb93d5 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -497,6 +497,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; -- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1" +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user2" +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user3" +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2" +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; + nspname | staname | rolname +----------+-----------+--------------------- + alt_nsp1 | alt_stat2 | regress_alter_user2 + alt_nsp1 | alt_stat3 | regress_alter_user1 + alt_nsp1 | alt_stat4 | regress_alter_user2 + alt_nsp2 | alt_stat2 | regress_alter_user3 +(4 rows) + +-- -- Text Search Dictionary -- SET SESSION AUTHORIZATION regress_alter_user1; @@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE; DROP LANGUAGE alt_lang4 CASCADE; ERROR: language "alt_lang4" does not exist DROP SCHEMA alt_nsp1 CASCADE; -NOTICE: drop cascades to 26 other objects +NOTICE: drop cascades to 27 other objects DETAIL: drop cascades to function alt_func3(integer) drop cascades to function alt_agg3(integer) drop cascades to function alt_func4(integer) @@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash drop cascades to operator family alt_opc2 for access method hash drop cascades to operator family alt_opf4 for access method hash drop cascades to operator family alt_opf2 for access method hash +drop cascades to table alt_regress_1 drop cascades to text search dictionary alt_ts_dict3 drop cascades to text search dictionary alt_ts_dict4 drop cascades to text search dictionary alt_ts_dict2 diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out index 978d9a9a0f8..814e05e4ef1 100644 --- a/src/test/regress/expected/object_address.out +++ b/src/test/regress/expected/object_address.out @@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); ERROR: unrecognized object type "stone" @@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address @@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, trigger | | | t on addr_nsp.gentable | t operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t policy | | | genpol on addr_nsp.gentable | t + statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t collation | pg_catalog | "default" | pg_catalog."default" | t transform | | | for integer on language sql | t text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t @@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, subscription | | addr_sub | addr_sub | t publication | | addr_pub | addr_pub | t publication relation | | | gentable in publication addr_pub | t -(45 rows) +(46 rows) --- --- Cleanup resources diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 64d9dd605fd..262036ac4ff 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND text | character | 0 | i character varying | character | 0 | i pg_node_tree | text | 0 | i + pg_ndistinct | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(7 rows) +(8 rows) -- **************** pg_conversion **************** -- Look for illegal values in pg_conversion fields. diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c4c8450b830..7f04c7a7cc8 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname, JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); +pg_stats_ext| SELECT n.nspname AS schemaname, + c.relname AS tablename, + s.staname, + s.stakeys AS attnums, + length((s.standistinct)::text) AS ndistbytes + FROM ((pg_statistic_ext s + JOIN pg_class c ON ((c.oid = s.starelid))) + LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 8e3028edaa2..753ad81e43f 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -142,6 +142,7 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t +pg_statistic_ext|t pg_subscription|t pg_subscription_rel|t pg_tablespace|t diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 00000000000..83d70bf9b9a --- /dev/null +++ b/src/test/regress/expected/stats_ext.out @@ -0,0 +1,155 @@ +-- Generic extended statistics support +-- Ensure stats are dropped sanely +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; +-- Ensure statistics are dropped when columns are +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | + c | integer | | | +Statistics: + "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c) + +DROP TABLE ab1; +-- Ensure things work sanely with SET STATISTICS 0 +CREATE TABLE ab1 (a INTEGER, b INTEGER); +ALTER TABLE ab1 ALTER a SET STATISTICS 0; +INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ANALYZE ab1; +ERROR: extended statistics could not be collected for column "a" of relation public.ab1 +HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1 +ALTER TABLE ab1 ALTER a SET STATISTICS -1; +ANALYZE ab1; +DROP TABLE ab1; +-- n-distinct tests +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}] +(1 row) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c, d + -> Seq Scan on ndistinct +(3 rows) + +TRUNCATE TABLE ndistinct; +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}] +(1 row) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..232.01 rows=201 width=16) + Group Key: a, b + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..257.01 rows=201 width=20) + Group Key: a, b, c + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=280.00..290.00 rows=1000 width=24) + Group Key: a, b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..265.00 rows=1000 width=20) + Group Key: b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..240.00 rows=1000 width=16) + Group Key: a, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +DROP TABLE ndistinct; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 8d75bbfab31..84022f6a298 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' @@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); - oid | typname ------+-------------- - 194 | pg_node_tree - 210 | smgr -(2 rows) + oid | typname +------+-------------- + 194 | pg_node_tree + 3361 | pg_ndistinct + 210 | smgr +(3 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 38743d98c34..c283bdcb372 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan +test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext # rules cannot run concurrently with any test that creates a view test: rules psql_crosstab amutils diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index d9f64c28738..3a0d536a2ba 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -130,6 +130,7 @@ test: misc_functions test: sysviews test: tsrf test: tidscan +test: stats_ext test: rules test: psql_crosstab test: select_parallel diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index c9ea4799672..f6fa8d8bfdb 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; +-- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK + +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) + +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; -- -- Text Search Dictionary diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql index 28476daff18..c9219e47c4a 100644 --- a/src/test/regress/sql/object_address.sql +++ b/src/test/regress/sql/object_address.sql @@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); @@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index 00000000000..946cb848535 --- /dev/null +++ b/src/test/regress/sql/stats_ext.sql @@ -0,0 +1,102 @@ +-- Generic extended statistics support + +-- Ensure stats are dropped sanely +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; + +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; + +-- Ensure statistics are dropped when columns are +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 +DROP TABLE ab1; + +-- Ensure things work sanely with SET STATISTICS 0 +CREATE TABLE ab1 (a INTEGER, b INTEGER); +ALTER TABLE ab1 ALTER a SET STATISTICS 0; +INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ANALYZE ab1; +ALTER TABLE ab1 ALTER a SET STATISTICS -1; +ANALYZE ab1; +DROP TABLE ab1; + + +-- n-distinct tests +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); + +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; + +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; + +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; + +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; + +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; + +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +TRUNCATE TABLE ndistinct; + +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + +DROP TABLE ndistinct; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 0a31249f5d5..4c658140081 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 |
