diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index b2bd8794d2a1..f2df4f185137 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ - parallel cleanup oldextversions squashing + parallel sampling cleanup oldextversions squashing # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/sampling.out b/contrib/pg_stat_statements/expected/sampling.out new file mode 100644 index 000000000000..2204215f64ba --- /dev/null +++ b/contrib/pg_stat_statements/expected/sampling.out @@ -0,0 +1,174 @@ +-- +-- sample statements +-- +-- top-level tracking - simple query protocol +SHOW pg_stat_statements.track; + pg_stat_statements.track +-------------------------- + top +(1 row) + +SET pg_stat_statements.sample_rate = 0.0; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT 1 AS "int"; + int +----- + 1 +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------+------- +(0 rows) + +SET pg_stat_statements.sample_rate = 1.0; +SELECT 1 AS "int"; + int +----- + 1 +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------+------- + SELECT $1 AS "int" | 1 +(1 row) + +-- top-level tracking - extended query protocol +SET pg_stat_statements.sample_rate = 0.0; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT 1 \parse stmt +\bind_named stmt \g + ?column? 
+---------- + 1 +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------+------- +(0 rows) + +SET pg_stat_statements.sample_rate = 1.0; +\bind_named stmt \g + ?column? +---------- + 1 +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-----------+------- + SELECT $1 | 1 +(1 row) + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +DEALLOCATE stmt; +-- nested tracking - simple query protocol +SET pg_stat_statements.track = "all"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET pg_stat_statements.sample_rate = 1; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SET pg_stat_statements.sample_rate = 0; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SELECT toplevel, calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + toplevel | calls | query +----------+-------+---------------------------------------------------- + t | 2 | EXPLAIN (COSTS OFF) SELECT $1 + f | 2 | SELECT $1 + t | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t + t | 2 | SET pg_stat_statements.sample_rate = $1 +(4 rows) + +-- nested tracking - extended query protocol +SET pg_stat_statements.track = "all"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET pg_stat_statements.sample_rate = 1; +EXPLAIN (COSTS OFF) SELECT 1; \parse stmt + QUERY PLAN +------------ + Result +(1 row) + +\bind_named stmt \g + QUERY PLAN +------------ + Result +(1 row) + +\bind_named stmt \g + QUERY PLAN +------------ + Result +(1 row) + +SET pg_stat_statements.sample_rate = 0; +\bind_named stmt \g + QUERY PLAN +------------ + Result +(1 row) + +\bind_named stmt \g + QUERY PLAN +------------ + Result +(1 
row) + +SELECT toplevel, calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + toplevel | calls | query +----------+-------+----------------------------------------- + t | 2 | EXPLAIN (COSTS OFF) SELECT $1 + f | 3 | SELECT $1 + t | 1 | SET pg_stat_statements.sample_rate = $1 +(3 rows) + diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 01a6cbdcf613..6fa9b7dba503 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -54,6 +54,7 @@ tests += { 'privileges', 'extended', 'parallel', + 'sampling', 'cleanup', 'oldextversions', 'squashing', diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba30..23db0693e2ea 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -50,6 +50,7 @@ #include "access/parallel.h" #include "catalog/pg_authid.h" #include "common/int.h" +#include "common/pg_prng.h" #include "executor/instrument.h" #include "funcapi.h" #include "jit/jit.h" @@ -260,6 +261,9 @@ typedef struct pgssSharedState /* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */ static int nesting_level = 0; +/* Is the current top-level query to be sampled? 
*/ +static bool is_query_sampled = false; + /* Saved hook values */ static shmem_request_hook_type prev_shmem_request_hook = NULL; static shmem_startup_hook_type prev_shmem_startup_hook = NULL; @@ -298,11 +302,13 @@ static bool pgss_track_utility = true; /* whether to track utility commands */ static bool pgss_track_planning = false; /* whether to track planning * duration */ static bool pgss_save = true; /* whether to save stats across shutdown */ +static double pgss_sample_rate = 1.0; /* fraction of statements to track */ -#define pgss_enabled(level) \ +#define pgss_enabled(level, kind) \ (!IsParallelWorker() && \ (pgss_track == PGSS_TRACK_ALL || \ - (pgss_track == PGSS_TRACK_TOP && (level) == 0))) + (pgss_track == PGSS_TRACK_TOP && (level) == 0)) && \ + ((kind) == PGSS_INVALID || current_query_sampled())) #define record_gc_qtexts() \ do { \ @@ -376,6 +382,7 @@ static char *generate_normalized_query(JumbleState *jstate, const char *query, static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc); static int comp_location(const void *a, const void *b); +static bool current_query_sampled(void); /* @@ -417,6 +424,19 @@ _PG_init(void) NULL, NULL); + DefineCustomRealVariable("pg_stat_statements.sample_rate", + "Fraction of queries to track.", + NULL, + &pgss_sample_rate, + 1.0, + 0.0, + 1.0, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + DefineCustomEnumVariable("pg_stat_statements.track", "Selects which statements are tracked by pg_stat_statements.", NULL, @@ -839,7 +859,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) prev_post_parse_analyze_hook(pstate, query, jstate); /* Safety check... */ - if (!pgss || !pgss_hash || !pgss_enabled(nesting_level)) + if (!pgss || !pgss_hash || !pgss_enabled(nesting_level, PGSS_INVALID)) return; /* @@ -897,7 +917,7 @@ pgss_planner(Query *parse, * pgss_store needs it.
We also ignore query without queryid, as it would * be treated as a utility statement, which may not be the case. */ - if (pgss_enabled(nesting_level) + if (pgss_enabled(nesting_level, PGSS_PLAN) && pgss_track_planning && query_string && parse->queryId != UINT64CONST(0)) { @@ -1008,7 +1028,8 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) * counting of optimizable statements that are directly contained in * utility statements. */ - if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0)) + if (pgss_enabled(nesting_level, PGSS_EXEC) && + queryDesc->plannedstmt->queryId != UINT64CONST(0)) { /* * Set up to track total elapsed time in ExecutorRun. Make sure the @@ -1079,7 +1100,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) uint64 queryId = queryDesc->plannedstmt->queryId; if (queryId != UINT64CONST(0) && queryDesc->totaltime && - pgss_enabled(nesting_level)) + pgss_enabled(nesting_level, PGSS_EXEC)) { /* * Make sure stats accumulation is done. (Note: it's okay if several @@ -1122,7 +1143,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, uint64 saved_queryId = pstmt->queryId; int saved_stmt_location = pstmt->stmt_location; int saved_stmt_len = pstmt->stmt_len; - bool enabled = pgss_track_utility && pgss_enabled(nesting_level); + bool enabled = pgss_track_utility && pgss_enabled(nesting_level, PGSS_EXEC); /* * Force utility statements to get queryId zero. We do this even in cases @@ -3084,3 +3105,27 @@ comp_location(const void *a, const void *b) return pg_cmp_s32(l, r); } + +/* + * Determine whether the current query should be sampled. + * + * The decision is recomputed on every call made while nesting_level is 0 + * and cached in is_query_sampled. Calls made at deeper nesting levels + * return the cached value, so if nested-statement tracking is enabled, + * either all nested statements will be tracked or none will. + * + * NOTE(review): the planner, ExecutorStart, ExecutorEnd and ProcessUtility + * hooks each reach this through pgss_enabled(), so with a fractional rate + * one top-level statement may get different answers in different hooks. + * Confirm whether the decision should be made only once per statement.
+ */ +static bool +current_query_sampled(void) +{ + if (nesting_level == 0) + is_query_sampled = pgss_sample_rate != 0.0 && + (pgss_sample_rate == 1.0 || + pg_prng_double(&pg_global_prng_state) <= pgss_sample_rate); + + return is_query_sampled; +} diff --git a/contrib/pg_stat_statements/sql/sampling.sql b/contrib/pg_stat_statements/sql/sampling.sql new file mode 100644 index 000000000000..b09f45991ba2 --- /dev/null +++ b/contrib/pg_stat_statements/sql/sampling.sql @@ -0,0 +1,50 @@ +-- +-- sample statements +-- + +-- top-level tracking - simple query protocol +SHOW pg_stat_statements.track; +SET pg_stat_statements.sample_rate = 0.0; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT 1 AS "int"; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +SET pg_stat_statements.sample_rate = 1.0; +SELECT 1 AS "int"; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- top-level tracking - extended query protocol +SET pg_stat_statements.sample_rate = 0.0; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT 1 \parse stmt +\bind_named stmt \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +SET pg_stat_statements.sample_rate = 1.0; +\bind_named stmt \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +DEALLOCATE stmt; + +-- nested tracking - simple query protocol +SET pg_stat_statements.track = "all"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SET pg_stat_statements.sample_rate = 1; +EXPLAIN (COSTS OFF) SELECT 1; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_stat_statements.sample_rate = 0; +EXPLAIN (COSTS OFF) SELECT 1; +EXPLAIN (COSTS OFF) SELECT 1; +SELECT toplevel, calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + +-- nested tracking - extended query protocol +SET pg_stat_statements.track = "all"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SET
pg_stat_statements.sample_rate = 1; +EXPLAIN (COSTS OFF) SELECT 1; \parse stmt +\bind_named stmt \g +\bind_named stmt \g +SET pg_stat_statements.sample_rate = 0; +\bind_named stmt \g +\bind_named stmt \g +SELECT toplevel, calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index 7baa07dcdbf7..2ae718dea6c4 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -966,6 +966,25 @@ calls | 2 + + + pg_stat_statements.sample_rate (real) + + pg_stat_statements.sample_rate configuration parameter + + + + + + pg_stat_statements.sample_rate causes pg_stat_statements to track + only a fraction of the statements in each session. The default is 1, + meaning that all statements are tracked. Setting this to 0 disables statement + tracking, the same as setting pg_stat_statements.track to none. + For nested statements, either all of them are tracked or none are. Only superusers and users with the appropriate SET privilege can change this setting. + + + + pg_stat_statements.save (boolean)