diff options
| author | Amit Kapila | 2022-03-01 00:47:52 +0000 |
|---|---|---|
| committer | Amit Kapila | 2022-03-01 00:47:52 +0000 |
| commit | 7a85073290856554416353a89799a4c04d09b74b (patch) | |
| tree | 46fdf5ee363b32132dd70d66e16bcfd5c51dc1fc /src/test | |
| parent | 54bd1e43ca56e323aef309dc2dc0e1391825ce68 (diff) | |
Reconsider pg_stat_subscription_workers view.
It was decided (refer to the Discussion link below) that the stats
collector is not an appropriate place to store the error information of
subscription workers.
This patch changes the pg_stat_subscription_workers view (introduced by
commit 8d74fc96db) so that it stores only statistics counters:
apply_error_count and sync_error_count, and has one entry for
each subscription. The removed error information such as error-XID and
the error message would be stored in another way in the future which is
more reliable and persistent.
After removing these error details, there is no longer any relation
information, so the subscription statistics are now a cluster-wide
statistics.
The patch also changes the view name to pg_stat_subscription_stats since
the word "worker" is an implementation detail that we use one worker for
one tablesync and one apply.
Author: Masahiko Sawada, based on suggestions by Andres Freund
Reviewed-by: Peter Smith, Haiying Tang, Takamichi Osumi, Amit Kapila
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/rules.out | 23 | ||||
| -rw-r--r-- | src/test/subscription/t/026_stats.pl | 102 | ||||
| -rw-r--r-- | src/test/subscription/t/026_worker_stats.pl | 165 |
3 files changed, 108 insertions, 182 deletions
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 1420288d67b..ac468568a1a 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2072,24 +2072,13 @@ pg_stat_subscription| SELECT su.oid AS subid, st.latest_end_time FROM (pg_subscription su LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, relid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid))); -pg_stat_subscription_workers| SELECT w.subid, +pg_stat_subscription_stats| SELECT ss.subid, s.subname, - w.subrelid, - w.last_error_relid, - w.last_error_command, - w.last_error_xid, - w.last_error_count, - w.last_error_message, - w.last_error_time - FROM ( SELECT pg_subscription.oid AS subid, - NULL::oid AS relid - FROM pg_subscription - UNION ALL - SELECT pg_subscription_rel.srsubid AS subid, - pg_subscription_rel.srrelid AS relid - FROM pg_subscription_rel) sr, - (LATERAL pg_stat_get_subscription_worker(sr.subid, sr.relid) w(subid, subrelid, last_error_relid, last_error_command, last_error_xid, last_error_count, last_error_message, last_error_time) - JOIN pg_subscription s ON ((w.subid = s.oid))); + ss.apply_error_count, + ss.sync_error_count, + ss.stats_reset + FROM pg_subscription s, + LATERAL pg_stat_get_subscription_stats(s.oid) ss(subid, apply_error_count, sync_error_count, stats_reset); pg_stat_sys_indexes| SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, diff --git a/src/test/subscription/t/026_stats.pl b/src/test/subscription/t/026_stats.pl new file mode 100644 index 00000000000..a42ea3170ec --- /dev/null +++ b/src/test/subscription/t/026_stats.pl @@ -0,0 +1,102 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests for subscription stats. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Create publisher node. +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +# Create subscriber node. +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Initial table setup on both publisher and subscriber. On subscriber we +# create the same tables but with primary keys. Also, insert some data that +# will conflict with the data replicated from publisher later. +$node_publisher->safe_psql( + 'postgres', + qq[ +BEGIN; +CREATE TABLE test_tab1 (a int); +INSERT INTO test_tab1 VALUES (1); +COMMIT; +]); +$node_subscriber->safe_psql( + 'postgres', + qq[ +BEGIN; +CREATE TABLE test_tab1 (a int primary key); +INSERT INTO test_tab1 VALUES (1); +COMMIT; +]); + +# Setup publication. +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR TABLE test_tab1;"); + +# There shouldn't be any subscription errors before starting logical replication. +my $result = $node_subscriber->safe_psql('postgres', + "SELECT count(1) FROM pg_stat_subscription_stats"); +is($result, qq(0), 'check no subscription error'); + +# Create subscription. The tablesync for test_tab1 on tap_sub will enter into +# infinite error loop due to violating the unique constraint. +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub;" +); + +$node_publisher->wait_for_catchup('tap_sub'); + +# Wait for the tablesync error to be reported. +$node_subscriber->poll_query_until( + 'postgres', + qq[ +SELECT sync_error_count > 0 +FROM pg_stat_subscription_stats +WHERE subname = 'tap_sub' +]) or die "Timed out while waiting for tablesync error"; + +# Truncate test_tab1 so that tablesync worker can continue. +$node_subscriber->safe_psql('postgres', "TRUNCATE test_tab1;"); + +# Wait for initial tablesync for test_tab1 to finish. +$node_subscriber->poll_query_until( + 'postgres', + qq[ +SELECT count(1) = 1 FROM pg_subscription_rel +WHERE srrelid = 'test_tab1'::regclass AND srsubstate in ('r', 's') +]) or die "Timed out while waiting for subscriber to synchronize data"; + +# Check test_tab1 on the subscriber has one row. +$result = $node_subscriber->safe_psql('postgres', "SELECT a FROM test_tab1"); +is($result, qq(1), 'check the table has now row'); + +# Insert data to test_tab1 on the publisher, raising an error on the subscriber +# due to violation of the unique constraint on test_tab1. +$node_publisher->safe_psql('postgres', "INSERT INTO test_tab1 VALUES (1)"); + +# Wait for the apply error to be reported. +$node_subscriber->poll_query_until( + 'postgres', + qq[ +SELECT apply_error_count > 0 +FROM pg_stat_subscription_stats +WHERE subname = 'tap_sub' +]) or die "Timed out while waiting for apply error"; + +# Truncate test_tab1 so that apply worker can continue. +$node_subscriber->safe_psql('postgres', "TRUNCATE test_tab1;"); + +$node_subscriber->stop('fast'); +$node_publisher->stop('fast'); + +done_testing(); diff --git a/src/test/subscription/t/026_worker_stats.pl b/src/test/subscription/t/026_worker_stats.pl deleted file mode 100644 index f72e4766e83..00000000000 --- a/src/test/subscription/t/026_worker_stats.pl +++ /dev/null @@ -1,165 +0,0 @@ - -# Copyright (c) 2021-2022, PostgreSQL Global Development Group - -# Tests for subscription error stats. -use strict; -use warnings; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; -use Test::More; - -# Test if the error reported on pg_stat_subscription_workers view is expected. -sub test_subscription_error -{ - my ($node, $relname, $command, $xid, $by_apply_worker, $errmsg_prefix, $msg) - = @_; - - my $check_sql = qq[ -SELECT count(1) > 0 -FROM pg_stat_subscription_workers -WHERE last_error_relid = '$relname'::regclass - AND starts_with(last_error_message, '$errmsg_prefix')]; - - # subrelid - $check_sql .= $by_apply_worker - ? qq[ AND subrelid IS NULL] - : qq[ AND subrelid = '$relname'::regclass]; - - # last_error_command - $check_sql .= $command eq '' - ? qq[ AND last_error_command IS NULL] - : qq[ AND last_error_command = '$command']; - - # last_error_xid - $check_sql .= $xid eq '' - ? qq[ AND last_error_xid IS NULL] - : qq[ AND last_error_xid = '$xid'::xid]; - - # Wait for the particular error statistics to be reported. - $node->poll_query_until('postgres', $check_sql, -) or die "Timed out while waiting for " . $msg; -} - -# Create publisher node. -my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); -$node_publisher->init(allows_streaming => 'logical'); -$node_publisher->start; - -# Create subscriber node. -my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); -$node_subscriber->init(allows_streaming => 'logical'); - -# The subscriber will enter an infinite error loop, so we don't want -# to overflow the server log with error messages. -$node_subscriber->append_conf('postgresql.conf', - qq[ -wal_retrieve_retry_interval = 2s -]); -$node_subscriber->start; - -# Initial table setup on both publisher and subscriber. On subscriber we -# create the same tables but with primary keys. Also, insert some data that -# will conflict with the data replicated from publisher later. -$node_publisher->safe_psql( - 'postgres', - qq[ -BEGIN; -CREATE TABLE test_tab1 (a int); -CREATE TABLE test_tab2 (a int); -INSERT INTO test_tab1 VALUES (1); -INSERT INTO test_tab2 VALUES (1); -COMMIT; -]); -$node_subscriber->safe_psql( - 'postgres', - qq[ -BEGIN; -CREATE TABLE test_tab1 (a int primary key); -CREATE TABLE test_tab2 (a int primary key); -INSERT INTO test_tab2 VALUES (1); -COMMIT; -]); - -# Setup publications. -my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; -$node_publisher->safe_psql( - 'postgres', - "CREATE PUBLICATION tap_pub FOR TABLE test_tab1, test_tab2;"); - -# There shouldn't be any subscription errors before starting logical replication. -my $result = $node_subscriber->safe_psql( - 'postgres', - "SELECT count(1) FROM pg_stat_subscription_workers"); -is($result, qq(0), 'check no subscription error'); - -# Create subscription. The table sync for test_tab2 on tap_sub will enter into -# infinite error loop due to violating the unique constraint. -$node_subscriber->safe_psql( - 'postgres', - "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub;"); - -$node_publisher->wait_for_catchup('tap_sub'); - -# Wait for initial table sync for test_tab1 to finish. -$node_subscriber->poll_query_until( - 'postgres', - qq[ -SELECT count(1) = 1 FROM pg_subscription_rel -WHERE srrelid = 'test_tab1'::regclass AND srsubstate in ('r', 's') -]) or die "Timed out while waiting for subscriber to synchronize data"; - -# Check the initial data. -$result = $node_subscriber->safe_psql( - 'postgres', - "SELECT count(a) FROM test_tab1"); -is($result, q(1), 'check initial data are copied to subscriber'); - -# Insert more data to test_tab1, raising an error on the subscriber due to -# violation of the unique constraint on test_tab1. -my $xid = $node_publisher->safe_psql( - 'postgres', - qq[ -BEGIN; -INSERT INTO test_tab1 VALUES (1); -SELECT pg_current_xact_id()::xid; -COMMIT; -]); -test_subscription_error($node_subscriber, 'test_tab1', 'INSERT', $xid, - 1, # check apply worker error - qq(duplicate key value violates unique constraint), - 'error reported by the apply worker'); - -# Check the table sync worker's error in the view. -test_subscription_error($node_subscriber, 'test_tab2', '', '', - 0, # check tablesync worker error - qq(duplicate key value violates unique constraint), - 'the error reported by the table sync worker'); - -# Test for resetting subscription worker statistics. -# Truncate test_tab1 and test_tab2 so that applying changes and table sync can -# continue, respectively. -$node_subscriber->safe_psql( - 'postgres', - "TRUNCATE test_tab1, test_tab2;"); - -# Wait for the data to be replicated. -$node_subscriber->poll_query_until( - 'postgres', - "SELECT count(1) > 0 FROM test_tab1"); -$node_subscriber->poll_query_until( - 'postgres', - "SELECT count(1) > 0 FROM test_tab2"); - -# There shouldn't be any errors in the view after dropping the subscription. -$node_subscriber->safe_psql( - 'postgres', - "DROP SUBSCRIPTION tap_sub;"); -$result = $node_subscriber->safe_psql( - 'postgres', - "SELECT count(1) FROM pg_stat_subscription_workers"); -is($result, q(0), 'no error after dropping subscription'); - -$node_subscriber->stop('fast'); -$node_publisher->stop('fast'); - -done_testing(); |
