src/test/recovery/t/002_archiving.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124


# Copyright (c) 2021-2022, PostgreSQL Global Development Group

# test for archiving with hot standby
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use File::Copy;

# Set up the primary with WAL archiving and streaming enabled, and start it.
my $backup_name  = 'my_backup';
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(has_archiving => 1, allows_streaming => 1);
$node_primary->start;

# This base backup seeds the standby created next.
$node_primary->backup($backup_name);

# Create the standby from the backup; it fetches its WAL from the archives.
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
$node_standby->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);
$node_standby->append_conf('postgresql.conf',
	q{wal_retrieve_retry_interval = '100ms'});

# Configure archive_cleanup_command and recovery_end_command with dummy
# commands.  Each one writes a marker file using a relative path, and the
# checks done later in this script look for these files in the standby's
# data directory to know if the backend has executed the commands.
my $archive_cleanup_command_file = 'archive_cleanup_command.done';
my $recovery_end_command_file    = 'recovery_end_command.done';
my $data_dir                     = $node_standby->data_dir;
$node_standby->append_conf(
	'postgresql.conf',
	join("\n",
		'',
		"archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file'",
		"recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file'",
		''));
$node_standby->start;

# Create some content on primary
$node_primary->safe_psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");

# Note the presence of this checkpoint for the archive_cleanup_command
# check done below, before switching to a new segment.
$node_primary->safe_psql('postgres', "CHECKPOINT");

# The LSN to wait for is taken after the checkpoint, so that the standby
# is known to have replayed the checkpoint record once it has caught up.
# Taking it before the checkpoint would let the standby pass the wait
# below without having replayed that record, in which case the CHECKPOINT
# run later on the standby may not generate a restart point and
# archive_cleanup_command would not be executed.
my $current_lsn =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");

# Force archiving of WAL file to make it present on primary
$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");

# Add some more content, it should not be present on standby
$node_primary->safe_psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Wait until necessary replay has been done on standby
my $caughtup_query =
  "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
$node_standby->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

# Only the rows created before the WAL switch are expected on the standby.
my $result =
  $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(1000), 'check content from archives');

# archive_cleanup_command is executed after generating a restart point,
# with a checkpoint.
$node_standby->safe_psql('postgres', q{CHECKPOINT});
ok( -f "$data_dir/$archive_cleanup_command_file",
	'archive_cleanup_command executed on checkpoint');
ok( !-f "$data_dir/$recovery_end_command_file",
	'recovery_end_command not executed yet');

# Check the presence of temporary files specifically generated during
# archive recovery.  To ensure the presence of the temporary history
# file, switch to a timeline large enough to allow a standby to recover
# a history file from an archive.  As this requires at least two timeline
# switches, promote the existing standby first.  Then create a second
# standby based on the primary, using its archives.  Finally, the second
# standby is promoted.
$node_standby->promote;

# Wait until the history file of the new timeline has been stored on the
# archives of the primary once the promotion of the standby completes.
# This ensures that the second standby created afterwards is able to
# restore this file, creating a RECOVERYHISTORY.  This relies on the
# promoted standby archiving into the archive directory of the primary,
# which is expected as it has been created from a base backup of the
# primary, with archiving enabled there.
my $primary_archive = $node_primary->archive_dir;
my $history_archived_query =
  "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history', true)";
$node_primary->poll_query_until('postgres', $history_archived_query)
  or die "Timed out while waiting for archiving of 00000002.history";

# recovery_end_command should have been triggered on promotion.
ok( -f "$data_dir/$recovery_end_command_file",
	'recovery_end_command executed after promotion');

# Second standby, created from the same base backup of the primary and
# restoring from the archives.
my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
$node_standby2->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);

# Use a recovery_end_command that is expected to fail, as it redirects
# its output into a directory that does not exist.  The failure should
# be logged, without preventing the promotion.
$node_standby2->append_conf(
	'postgresql.conf', qq(
recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
));

$node_standby2->start;

# Remember the current size of the log file, so that only the entries
# generated from this point are looked at for the command failure.
my $standby2_log = $node_standby2->logfile;
my $log_offset   = -s $standby2_log;

# Promote standby2.  The temporary files specifically generated during
# archive recovery should be removed by the end of recovery.
$node_standby2->promote;
my $standby2_pgdata = $node_standby2->data_dir;
foreach my $temp_file (qw(RECOVERYHISTORY RECOVERYXLOG))
{
	ok( !-f "$standby2_pgdata/pg_wal/$temp_file",
		"$temp_file removed after promotion");
}

# The failure of recovery_end_command should show up as a WARNING in the
# logs generated since the saved location.
my $promotion_log = slurp_file($node_standby2->logfile, $log_offset);
like(
	$promotion_log,
	qr/WARNING:.*recovery_end_command/s,
	"recovery_end_command failure detected in logs after promotion");

done_testing();