diff options
author | Tom Lane | 2022-10-11 22:54:31 +0000 |
---|---|---|
committer | Tom Lane | 2022-10-11 22:54:31 +0000 |
commit | 18a4a620e2de0e25a15303a8f78ff415950a14ef (patch) | |
tree | e6eab90fbbd32dc02304ac7c3f462a26cace2461 /src | |
parent | b8f2687fdc410371bbfa579ab7c4fd4b7a5ed1cb (diff) |
Harden pmsignal.c against clobbered shared memory.
The postmaster is not supposed to do anything that depends
fundamentally on shared memory contents, because that creates
the risk that a backend crash that trashes shared memory will
take the postmaster down with it, preventing automatic recovery.
In commit 969d7cd43 I lost sight of this principle and coded
AssignPostmasterChildSlot() in such a way that it could fail
or even crash if the shared PMSignalState structure became
corrupted. Remarkably, we've not seen field reports of such
crashes; but I managed to induce one while testing the recent
changes around palloc chunk headers.
To fix, make a semi-duplicative state array inside the postmaster
so that we need consult only local state while choosing a "child
slot" for a new backend. Ensure that other postmaster-executed
routines in pmsignal.c don't have critical dependencies on the
shared state, either. Corruption of PMSignalState might now
lead ReleasePostmasterChildSlot() to conclude that backend X
failed, when actually backend Y was the one that trashed things.
But that doesn't matter, because we'll force a cluster-wide reset
regardless.
Back-patch to all supported branches, since this is an old bug.
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/storage/ipc/pmsignal.c | 56 |
1 files changed, 44 insertions, 12 deletions
diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 3f0ec5e6b82..c85521d3649 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -26,6 +26,7 @@ #include "replication/walsender.h" #include "storage/pmsignal.h" #include "storage/shmem.h" +#include "utils/memutils.h" /* @@ -75,13 +76,22 @@ struct PMSignalData QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */ /* per-child-process flags */ int num_child_flags; /* # of entries in PMChildFlags[] */ - int next_child_flag; /* next slot to try to assign */ sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER]; }; +/* PMSignalState pointer is valid in both postmaster and child processes */ NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL; /* + * These static variables are valid only in the postmaster. We keep a + * duplicative private array so that we can trust its state even if some + * failing child has clobbered the PMSignalData struct in shared memory. + */ +static int num_child_inuse; /* # of entries in PMChildInUse[] */ +static int next_child_inuse; /* next slot to try to assign */ +static bool *PMChildInUse; /* true if i'th flag slot is assigned */ + +/* * Signal handler to be notified if postmaster dies. */ #ifdef USE_POSTMASTER_DEATH_SIGNAL @@ -142,7 +152,25 @@ PMSignalShmemInit(void) { /* initialize all flags to zeroes */ MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); - PMSignalState->num_child_flags = MaxLivePostmasterChildren(); + num_child_inuse = MaxLivePostmasterChildren(); + PMSignalState->num_child_flags = num_child_inuse; + + /* + * Also allocate postmaster's private PMChildInUse[] array. We + * might've already done that in a previous shared-memory creation + * cycle, in which case free the old array to avoid a leak. (Do it + * like this to support the possibility that MaxLivePostmasterChildren + * changed.) In a standalone backend, we do not need this. + */ + if (PostmasterContext != NULL) + { + if (PMChildInUse) + pfree(PMChildInUse); + PMChildInUse = (bool *) + MemoryContextAllocZero(PostmasterContext, + num_child_inuse * sizeof(bool)); + } + next_child_inuse = 0; } } @@ -218,21 +246,24 @@ GetQuitSignalReason(void) int AssignPostmasterChildSlot(void) { - int slot = PMSignalState->next_child_flag; + int slot = next_child_inuse; int n; /* - * Scan for a free slot. We track the last slot assigned so as not to - * waste time repeatedly rescanning low-numbered slots. + * Scan for a free slot. Notice that we trust nothing about the contents + * of PMSignalState, but use only postmaster-local data for this decision. + * We track the last slot assigned so as not to waste time repeatedly + * rescanning low-numbered slots. */ - for (n = PMSignalState->num_child_flags; n > 0; n--) + for (n = num_child_inuse; n > 0; n--) { if (--slot < 0) - slot = PMSignalState->num_child_flags - 1; - if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED) + slot = num_child_inuse - 1; + if (!PMChildInUse[slot]) { + PMChildInUse[slot] = true; PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; - PMSignalState->next_child_flag = slot; + next_child_inuse = slot; return slot + 1; } } @@ -254,7 +285,7 @@ ReleasePostmasterChildSlot(int slot) { bool result; - Assert(slot > 0 && slot <= PMSignalState->num_child_flags); + Assert(slot > 0 && slot <= num_child_inuse); slot--; /* @@ -264,17 +295,18 @@ ReleasePostmasterChildSlot(int slot) */ result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED; + PMChildInUse[slot] = false; return result; } /* * IsPostmasterChildWalSender - check if given slot is in use by a - * walsender process. + * walsender process. This is called only by the postmaster. */ bool IsPostmasterChildWalSender(int slot) { - Assert(slot > 0 && slot <= PMSignalState->num_child_flags); + Assert(slot > 0 && slot <= num_child_inuse); slot--; if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER) |