Skip to content

Commit f1f8ea3

Browse files
jchampioCommitfest Bot
authored and
Commitfest Bot
committed
Report external auth calls as wait events
Introduce a new "Auth" wait class for various external authentication systems, to make it obvious what's going wrong if one of those systems hangs. Each new wait event is unique in order to more easily pinpoint problematic locations in the code. Discussion: https://postgr.es/m/CAOYmi%2B%3D60deN20WDyCoHCiecgivJxr%3D98s7s7-C8SkXwrCfHXg%40mail.gmail.com
1 parent 1d617a2 commit f1f8ea3

File tree

6 files changed

+100
-6
lines changed

6 files changed

+100
-6
lines changed

doc/src/sgml/monitoring.sgml

+8
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
10451045
see <xref linkend="wait-event-activity-table"/>.
10461046
</entry>
10471047
</row>
1048+
<row>
1049+
<entry><literal>Auth</literal></entry>
1050+
<entry>The server process is waiting for an external system to
1051+
authenticate and/or authorize the client connection.
1052+
<literal>wait_event</literal> will identify the specific wait point;
1053+
see <xref linkend="wait-event-auth-table"/>.
1054+
</entry>
1055+
</row>
10481056
<row>
10491057
<entry><literal>BufferPin</literal></entry>
10501058
<entry>The server process is waiting for exclusive access to

src/backend/libpq/auth.c

+51-5
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "storage/ipc.h"
4141
#include "tcop/backend_startup.h"
4242
#include "utils/memutils.h"
43+
#include "utils/wait_event.h"
4344

4445
/*----------------------------------------------------------------
4546
* Global authentication functions
@@ -993,6 +994,7 @@ pg_GSS_recvauth(Port *port)
993994
elog(DEBUG4, "processing received GSS token of length %u",
994995
(unsigned int) gbuf.length);
995996

997+
pgstat_report_wait_start(WAIT_EVENT_GSSAPI_ACCEPT_SEC_CONTEXT);
996998
maj_stat = gss_accept_sec_context(&min_stat,
997999
&port->gss->ctx,
9981000
port->gss->cred,
@@ -1004,6 +1006,7 @@ pg_GSS_recvauth(Port *port)
10041006
&gflags,
10051007
NULL,
10061008
pg_gss_accept_delegation ? &delegated_creds : NULL);
1009+
pgstat_report_wait_end();
10071010

10081011
/* gbuf no longer used */
10091012
pfree(buf.data);
@@ -1215,6 +1218,7 @@ pg_SSPI_recvauth(Port *port)
12151218
/*
12161219
* Acquire a handle to the server credentials.
12171220
*/
1221+
pgstat_report_wait_start(WAIT_EVENT_SSPI_ACQUIRE_CREDENTIALS_HANDLE);
12181222
r = AcquireCredentialsHandle(NULL,
12191223
"negotiate",
12201224
SECPKG_CRED_INBOUND,
@@ -1224,6 +1228,8 @@ pg_SSPI_recvauth(Port *port)
12241228
NULL,
12251229
&sspicred,
12261230
&expiry);
1231+
pgstat_report_wait_end();
1232+
12271233
if (r != SEC_E_OK)
12281234
pg_SSPI_error(ERROR, _("could not acquire SSPI credentials"), r);
12291235

@@ -1289,6 +1295,7 @@ pg_SSPI_recvauth(Port *port)
12891295
elog(DEBUG4, "processing received SSPI token of length %u",
12901296
(unsigned int) buf.len);
12911297

1298+
pgstat_report_wait_start(WAIT_EVENT_SSPI_ACCEPT_SECURITY_CONTEXT);
12921299
r = AcceptSecurityContext(&sspicred,
12931300
sspictx,
12941301
&inbuf,
@@ -1298,6 +1305,7 @@ pg_SSPI_recvauth(Port *port)
12981305
&outbuf,
12991306
&contextattr,
13001307
NULL);
1308+
pgstat_report_wait_end();
13011309

13021310
/* input buffer no longer used */
13031311
pfree(buf.data);
@@ -1395,11 +1403,13 @@ pg_SSPI_recvauth(Port *port)
13951403

13961404
CloseHandle(token);
13971405

1406+
pgstat_report_wait_start(WAIT_EVENT_SSPI_LOOKUP_ACCOUNT_SID);
13981407
if (!LookupAccountSid(NULL, tokenuser->User.Sid, accountname, &accountnamesize,
13991408
domainname, &domainnamesize, &accountnameuse))
14001409
ereport(ERROR,
14011410
(errmsg_internal("could not look up account SID: error code %lu",
14021411
GetLastError())));
1412+
pgstat_report_wait_end();
14031413

14041414
free(tokenuser);
14051415

@@ -1496,8 +1506,11 @@ pg_SSPI_make_upn(char *accountname,
14961506
*/
14971507

14981508
samname = psprintf("%s\\%s", domainname, accountname);
1509+
1510+
pgstat_report_wait_start(WAIT_EVENT_SSPI_TRANSLATE_NAME);
14991511
res = TranslateName(samname, NameSamCompatible, NameUserPrincipal,
15001512
NULL, &upnamesize);
1513+
pgstat_report_wait_end();
15011514

15021515
if ((!res && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
15031516
|| upnamesize == 0)
@@ -1512,8 +1525,10 @@ pg_SSPI_make_upn(char *accountname,
15121525
/* upnamesize includes the terminating NUL. */
15131526
upname = palloc(upnamesize);
15141527

1528+
pgstat_report_wait_start(WAIT_EVENT_SSPI_TRANSLATE_NAME);
15151529
res = TranslateName(samname, NameSamCompatible, NameUserPrincipal,
15161530
upname, &upnamesize);
1531+
pgstat_report_wait_end();
15171532

15181533
pfree(samname);
15191534
if (res)
@@ -2112,7 +2127,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
21122127
return STATUS_ERROR;
21132128
}
21142129

2130+
pgstat_report_wait_start(WAIT_EVENT_PAM_AUTHENTICATE);
21152131
retval = pam_authenticate(pamh, 0);
2132+
pgstat_report_wait_end();
21162133

21172134
if (retval != PAM_SUCCESS)
21182135
{
@@ -2125,7 +2142,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
21252142
return pam_no_password ? STATUS_EOF : STATUS_ERROR;
21262143
}
21272144

2145+
pgstat_report_wait_start(WAIT_EVENT_PAM_ACCT_MGMT);
21282146
retval = pam_acct_mgmt(pamh, 0);
2147+
pgstat_report_wait_end();
21292148

21302149
if (retval != PAM_SUCCESS)
21312150
{
@@ -2265,7 +2284,11 @@ InitializeLDAPConnection(Port *port, LDAP **ldap)
22652284
}
22662285

22672286
/* Look up a list of LDAP server hosts and port numbers */
2268-
if (ldap_domain2hostlist(domain, &hostlist))
2287+
pgstat_report_wait_start(WAIT_EVENT_LDAP_HOST_LOOKUP);
2288+
r = ldap_domain2hostlist(domain, &hostlist);
2289+
pgstat_report_wait_end();
2290+
2291+
if (r)
22692292
{
22702293
ereport(LOG,
22712294
(errmsg("LDAP authentication could not find DNS SRV records for \"%s\"",
@@ -2359,11 +2382,15 @@ InitializeLDAPConnection(Port *port, LDAP **ldap)
23592382

23602383
if (port->hba->ldaptls)
23612384
{
2385+
pgstat_report_wait_start(WAIT_EVENT_LDAP_START_TLS);
23622386
#ifndef WIN32
2363-
if ((r = ldap_start_tls_s(*ldap, NULL, NULL)) != LDAP_SUCCESS)
2387+
r = ldap_start_tls_s(*ldap, NULL, NULL);
23642388
#else
2365-
if ((r = ldap_start_tls_s(*ldap, NULL, NULL, NULL, NULL)) != LDAP_SUCCESS)
2389+
r = ldap_start_tls_s(*ldap, NULL, NULL, NULL, NULL);
23662390
#endif
2391+
pgstat_report_wait_end();
2392+
2393+
if (r != LDAP_SUCCESS)
23672394
{
23682395
ereport(LOG,
23692396
(errmsg("could not start LDAP TLS session: %s",
@@ -2523,9 +2550,12 @@ CheckLDAPAuth(Port *port)
25232550
* Bind with a pre-defined username/password (if available) for
25242551
* searching. If none is specified, this turns into an anonymous bind.
25252552
*/
2553+
pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND_FOR_SEARCH);
25262554
r = ldap_simple_bind_s(ldap,
25272555
port->hba->ldapbinddn ? port->hba->ldapbinddn : "",
25282556
port->hba->ldapbindpasswd ? ldap_password_hook(port->hba->ldapbindpasswd) : "");
2557+
pgstat_report_wait_end();
2558+
25292559
if (r != LDAP_SUCCESS)
25302560
{
25312561
ereport(LOG,
@@ -2548,13 +2578,16 @@ CheckLDAPAuth(Port *port)
25482578
filter = psprintf("(uid=%s)", port->user_name);
25492579

25502580
search_message = NULL;
2581+
2582+
pgstat_report_wait_start(WAIT_EVENT_LDAP_SEARCH);
25512583
r = ldap_search_s(ldap,
25522584
port->hba->ldapbasedn,
25532585
port->hba->ldapscope,
25542586
filter,
25552587
attributes,
25562588
0,
25572589
&search_message);
2590+
pgstat_report_wait_end();
25582591

25592592
if (r != LDAP_SUCCESS)
25602593
{
@@ -2623,7 +2656,9 @@ CheckLDAPAuth(Port *port)
26232656
port->user_name,
26242657
port->hba->ldapsuffix ? port->hba->ldapsuffix : "");
26252658

2659+
pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND);
26262660
r = ldap_simple_bind_s(ldap, fulluser, passwd);
2661+
pgstat_report_wait_end();
26272662

26282663
if (r != LDAP_SUCCESS)
26292664
{
@@ -3072,8 +3107,12 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
30723107
return STATUS_ERROR;
30733108
}
30743109

3075-
if (sendto(sock, radius_buffer, packetlength, 0,
3076-
serveraddrs[0].ai_addr, serveraddrs[0].ai_addrlen) < 0)
3110+
pgstat_report_wait_start(WAIT_EVENT_RADIUS_SENDTO);
3111+
r = sendto(sock, radius_buffer, packetlength, 0,
3112+
serveraddrs[0].ai_addr, serveraddrs[0].ai_addrlen);
3113+
pgstat_report_wait_end();
3114+
3115+
if (r < 0)
30773116
{
30783117
ereport(LOG,
30793118
(errmsg("could not send RADIUS packet: %m")));
@@ -3121,7 +3160,10 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
31213160
FD_ZERO(&fdset);
31223161
FD_SET(sock, &fdset);
31233162

3163+
pgstat_report_wait_start(WAIT_EVENT_RADIUS_WAIT);
31243164
r = select(sock + 1, &fdset, NULL, NULL, &timeout);
3165+
pgstat_report_wait_end();
3166+
31253167
if (r < 0)
31263168
{
31273169
if (errno == EINTR)
@@ -3154,8 +3196,12 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
31543196
*/
31553197

31563198
addrsize = sizeof(remoteaddr);
3199+
3200+
pgstat_report_wait_start(WAIT_EVENT_RADIUS_RECVFROM);
31573201
packetlength = recvfrom(sock, receive_buffer, RADIUS_BUFFER_SIZE, 0,
31583202
(struct sockaddr *) &remoteaddr, &addrsize);
3203+
pgstat_report_wait_end();
3204+
31593205
if (packetlength < 0)
31603206
{
31613207
ereport(LOG,

src/backend/utils/activity/wait_event.c

+11
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ static const char *pgstat_get_wait_client(WaitEventClient w);
3434
static const char *pgstat_get_wait_ipc(WaitEventIPC w);
3535
static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
3636
static const char *pgstat_get_wait_io(WaitEventIO w);
37+
static const char *pgstat_get_wait_auth(WaitEventAuth w);
3738

3839

3940
static uint32 local_my_wait_event_info;
@@ -413,6 +414,9 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
413414
case PG_WAIT_INJECTIONPOINT:
414415
event_type = "InjectionPoint";
415416
break;
417+
case PG_WAIT_AUTH:
418+
event_type = "Auth";
419+
break;
416420
default:
417421
event_type = "???";
418422
break;
@@ -495,6 +499,13 @@ pgstat_get_wait_event(uint32 wait_event_info)
495499
event_name = pgstat_get_wait_io(w);
496500
break;
497501
}
502+
case PG_WAIT_AUTH:
503+
{
504+
WaitEventAuth w = (WaitEventAuth) wait_event_info;
505+
506+
event_name = pgstat_get_wait_auth(w);
507+
break;
508+
}
498509
default:
499510
event_name = "unknown wait event";
500511
break;

src/backend/utils/activity/wait_event_names.txt

+27
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,33 @@ XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at
163163

164164
ABI_compatibility:
165165

166+
#
167+
# Wait Events - Auth
168+
#
169+
# Use this category when a process is waiting for a third party to
170+
# authenticate/authorize the user.
171+
#
172+
173+
Section: ClassName - WaitEventAuth
174+
175+
GSSAPI_ACCEPT_SEC_CONTEXT "Waiting for a response from a Kerberos server via GSSAPI."
176+
LDAP_BIND "Waiting for an LDAP bind operation to authenticate the user."
177+
LDAP_BIND_FOR_SEARCH "Waiting for an LDAP bind operation to search the directory."
178+
LDAP_HOST_LOOKUP "Waiting for DNS lookup of LDAP servers."
179+
LDAP_SEARCH "Waiting for an LDAP search operation to complete."
180+
LDAP_START_TLS "Waiting for an LDAP StartTLS exchange."
181+
PAM_ACCT_MGMT "Waiting for the PAM service to validate the user account."
182+
PAM_AUTHENTICATE "Waiting for the PAM service to authenticate the user."
183+
RADIUS_RECVFROM "Waiting for a <function>recvfrom</function> call during a RADIUS transaction."
184+
RADIUS_SENDTO "Waiting for a <function>sendto</function> call during a RADIUS transaction."
185+
RADIUS_WAIT "Waiting for a RADIUS server to respond."
186+
SSPI_ACCEPT_SECURITY_CONTEXT "Waiting for a Windows security provider to accept the client's SSPI token."
187+
SSPI_ACQUIRE_CREDENTIALS_HANDLE "Waiting for a Windows security provider to acquire server credentials for SSPI."
188+
SSPI_LOOKUP_ACCOUNT_SID "Waiting for Windows to find the user's security identifier."
189+
SSPI_TRANSLATE_NAME "Waiting for Windows to translate a Kerberos UPN."
190+
191+
ABI_compatibility:
192+
166193
#
167194
# Wait Events - Timeout
168195
#

src/include/utils/wait_event.h

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define PG_WAIT_TIMEOUT 0x09000000U
2626
#define PG_WAIT_IO 0x0A000000U
2727
#define PG_WAIT_INJECTIONPOINT 0x0B000000U
28+
#define PG_WAIT_AUTH 0x0C000000U
2829

2930
/* enums for wait events */
3031
#include "utils/wait_event_types.h"

src/test/regress/expected/sysviews.out

+2-1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
181181
type | ok
182182
-----------+----
183183
Activity | t
184+
Auth | t
184185
BufferPin | t
185186
Client | t
186187
Extension | t
@@ -189,7 +190,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
189190
LWLock | t
190191
Lock | t
191192
Timeout | t
192-
(9 rows)
193+
(10 rows)
193194

194195
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
195196
-- more-or-less working. We can't test their contents in any great detail

0 commit comments

Comments
 (0)