summaryrefslogtreecommitdiff
path: root/src/test/isolation/isolationtester.c
diff options
context:
space:
mode:
authorRobert Haas2016-02-11 13:36:30 +0000
committerRobert Haas2016-02-11 13:36:30 +0000
commit38f8bdcac4982215beb9f65a19debecaf22fd470 (patch)
treef1a86518b289c084b6a7403f5953c8958557f284 /src/test/isolation/isolationtester.c
parentc9882c60f44cf5d0b37411535175a5c154fdad0e (diff)
Modify the isolation tester so that multiple sessions can wait.
This allows testing of deadlock scenarios. Scenarios that would previously have been considered invalid are now simply taken as a scenario in which more than one backend will wait.
Diffstat (limited to 'src/test/isolation/isolationtester.c')
-rw-r--r--src/test/isolation/isolationtester.c330
1 files changed, 189 insertions, 141 deletions
diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c
index ab4150143f7..0d48cdbdacc 100644
--- a/src/test/isolation/isolationtester.c
+++ b/src/test/isolation/isolationtester.c
@@ -15,6 +15,7 @@
#include <sys/select.h>
#endif
+#include "datatype/timestamp.h"
#include "libpq-fe.h"
#include "pqexpbuffer.h"
#include "pg_getopt.h"
@@ -237,54 +238,6 @@ main(int argc, char **argv)
appendPQExpBufferStr(&wait_query,
") "
- "AND holder.mode = ANY (CASE waiter.mode "
- "WHEN 'AccessShareLock' THEN ARRAY["
- "'AccessExclusiveLock'] "
- "WHEN 'RowShareLock' THEN ARRAY["
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'RowExclusiveLock' THEN ARRAY["
- "'ShareLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'ShareUpdateExclusiveLock' THEN ARRAY["
- "'ShareUpdateExclusiveLock',"
- "'ShareLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'ShareLock' THEN ARRAY["
- "'RowExclusiveLock',"
- "'ShareUpdateExclusiveLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'ShareRowExclusiveLock' THEN ARRAY["
- "'RowExclusiveLock',"
- "'ShareUpdateExclusiveLock',"
- "'ShareLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'ExclusiveLock' THEN ARRAY["
- "'RowShareLock',"
- "'RowExclusiveLock',"
- "'ShareUpdateExclusiveLock',"
- "'ShareLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] "
- "WHEN 'AccessExclusiveLock' THEN ARRAY["
- "'AccessShareLock',"
- "'RowShareLock',"
- "'RowExclusiveLock',"
- "'ShareUpdateExclusiveLock',"
- "'ShareLock',"
- "'ShareRowExclusiveLock',"
- "'ExclusiveLock',"
- "'AccessExclusiveLock'] END) "
-
"AND holder.locktype IS NOT DISTINCT FROM waiter.locktype "
"AND holder.database IS NOT DISTINCT FROM waiter.database "
"AND holder.relation IS NOT DISTINCT FROM waiter.relation "
@@ -467,34 +420,48 @@ report_error_message(Step *step)
}
/*
- * As above, but reports messages possibly emitted by two steps. This is
+ * As above, but reports messages possibly emitted by multiple steps. This is
* useful when we have a blocked command awakened by another one; we want to
- * report both messages identically, for the case where we don't care which
+ * report all messages identically, for the case where we don't care which
* one fails due to a timeout such as deadlock timeout.
*/
static void
-report_two_error_messages(Step *step1, Step *step2)
+report_multiple_error_messages(Step *step, int nextra, Step **extrastep)
{
- char *prefix;
-
- prefix = psprintf("%s %s", step1->name, step2->name);
+ PQExpBufferData buffer;
+ int n;
- if (step1->errormsg)
+ if (nextra == 0)
{
- fprintf(stdout, "error in steps %s: %s\n", prefix,
- step1->errormsg);
- free(step1->errormsg);
- step1->errormsg = NULL;
+ report_error_message(step);
+ return;
}
- if (step2->errormsg)
+
+ initPQExpBuffer(&buffer);
+ appendPQExpBufferStr(&buffer, step->name);
+
+ for (n = 0; n < nextra; ++n)
+ appendPQExpBuffer(&buffer, " %s", extrastep[n]->name);
+
+ if (step->errormsg)
{
- fprintf(stdout, "error in steps %s: %s\n", prefix,
- step2->errormsg);
- free(step2->errormsg);
- step2->errormsg = NULL;
+ fprintf(stdout, "error in steps %s: %s\n", buffer.data,
+ step->errormsg);
+ free(step->errormsg);
+ step->errormsg = NULL;
}
- free(prefix);
+ for (n = 0; n < nextra; ++n)
+ {
+ if (extrastep[n]->errormsg == NULL)
+ continue;
+ fprintf(stdout, "error in steps %s: %s\n",
+ buffer.data, extrastep[n]->errormsg);
+ free(extrastep[n]->errormsg);
+ extrastep[n]->errormsg = NULL;
+ }
+
+ termPQExpBuffer(&buffer);
}
/*
@@ -505,7 +472,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
{
PGresult *res;
int i;
- Step *waiting = NULL;
+ int w;
+ int nwaiting = 0;
+ int nerrorstep = 0;
+ Step **waiting;
+ Step **errorstep;
+
+ waiting = malloc(sizeof(Step *) * testspec->nsessions);
+ errorstep = malloc(sizeof(Step *) * testspec->nsessions);
/*
* In dry run mode, just display the permutation in the same format used
@@ -566,54 +540,76 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
for (i = 0; i < nsteps; i++)
{
Step *step = steps[i];
+ Step *oldstep = NULL;
PGconn *conn = conns[1 + step->session];
+ bool mustwait;
- if (waiting != NULL && step->session == waiting->session)
+ /*
+ * Check whether the session that needs to perform the next step
+ * is still blocked on an earlier step. If so, wait for it to finish.
+ *
+ * In older versions of this tool, when allowed precisely one session
+ * to be waiting at a time. If we reached a step which required that
+ * session to execute the next command, we would declare the whole
+ * permutation invalid, cancel everything, and move on to the next one.
+ * Unfortunately, that made it impossible to test the deadlock detector
+ * using this framework unless the numebr of processes involved in the
+ * deadlock was precisely two. We now assume that if we reach a step
+ * that is still blocked, we need to wait for it to unblock itself.
+ */
+ for (w = 0; w < nwaiting; ++w)
{
- PGcancel *cancel;
- PGresult *res;
- int j;
-
- /*
- * This permutation is invalid: it can never happen in real life.
- *
- * A session is blocked on an earlier step (waiting) and no
- * further steps from this session can run until it is unblocked,
- * but it can only be unblocked by running steps from other
- * sessions.
- */
- fprintf(stderr, "invalid permutation detected\n");
-
- /* Cancel the waiting statement from this session. */
- cancel = PQgetCancel(conn);
- if (cancel != NULL)
+ if (step->session == waiting[w]->session)
{
- char buf[256];
-
- if (!PQcancel(cancel, buf, sizeof(buf)))
- fprintf(stderr, "PQcancel failed: %s\n", buf);
-
- /* Be sure to consume the error message. */
- while ((res = PQgetResult(conn)) != NULL)
- PQclear(res);
-
- PQfreeCancel(cancel);
+ oldstep = waiting[w];
+ break;
}
+ }
+ if (oldstep != NULL)
+ {
+ /* Wait for previous step on this connection. */
+ try_complete_step(oldstep, STEP_RETRY);
/*
- * Now we really have to complete all the running transactions to
- * make sure teardown doesn't block.
+ * Attempt to complete any steps that were previously waiting.
+ * Remove any that have completed, and also remove the one for
+ * which we just waited.
*/
- for (j = 1; j < nconns; j++)
+ w = 0;
+ nerrorstep = 0;
+ while (w < nwaiting)
{
- res = PQexec(conns[j], "ROLLBACK");
- if (res != NULL)
- PQclear(res);
+ if (waiting[w] == oldstep)
+ {
+ /* We just waited for this; remove it. */
+ if (w + 1 < nwaiting)
+ memcpy(&waiting[w], &waiting[w + 1],
+ (nwaiting - (w + 1)) * sizeof(Step *));
+ nwaiting--;
+ continue;
+ }
+
+ if (try_complete_step(waiting[w], STEP_NONBLOCK | STEP_RETRY))
+ {
+ /* Still blocked on a lock, leave it alone. */
+ w++;
+ }
+ else
+ {
+ /* This one finished, too! */
+ errorstep[nerrorstep++] = waiting[w];
+ if (w + 1 < nwaiting)
+ memcpy(&waiting[w], &waiting[w + 1],
+ (nwaiting - (w + 1)) * sizeof(Step *));
+ nwaiting--;
+ }
}
- goto teardown;
+ /* Report all errors together. */
+ report_multiple_error_messages(oldstep, nerrorstep, errorstep);
}
+ /* Send the query for this step. */
if (!PQsendQuery(conn, step->sql))
{
fprintf(stdout, "failed to send query for step %s: %s\n",
@@ -621,39 +617,41 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
exit_nicely();
}
- if (waiting != NULL)
- {
- /* Some other step is already waiting: just block. */
- try_complete_step(step, 0);
+ /* Try to complete this step without blocking. */
+ mustwait = try_complete_step(step, STEP_NONBLOCK);
- /*
- * See if this step unblocked the waiting step; report both error
- * messages together if so.
- */
- if (!try_complete_step(waiting, STEP_NONBLOCK | STEP_RETRY))
+ /* Attempt to complete any steps that were previously waiting. */
+ w = 0;
+ nerrorstep = 0;
+ while (w < nwaiting)
+ {
+ if (try_complete_step(waiting[w], STEP_NONBLOCK | STEP_RETRY))
+ w++;
+ else
{
- report_two_error_messages(step, waiting);
- waiting = NULL;
+ errorstep[nerrorstep++] = waiting[w];
+ if (w + 1 < nwaiting)
+ memcpy(&waiting[w], &waiting[w + 1],
+ (nwaiting - (w + 1)) * sizeof(Step *));
+ nwaiting--;
}
- else
- report_error_message(step);
- }
- else
- {
- if (try_complete_step(step, STEP_NONBLOCK))
- waiting = step;
- report_error_message(step);
}
+
+ /* Report any error from this step, and any steps that it unblocked. */
+ report_multiple_error_messages(step, nerrorstep, errorstep);
+
+ /* If this step is waiting, add it to the array of waiters. */
+ if (mustwait)
+ waiting[nwaiting++] = step;
}
/* Finish any waiting query. */
- if (waiting != NULL)
+ for (w = 0; w < nwaiting; ++w)
{
- try_complete_step(waiting, STEP_RETRY);
- report_error_message(waiting);
+ try_complete_step(waiting[w], STEP_RETRY);
+ report_error_message(waiting[w]);
}
-teardown:
/* Perform per-session teardown */
for (i = 0; i < testspec->nsessions; i++)
{
@@ -714,14 +712,17 @@ try_complete_step(Step *step, int flags)
{
PGconn *conn = conns[1 + step->session];
fd_set read_set;
+ struct timeval start_time;
struct timeval timeout;
int sock = PQsocket(conn);
int ret;
PGresult *res;
+ bool canceled = false;
+ gettimeofday(&start_time, NULL);
FD_ZERO(&read_set);
- while ((flags & STEP_NONBLOCK) && PQisBusy(conn))
+ while (PQisBusy(conn))
{
FD_SET(sock, &read_set);
timeout.tv_sec = 0;
@@ -737,28 +738,75 @@ try_complete_step(Step *step, int flags)
}
else if (ret == 0) /* select() timeout: check for lock wait */
{
- int ntuples;
+ struct timeval current_time;
+ int64 td;
- res = PQexecPrepared(conns[0], PREP_WAITING, 1,
- &backend_pids[step->session + 1],
- NULL, NULL, 0);
- if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ /* If it's OK for the step to block, check whether it has. */
+ if (flags & STEP_NONBLOCK)
{
- fprintf(stderr, "lock wait query failed: %s",
- PQerrorMessage(conn));
- exit_nicely();
+ int ntuples;
+
+ res = PQexecPrepared(conns[0], PREP_WAITING, 1,
+ &backend_pids[step->session + 1],
+ NULL, NULL, 0);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ fprintf(stderr, "lock wait query failed: %s",
+ PQerrorMessage(conn));
+ exit_nicely();
+ }
+ ntuples = PQntuples(res);
+ PQclear(res);
+
+ if (ntuples >= 1) /* waiting to acquire a lock */
+ {
+ if (!(flags & STEP_RETRY))
+ printf("step %s: %s <waiting ...>\n",
+ step->name, step->sql);
+ return true;
+ }
+ /* else, not waiting */
}
- ntuples = PQntuples(res);
- PQclear(res);
- if (ntuples >= 1) /* waiting to acquire a lock */
+ /* Figure out how long we've been waiting for this step. */
+ gettimeofday(&current_time, NULL);
+ td = (int64) current_time.tv_sec - (int64) start_time.tv_sec;
+ td *= USECS_PER_SEC;
+ td += (int64) current_time.tv_usec - (int64) start_time.tv_usec;
+
+ /*
+ * After 60 seconds, try to cancel the query.
+ *
+ * If the user tries to test an invalid permutation, we don't
+ * want to hang forever, especially when this is running in the
+ * buildfarm. So try to cancel it after a minute. This will
+ * presumably lead to this permutation failing, but remaining
+ * permutations and tests should still be OK.
+ */
+ if (td > 60 * USECS_PER_SEC && !canceled)
{
- if (!(flags & STEP_RETRY))
- printf("step %s: %s <waiting ...>\n",
- step->name, step->sql);
- return true;
+ PGcancel *cancel = PQgetCancel(conn);
+ char buf[256];
+
+ if (cancel != NULL && !PQcancel(cancel, buf, sizeof(buf)))
+ fprintf(stderr, "PQcancel failed: %s\n", buf);
+ if (cancel != NULL)
+ PQfreeCancel(cancel);
+ }
+
+ /*
+ * After 75 seconds, just give up and die.
+ *
+ * Since cleanup steps won't be run in this case, this may cause
+ * later tests to fail. That stinks, but it's better than waiting
+ * forever for the server to respond to the cancel.
+ */
+ if (td > 75 * USECS_PER_SEC)
+ {
+ fprintf(stderr, "step %s timed out after 75 seconds\n",
+ step->name);
+ exit_nicely();
}
- /* else, not waiting: give it more time */
}
else if (!PQconsumeInput(conn)) /* select(): data available */
{