
Commit f9ba2cf

rhysh authored and gopherbot committed
runtime: improve runtime-internal mutex profile tests
Have the test use the same clock (cputicks) as the profiler, and use the
test's own measurements as hard bounds on the magnitude to expect in the
profile.

Compare the depiction of two users of the same lock: one where the critical
section is fast, one where it is slow. Confirm that the profile shows the
slow critical section as a large source of delay (with #66999 fixed), rather
than showing the fast critical section as a large recipient of delay.

For #64253
For #66999

Change-Id: I784c8beedc39de564dc8cee42060a5d5ce55de39
Reviewed-on: https://go-review.googlesource.com/c/go/+/586237
Auto-Submit: Rhys Hiltner <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent 87e930f commit f9ba2cf
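
The change compares two views of the same contention: the runtime/metrics view of mutex wait (accumulated with nanotime) and the mutex profile from runtime/pprof (accumulated with cputicks and reported in nanoseconds). As a point of reference only, and not part of this change, here is a minimal sketch of reading both views from an ordinary program; it assumes Go 1.20+ for the /sync/mutex-wait/total:seconds metric, and it does not cover the runtime-internal locks that the test below exercises through test-only hooks.

// Sketch: read both views of mutex contention that the test compares.
package main

import (
    "fmt"
    "os"
    "runtime"
    "runtime/metrics"
    "runtime/pprof"
)

func main() {
    old := runtime.SetMutexProfileFraction(1) // record every contention event
    defer runtime.SetMutexProfileFraction(old)

    // ... run code that contends on a sync.Mutex here ...

    // View 1: runtime/metrics, which accumulates wait time using nanotime.
    samples := []metrics.Sample{{Name: "/sync/mutex-wait/total:seconds"}}
    metrics.Read(samples)
    fmt.Printf("total mutex wait: %fs\n", samples[0].Value.Float64())

    // View 2: the mutex profile, which accumulates delay using cputicks and
    // reports nanoseconds; debug=1 prints a human-readable form.
    pprof.Lookup("mutex").WriteTo(os.Stdout, 1)
}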

File tree

2 files changed

+146 -27 lines changed

src/runtime/export_test.go

+2 -0 lines changed
@@ -39,6 +39,8 @@ var Atoi32 = atoi32
 var ParseByteCount = parseByteCount
 
 var Nanotime = nanotime
+var Cputicks = cputicks
+var CyclesPerSecond = pprof_cyclesPerSecond
 var NetpollBreak = netpollBreak
 var Usleep = usleep
 
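
These exports give the test access to the profiler's own clock (cputicks) and the runtime's estimate of its frequency, so cputicks deltas can be converted to wall-clock time. Below is a minimal sketch of that conversion, mirroring the cpuGHz computation added to metrics_test.go further down; ticksToNanos is a hypothetical helper for package runtime_test, not part of the change.

// Within package runtime_test, where export_test.go makes these hooks visible:
func ticksToNanos(ticks int64) int64 {
    // Same conversion the test performs inline:
    //   cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
    cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9 // cycles per nanosecond
    return int64(float64(ticks) / cpuGHz)
}

// Usage, timing a critical section with the same clock the profiler uses:
//   t0 := runtime.Cputicks()
//   runtime.Lock(mu); ...critical section...; runtime.Unlock(mu)
//   elapsedNanos := ticksToNanos(runtime.Cputicks() - t0)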

src/runtime/metrics_test.go

+144 -27 lines changed
@@ -1008,9 +1008,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
         return metricGrowth, profileGrowth, p
     }
 
-    testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-        return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-            metricGrowth, profileGrowth, p := measureDelta(t, func() {
+    testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
+        return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
+            metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
                 var started, stopped sync.WaitGroup
                 started.Add(workers)
                 stopped.Add(workers)
@@ -1030,7 +1030,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                 stopped.Wait()
             })
 
-            if profileGrowth == 0 {
+            if totalProfileGrowth == 0 {
                 t.Errorf("no increase in mutex profile")
             }
             if metricGrowth == 0 && strictTiming {
@@ -1043,7 +1043,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
             // together. It doesn't work as well for user-space contention, where the
             // involved goroutines are not _Grunnable the whole time and so need to pass
             // through the scheduler.
-            t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
+            t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
             t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
 
             acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1063,7 +1063,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
             }
 
             var stks [][]string
-            values := make([][2]int64, len(acceptStacks))
+            values := make([][2]int64, len(acceptStacks)+1)
             for _, s := range p.Sample {
                 var have []string
                 for _, loc := range s.Location {
@@ -1072,16 +1072,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                     }
                 }
                 stks = append(stks, have)
+                found := false
                 for i, stk := range acceptStacks {
                     if slices.Equal(have, stk) {
                         values[i][0] += s.Value[0]
                         values[i][1] += s.Value[1]
+                        found = true
+                        break
                     }
                 }
+                if !found {
+                    values[len(values)-1][0] += s.Value[0]
+                    values[len(values)-1][1] += s.Value[1]
+                }
             }
+            profileGrowth = make([]int64, len(acceptStacks)+1)
+            profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
             for i, stk := range acceptStacks {
                 n += values[i][0]
                 value += values[i][1]
+                profileGrowth[i] = values[i][1]
                 t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
             }
             if n == 0 && value == 0 {
@@ -1103,8 +1113,14 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
     t.Run("runtime.lock", func(t *testing.T) {
         mus := make([]runtime.Mutex, 100)
         var needContention atomic.Int64
-        delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
-        delayMicros := delay.Microseconds()
+        baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+        fastDelayMicros := baseDelay.Microseconds()
+        slowDelayMicros := baseDelay.Microseconds() * 4
+
+        const (
+            fastRole = 0
+            slowRole = 1
+        )
 
         // The goroutine that acquires the lock will only proceed when it
         // detects that its partner is contended for the lock. That will lead to
@@ -1120,34 +1136,69 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
             t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
         }
 
+        minTicks := make([][]int64, workers) // lower bound, known-contended time, measured by cputicks
+        maxTicks := make([][]int64, workers) // upper bound, total lock() duration, measured by cputicks
+        for i := range minTicks {
+            minTicks[i] = make([]int64, len(mus))
+            maxTicks[i] = make([]int64, len(mus))
+        }
+        var id atomic.Int32
         fn := func() bool {
             n := int(needContention.Load())
             if n < 0 {
                 return false
             }
             mu := &mus[n]
 
-            runtime.Lock(mu)
-            for int(needContention.Load()) == n {
-                if runtime.MutexContended(mu) {
-                    // make them wait a little while
-                    for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-                        runtime.Usleep(uint32(delayMicros))
+            // Each worker has a role: to have a fast or slow critical section.
+            // Rotate the role assignments as we step through the mutex slice so
+            // we don't end up with one M always claiming the same kind of work.
+            id := int(id.Add(1))
+            role := (id + n) % workers
+
+            marker, delayMicros := fastMarkerFrame, fastDelayMicros
+            if role == slowRole {
+                marker, delayMicros = slowMarkerFrame, slowDelayMicros
+            }
+
+            marker(func() {
+                t0 := runtime.Cputicks()
+                runtime.Lock(mu)
+                maxTicks[role][n] = runtime.Cputicks() - t0
+                minTicks[role][n] = 0
+                for int(needContention.Load()) == n {
+                    if runtime.MutexContended(mu) {
+                        t1 := runtime.Cputicks()
+                        // make them wait a little while
+                        for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
+                            runtime.Usleep(uint32(1 + delayMicros/8))
+                        }
+                        minTicks[role][n] = runtime.Cputicks() - t1
+                        break
                     }
-                    break
                 }
-            }
-            runtime.Unlock(mu)
-            needContention.Store(int64(n - 1))
+                runtime.Unlock(mu)
+                needContention.Store(int64(n - 1))
+            })
 
             return true
         }
 
-        stks := [][]string{{
-            "runtime.unlock",
-            "runtime_test." + name + ".func4.1",
-            "runtime_test.(*contentionWorker).run",
-        }}
+        stks := make([][]string, 2)
+        for i := range stks {
+            marker := "runtime_test.fastMarkerFrame"
+            if i == slowRole {
+                marker = "runtime_test.slowMarkerFrame"
+            }
+
+            stks[i] = []string{
+                "runtime.unlock",
+                "runtime_test." + name + ".func4.1.1",
+                marker,
+                "runtime_test." + name + ".func4.1",
+                "runtime_test.(*contentionWorker).run",
+            }
+        }
 
         t.Run("sample-1", func(t *testing.T) {
             old := runtime.SetMutexProfileFraction(1)
@@ -1156,7 +1207,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
             needContention.Store(int64(len(mus) - 1))
             metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)
 
-            if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+            if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
                 // The test imposes a delay with usleep, verified with calls to
                 // nanotime. Compare against the runtime/metrics package's view
                 // (based on nanotime) rather than runtime/pprof's view (based
@@ -1167,10 +1218,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                 t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
             }
 
+            var slowMinTicks, fastMinTicks int64
+            for role, ticks := range minTicks {
+                for _, delta := range ticks {
+                    if role == slowRole {
+                        slowMinTicks += delta
+                    } else {
+                        fastMinTicks += delta
+                    }
+                }
+            }
+            var slowMaxTicks, fastMaxTicks int64
+            for role, ticks := range maxTicks {
+                for _, delta := range ticks {
+                    if role == slowRole {
+                        slowMaxTicks += delta
+                    } else {
+                        fastMaxTicks += delta
+                    }
+                }
+            }
+
+            cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
+            for _, set := range []struct {
+                name     string
+                profTime int64
+                minTime  int64
+                maxTime  int64
+            }{
+                {
+                    name:     "slow",
+                    profTime: profileGrowth[slowRole],
+                    minTime:  int64(float64(slowMinTicks) / cpuGHz),
+                    maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
+                },
+                {
+                    name:     "fast",
+                    profTime: profileGrowth[fastRole],
+                    minTime:  int64(float64(fastMinTicks) / cpuGHz),
+                    maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
+                },
+            } {
+                t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
+                t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
+                t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+                if set.profTime < set.minTime {
+                    t.Errorf("profile undercounted %q critical section", set.name)
+                }
+                if set.profTime > set.maxTime {
+                    t.Errorf("profile overcounted %q critical section", set.name)
+                }
+            }
+
+            var totalProfileGrowth float64
+            for _, growth := range profileGrowth {
+                totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+            }
+
             const slop = 1.5 // account for nanotime vs cputicks
             t.Run("compare timers", func(t *testing.T) {
                 testenv.SkipFlaky(t, 64253)
-                if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+                if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
                     t.Errorf("views differ by more than %fx", slop)
                 }
             })
@@ -1191,7 +1300,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
             // 1e-9 chance of that, which is an acceptably low flakiness rate.
             const samplingSlop = 2.5
 
-            if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+            if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
                 // The test imposes a delay with usleep, verified with calls to
                 // nanotime. Compare against the runtime/metrics package's view
                 // (based on nanotime) rather than runtime/pprof's view (based
@@ -1202,8 +1311,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
                 t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
             }
 
+            var totalProfileGrowth float64
+            for _, growth := range profileGrowth {
+                totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+            }
+
             const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-            if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+            if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
                 t.Errorf("views differ by more than %fx", timerSlop)
             }
         })
@@ -1270,6 +1384,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
     })
 }
 
+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
 // contentionWorker provides cleaner call stacks for lock contention profile tests
 type contentionWorker struct {
     before func()
