@@ -1008,9 +1008,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
		return metricGrowth, profileGrowth, p
	}

-	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
-			metricGrowth, profileGrowth, p := measureDelta(t, func() {
+	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
+		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64) {
+			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
@@ -1030,7 +1030,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
				stopped.Wait()
			})

-			if profileGrowth == 0 {
+			if totalProfileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 && strictTiming {
@@ -1043,7 +1043,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
			// together. It doesn't work as well for user-space contention, where the
			// involved goroutines are not _Grunnable the whole time and so need to pass
			// through the scheduler.
-			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
+			t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1063,7 +1063,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
			}

			var stks [][]string
-			values := make([][2]int64, len(acceptStacks))
+			values := make([][2]int64, len(acceptStacks)+1)
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
@@ -1072,16 +1072,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
					}
				}
				stks = append(stks, have)
+				found := false
				for i, stk := range acceptStacks {
					if slices.Equal(have, stk) {
						values[i][0] += s.Value[0]
						values[i][1] += s.Value[1]
+						found = true
+						break
					}
				}
+				if !found {
+					values[len(values)-1][0] += s.Value[0]
+					values[len(values)-1][1] += s.Value[1]
+				}
			}
+			profileGrowth = make([]int64, len(acceptStacks)+1)
+			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
			for i, stk := range acceptStacks {
				n += values[i][0]
				value += values[i][1]
+				profileGrowth[i] = values[i][1]
				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
			}
			if n == 0 && value == 0 {
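Aside: the bucketing above sums each pprof sample into the matching entry of acceptStacks, with one extra trailing slot acting as a catch-all for samples from any other stack; profileGrowth then carries one per-stack total plus that remainder. A minimal standalone sketch of the same bookkeeping, using a hypothetical sample type in place of runtime/pprof's profile structures:

```go
package main

import (
	"fmt"
	"slices"
)

// sample is a stand-in for a pprof sample: a call stack plus
// [count, total-delay] values. Hypothetical type, for illustration only.
type sample struct {
	stack  []string
	values [2]int64
}

// bucket sums sample values per accepted stack; samples whose stack matches
// none of the accepted stacks land in the final catch-all slot.
func bucket(acceptStacks [][]string, samples []sample) [][2]int64 {
	out := make([][2]int64, len(acceptStacks)+1)
	for _, s := range samples {
		found := false
		for i, stk := range acceptStacks {
			if slices.Equal(s.stack, stk) {
				out[i][0] += s.values[0]
				out[i][1] += s.values[1]
				found = true
				break
			}
		}
		if !found {
			out[len(out)-1][0] += s.values[0]
			out[len(out)-1][1] += s.values[1]
		}
	}
	return out
}

func main() {
	accept := [][]string{{"runtime.unlock", "pkg.fast"}, {"runtime.unlock", "pkg.slow"}}
	samples := []sample{
		{stack: []string{"runtime.unlock", "pkg.fast"}, values: [2]int64{3, 300}},
		{stack: []string{"runtime.unlock", "pkg.other"}, values: [2]int64{1, 50}},
	}
	fmt.Println(bucket(accept, samples)) // [[3 300] [0 0] [1 50]]
}
```

The catch-all slot is what lets the test keep reporting per-stack growth even when some contention is attributed to stacks it did not anticipate.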
@@ -1103,8 +1113,14 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	t.Run("runtime.lock", func(t *testing.T) {
		mus := make([]runtime.Mutex, 100)
		var needContention atomic.Int64
-		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
-		delayMicros := delay.Microseconds()
+		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+		fastDelayMicros := baseDelay.Microseconds()
+		slowDelayMicros := baseDelay.Microseconds() * 4
+
+		const (
+			fastRole = 0
+			slowRole = 1
+		)

		// The goroutine that acquires the lock will only proceed when it
		// detects that its partner is contended for the lock. That will lead to
@@ -1120,34 +1136,69 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
		}

+		minTicks := make([][]int64, workers) // lower bound, known-contended time, measured by cputicks
+		maxTicks := make([][]int64, workers) // upper bound, total lock() duration, measured by cputicks
+		for i := range minTicks {
+			minTicks[i] = make([]int64, len(mus))
+			maxTicks[i] = make([]int64, len(mus))
+		}
+		var id atomic.Int32
		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

-			runtime.Lock(mu)
-			for int(needContention.Load()) == n {
-				if runtime.MutexContended(mu) {
-					// make them wait a little while
-					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-						runtime.Usleep(uint32(delayMicros))
+			// Each worker has a role: to have a fast or slow critical section.
+			// Rotate the role assignments as we step through the mutex slice so
+			// we don't end up with one M always claiming the same kind of work.
+			id := int(id.Add(1))
+			role := (id + n) % workers
+
+			marker, delayMicros := fastMarkerFrame, fastDelayMicros
+			if role == slowRole {
+				marker, delayMicros = slowMarkerFrame, slowDelayMicros
+			}
+
+			marker(func() {
+				t0 := runtime.Cputicks()
+				runtime.Lock(mu)
+				maxTicks[role][n] = runtime.Cputicks() - t0
+				minTicks[role][n] = 0
+				for int(needContention.Load()) == n {
+					if runtime.MutexContended(mu) {
+						t1 := runtime.Cputicks()
+						// make them wait a little while
+						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
+							runtime.Usleep(uint32(1 + delayMicros/8))
+						}
+						minTicks[role][n] = runtime.Cputicks() - t1
+						break
					}
-					break
				}
-			}
-			runtime.Unlock(mu)
-			needContention.Store(int64(n - 1))
+				runtime.Unlock(mu)
+				needContention.Store(int64(n - 1))
+			})

			return true
		}

-		stks := [][]string{{
-			"runtime.unlock",
-			"runtime_test." + name + ".func4.1",
-			"runtime_test.(*contentionWorker).run",
-		}}
+		stks := make([][]string, 2)
+		for i := range stks {
+			marker := "runtime_test.fastMarkerFrame"
+			if i == slowRole {
+				marker = "runtime_test.slowMarkerFrame"
+			}
+
+			stks[i] = []string{
+				"runtime.unlock",
+				"runtime_test." + name + ".func4.1.1",
+				marker,
+				"runtime_test." + name + ".func4.1",
+				"runtime_test.(*contentionWorker).run",
+			}
+		}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
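Aside: fastMarkerFrame and slowMarkerFrame (defined at the bottom of this change) are trivial wrappers whose only job is to leave a distinguishing frame on the call stack, which is why the expected stacks now include the marker plus one extra closure level (".func4.1.1"). A self-contained sketch of the marker-frame idea, using hypothetical names and runtime.Callers to make the extra frame visible instead of the mutex profile:

```go
package main

import (
	"fmt"
	"runtime"
)

// Marker frames: trivial wrappers whose only purpose is to leave a
// recognizable frame on the call stack, so a profile (or stack dump)
// can distinguish two otherwise identical code paths.
func slowMarker(fn func()) { fn() }
func fastMarker(fn func()) { fn() }

// stackNames returns the function names of the caller's call stack.
func stackNames() []string {
	pcs := make([]uintptr, 16)
	n := runtime.Callers(2, pcs) // skip runtime.Callers and stackNames
	frames := runtime.CallersFrames(pcs[:n])
	var names []string
	for {
		f, more := frames.Next()
		names = append(names, f.Function)
		if !more {
			break
		}
	}
	return names
}

func main() {
	critical := func() { fmt.Println(stackNames()) }

	// Same critical section, two different wrappers: the printed stacks
	// differ only by the marker frame (main.slowMarker vs main.fastMarker).
	slowMarker(critical)
	fastMarker(critical)
}
```

The test leans on the same effect: because the slow and fast critical sections run under different marker frames, the mutex profile attributes their contention to two distinct stacks that can be checked independently.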
@@ -1156,7 +1207,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1167,10 +1218,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

+			var slowMinTicks, fastMinTicks int64
+			for role, ticks := range minTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMinTicks += delta
+					} else {
+						fastMinTicks += delta
+					}
+				}
+			}
+			var slowMaxTicks, fastMaxTicks int64
+			for role, ticks := range maxTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMaxTicks += delta
+					} else {
+						fastMaxTicks += delta
+					}
+				}
+			}
+
+			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
+			for _, set := range []struct {
+				name     string
+				profTime int64
+				minTime  int64
+				maxTime  int64
+			}{
+				{
+					name:     "slow",
+					profTime: profileGrowth[slowRole],
+					minTime:  int64(float64(slowMinTicks) / cpuGHz),
+					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
+				},
+				{
+					name:     "fast",
+					profTime: profileGrowth[fastRole],
+					minTime:  int64(float64(fastMinTicks) / cpuGHz),
+					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
+				},
+			} {
+				t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
+				t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
+				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+				if set.profTime < set.minTime {
+					t.Errorf("profile undercounted %q critical section", set.name)
+				}
+				if set.profTime > set.maxTime {
+					t.Errorf("profile overcounted %q critical section", set.name)
+				}
+			}
+
+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
			const slop = 1.5 // account for nanotime vs cputicks
			t.Run("compare timers", func(t *testing.T) {
				testenv.SkipFlaky(t, 64253)
-				if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
					t.Errorf("views differ by more than %fx", slop)
				}
			})
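Aside: the new assertion brackets the profile's per-role delay between a cputicks-based lower bound (the contended time the test deliberately imposed inside that role's critical section) and an upper bound (the total lock() duration observed by the other role), converting ticks to nanoseconds via the CPU frequency. A hedged sketch of that bounds check, with made-up numbers standing in for the runtime's Cputicks/CyclesPerSecond test hooks:

```go
package main

import "fmt"

// checkBounds mirrors the structure of the assertion above: a profiler's
// measurement (in nanoseconds) should fall between a lower bound the test
// knows it spent contended and an upper bound on how long the lock could
// have been held against the waiter. Names and values here are illustrative.
func checkBounds(name string, profNanos, minTicks, maxTicks, ticksPerSecond int64) error {
	ghz := float64(ticksPerSecond) / 1e9 // ticks per nanosecond
	minNanos := int64(float64(minTicks) / ghz)
	maxNanos := int64(float64(maxTicks) / ghz)
	if profNanos < minNanos {
		return fmt.Errorf("profile undercounted %q critical section: %dns < %dns", name, profNanos, minNanos)
	}
	if profNanos > maxNanos {
		return fmt.Errorf("profile overcounted %q critical section: %dns > %dns", name, profNanos, maxNanos)
	}
	return nil
}

func main() {
	// Example: a 3GHz tick rate, a 300k-tick lower bound (100µs), a 3M-tick
	// upper bound (1ms), and a reported 400µs of contention: within bounds.
	if err := checkBounds("slow", 400_000, 300_000, 3_000_000, 3_000_000_000); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println("profile within bounds")
	}
}
```

A small slop would normally be needed on top of this because cputicks and the profile's nanotime-based clock do not advance in lockstep; the test accounts for that separately with its slop constants.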
@@ -1191,7 +1300,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
			// 1e-9 chance of that, which is an acceptably low flakiness rate.
			const samplingSlop = 2.5

-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1202,8 +1311,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
@@ -1270,6 +1384,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	})
}

+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
// contentionWorker provides cleaner call stacks for lock contention profile tests
type contentionWorker struct {
	before func()