Skip to content

Commit d38fc4e

Browse files
committed
runtime: let sysmon sleep in netpoll if possible
Currently, the scheduler picks the next g to run in the following delicate order: 1 locked g. 2 trace reader g. 3 GC background mark worker g. 4 global runq if schedtick%61 == 0. 5 local runq including timers. 6 findrunnable loop. 6.1 finalizer g. 6.2 local runq including timers. 6.3 global runq, no schedtick checked. 6.4 non-blocking netpoll(0) if netpollWaiters > 0 and no other M is polling. 6.5 steal any timers and runnable g from other Ps. spinning state. 6.6 GC idle mark worker g. 6.7 recheck all other Ps' local runqs, idle GC markers and timers. non-spinning state, P is idle. 6.8 blocking netpoll(delay) until the next earliest timer or IO is ready. if any other M is already polling, no blocking netpoll. M parked. If a runnable g is found in any step, all steps following it are skipped. Suppose there are many runnable gs: the scheduler will almost always find a g before trying findrunnable, so netpoll is left untouched. Normal Ms have no opportunity to unpark netpoll waiter gs even though IO is ready. The special M sysmon polls the network if it has not been polled for more than 10ms, which is the only opportunity to ready any netpoll waiter gs into the global runq. This causes a false positive timeout problem. I did a simple test to demonstrate this problem. Dialing a localhost TCP listening port costs about 10ms. This is obviously not credible. See file runtime/sysmon_test.go for the detailed test code. Normal Ms will sleep in netpoll waiting for timers and IO readiness if no g is found at last. The special M sysmon simply sleeps 20us~10ms and does a non-blocking netpoll if needed. This CL lets sysmon sleep in netpoll, doing a blocking netpoll like normal Ms do. So netpoll waiter gs will be readied into the runq as soon as IO is ready. With this change, the above-mentioned test shows that dialing a localhost TCP listening port mostly costs less than 1ms. That is much more reasonable than 10ms. 
Detailed test results for the old and new versions are as follows: $ ../../bin/go test -v -run TestSysmonReadyNetpollWaitersASAP === RUN TestSysmonReadyNetpollWaitersASAP sysmon_test.go:105: dialed 85 times within 1.000029211s sysmon_test.go:106: timeBucket count percent sysmon_test.go:108: [ 0, 1)ms 1 1.18% sysmon_test.go:108: [ 1, 2)ms 0 0.00% sysmon_test.go:108: [ 2, 3)ms 0 0.00% sysmon_test.go:108: [ 3, 4)ms 0 0.00% sysmon_test.go:108: [ 4, 5)ms 0 0.00% sysmon_test.go:108: [ 5, 6)ms 0 0.00% sysmon_test.go:108: [ 6, 7)ms 0 0.00% sysmon_test.go:108: [ 7, 8)ms 0 0.00% sysmon_test.go:108: [ 8, 9)ms 1 1.18% sysmon_test.go:108: [ 9,10)ms 2 2.35% sysmon_test.go:108: [10,11)ms 32 37.65% sysmon_test.go:108: [11,12)ms 49 57.65% --- FAIL: TestSysmonReadyNetpollWaitersASAP (1.11s) $ ../../bin/go test -v -run TestSysmonReadyNetpollWaitersASAP === RUN TestSysmonReadyNetpollWaitersASAP sysmon_test.go:105: dialed 2369 times within 1.000021368s sysmon_test.go:106: timeBucket count percent sysmon_test.go:108: [ 0, 1)ms 2209 93.25% sysmon_test.go:108: [ 1, 2)ms 13 0.55% sysmon_test.go:108: [ 2, 3)ms 33 1.39% sysmon_test.go:108: [ 3, 4)ms 68 2.87% sysmon_test.go:108: [ 4, 5)ms 8 0.34% sysmon_test.go:108: [ 5, 6)ms 9 0.38% sysmon_test.go:108: [ 6, 7)ms 6 0.25% sysmon_test.go:108: [ 7, 8)ms 4 0.17% sysmon_test.go:108: [ 8, 9)ms 7 0.30% sysmon_test.go:108: [ 9,10)ms 4 0.17% sysmon_test.go:108: [10,11)ms 5 0.21% sysmon_test.go:108: [11,12)ms 3 0.13% --- PASS: TestSysmonReadyNetpollWaitersASAP (1.11s)
1 parent 8893175 commit d38fc4e

File tree

3 files changed

+209
-26
lines changed

3 files changed

+209
-26
lines changed

src/runtime/export_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ var MemclrNoHeapPointers = memclrNoHeapPointers
4848

4949
var LockPartialOrder = lockPartialOrder
5050

51+
var Goyield = goyield
52+
53+
var NeedSysmonWorkaround = needSysmonWorkaround
54+
5155
type LockRank lockRank
5256

5357
func (l LockRank) String() string {

src/runtime/proc.go

Lines changed: 71 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5050,20 +5050,26 @@ func sysmon() {
50505050
checkdead()
50515051
unlock(&sched.lock)
50525052

5053-
lasttrace := int64(0)
5054-
idle := 0 // how many cycles in succession we had not wokeup somebody
5055-
delay := uint32(0)
5053+
if netpollInited == 0 {
5054+
netpollGenericInit()
5055+
}
5056+
tracedelay := int64(debug.schedtrace) * 1000000
5057+
nexttrace := int64(maxWhen) // when should we output next schedtrace. much far future if disabled.
5058+
if tracedelay > 0 {
5059+
nexttrace = nanotime()
5060+
}
5061+
idle := 0 // how many cycles in succession we had not wokeup somebody
5062+
delay := int64(0) // ns
50565063

50575064
for {
50585065
if idle == 0 { // start with 20us sleep...
5059-
delay = 20
5066+
delay = 20 * 1000
50605067
} else if idle > 50 { // start doubling the sleep after 1ms...
50615068
delay *= 2
50625069
}
5063-
if delay > 10*1000 { // up to 10ms
5064-
delay = 10 * 1000
5070+
if delay > 10*1000*1000 { // up to 10ms
5071+
delay = 10 * 1000 * 1000
50655072
}
5066-
usleep(delay)
50675073

50685074
// sysmon should not enter deep sleep if schedtrace is enabled so that
50695075
// it can print that information at the right time.
@@ -5109,7 +5115,7 @@ func sysmon() {
51095115
}
51105116
if syscallWake {
51115117
idle = 0
5112-
delay = 20
5118+
delay = 20 * 1000
51135119
}
51145120
}
51155121
unlock(&sched.lock)
@@ -5124,23 +5130,62 @@ func sysmon() {
51245130
if *cgo_yield != nil {
51255131
asmcgocall(*cgo_yield, nil)
51265132
}
5127-
// poll network if not polled for more than 10ms
5128-
lastpoll := int64(atomic.Load64(&sched.lastpoll))
5129-
if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
5130-
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
5131-
list := netpoll(0) // non-blocking - returns list of goroutines
5132-
if !list.empty() {
5133-
// Need to decrement number of idle locked M's
5134-
// (pretending that one more is running) before injectglist.
5135-
// Otherwise it can lead to the following situation:
5136-
// injectglist grabs all P's but before it starts M's to run the P's,
5137-
// another M returns from syscall, finishes running its G,
5138-
// observes that there is no work to do and no other running M's
5139-
// and reports deadlock.
5140-
incidlelocked(-1)
5141-
injectglist(&list)
5142-
incidlelocked(1)
5133+
5134+
if delay < 1000*1000 || (GOOS == "netbsd" && needSysmonWorkaround) {
5135+
// netpoll() will convert (0, 999us] to 1ms on some platforms.
5136+
// to let retake() happen as often as wanted, use usleep if delay is less than 1ms.
5137+
// issue 42515 reports netbsd may sometimes miss netpoll wake-ups, so skip it.
5138+
usleep(uint32(delay / 1000))
5139+
5140+
// non-blocking poll if no other M polling, not polled for more than 2ms and there is any waiter.
5141+
lastpoll := int64(atomic.Load64(&sched.lastpoll))
5142+
if lastpoll != 0 && lastpoll+2*1000*1000 < now && atomic.Load(&netpollWaiters) > 0 {
5143+
atomic.Cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
5144+
list := netpoll(0) // non-blocking - returns list of goroutines
5145+
if !list.empty() {
5146+
// Need to decrement number of idle locked M's
5147+
// (pretending that one more is running) before injectglist.
5148+
// Otherwise it can lead to the following situation:
5149+
// injectglist grabs all P's but before it starts M's to run the P's,
5150+
// another M returns from syscall, finishes running its G,
5151+
// observes that there is no work to do and no other running M's
5152+
// and reports deadlock.
5153+
incidlelocked(-1)
5154+
injectglist(&list)
5155+
incidlelocked(1)
5156+
}
5157+
}
5158+
} else {
5159+
// poll network until earliest timer, next retake or next schedtrace; may block.
5160+
sleep := delay
5161+
pollUntil, _ := timeSleepUntil()
5162+
if nexttrace < pollUntil {
5163+
pollUntil = nexttrace
51435164
}
5165+
if pollUntil-now < sleep {
5166+
sleep = pollUntil - now
5167+
}
5168+
if sleep < 0 || faketime != 0 {
5169+
sleep = 0
5170+
}
5171+
pollUntil = now + sleep
5172+
5173+
// sysmon pretends to be a normal M waiting for timer and IO ready.
5174+
// so need to decrement number of idle locked M's.
5175+
incidlelocked(-1)
5176+
if atomic.Xchg64(&sched.lastpoll, 0) != 0 {
5177+
atomic.Store64(&sched.pollUntil, uint64(pollUntil))
5178+
list := netpoll(sleep)
5179+
atomic.Store64(&sched.pollUntil, 0)
5180+
atomic.Store64(&sched.lastpoll, uint64(nanotime()))
5181+
if !list.empty() {
5182+
injectglist(&list)
5183+
} else {
5184+
// may wake up by timer.
5185+
wakep()
5186+
}
5187+
}
5188+
incidlelocked(1)
51445189
}
51455190
if GOOS == "netbsd" && needSysmonWorkaround {
51465191
// netpoll is responsible for waiting for timer
@@ -5182,8 +5227,8 @@ func sysmon() {
51825227
injectglist(&list)
51835228
unlock(&forcegc.lock)
51845229
}
5185-
if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
5186-
lasttrace = now
5230+
if nexttrace <= now {
5231+
nexttrace += tracedelay
51875232
schedtrace(debug.scheddetail > 0)
51885233
}
51895234
unlock(&sched.sysmonlock)

src/runtime/sysmon_test.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright 2022 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Test netpoll waiters G's will be unparked as soon as IO readied
6+
// even though M's are busy with G's in local runq.
7+
8+
package runtime_test
9+
10+
import (
11+
"net"
12+
"runtime"
13+
"testing"
14+
"time"
15+
)
16+
17+
// busysrv couples a loopback TCP listener with a dialer and records how long
// each dial takes, bucketed by whole milliseconds.
type busysrv struct {
	// l is the listener that the dialer connects to.
	l net.Listener
	// bucket[i] counts the dials that took [i, i+1) ms; dials slower than the
	// last bucket are clamped into it.
	bucket []int
	// bucketTotal is the number of dials recorded across all buckets.
	bucketTotal int
	// exit is closed by stop to make the goroutines return.
	exit chan struct{}
	// start is the time at which startDialing began.
	start time.Time
	// end is the time at which startDialing returned.
	end time.Time
}
24+
25+
func (srv *busysrv) stop() {
26+
close(srv.exit)
27+
}
28+
29+
func (srv *busysrv) startListening() {
30+
l, _ := net.Listen("tcp4", "localhost:0")
31+
bucket := make([]int, 12)
32+
exit := make(chan struct{})
33+
srv.l = l
34+
srv.bucket = bucket
35+
srv.exit = exit
36+
go func() {
37+
for {
38+
select {
39+
case _, ok := <-exit:
40+
if !ok {
41+
l.Close()
42+
return
43+
}
44+
default:
45+
}
46+
47+
if con, _ := l.Accept(); con != nil {
48+
con.Close()
49+
}
50+
}
51+
}()
52+
}
53+
54+
func (srv *busysrv) startDialing() {
55+
srv.start = time.Now()
56+
defer func() {
57+
srv.end = time.Now()
58+
}()
59+
network, addr := srv.l.Addr().Network(), srv.l.Addr().String()
60+
for {
61+
select {
62+
case _, ok := <-srv.exit:
63+
if !ok {
64+
return
65+
}
66+
default:
67+
}
68+
69+
start := time.Now()
70+
con, _ := net.Dial(network, addr)
71+
ms := int(time.Since(start) / 1000000)
72+
if ms >= len(srv.bucket) {
73+
ms = len(srv.bucket) - 1
74+
}
75+
srv.bucket[ms]++
76+
srv.bucketTotal++
77+
if con != nil {
78+
con.Close()
79+
}
80+
}
81+
}
82+
83+
func (srv *busysrv) busy() {
84+
for {
85+
select {
86+
case _, ok := <-srv.exit:
87+
if !ok {
88+
return
89+
}
90+
default:
91+
}
92+
runtime.Goyield() // simulate many runnable G's in local runq.
93+
}
94+
}
95+
96+
func (srv *busysrv) expect(bucket int, percent float64) bool {
97+
count := 0
98+
for i := 0; i < bucket && i < len(srv.bucket); i++ {
99+
count += srv.bucket[i]
100+
}
101+
return float64(count)/float64(srv.bucketTotal)*100.0 > percent
102+
}
103+
104+
func (srv *busysrv) printf(ffn func(format string, args ...interface{})) {
105+
ffn("dialed %d times within %v\n", srv.bucketTotal, srv.end.Sub(srv.start))
106+
ffn("timeBucket\tcount\tpercent\n")
107+
for ms, cnt := range srv.bucket {
108+
ffn("[%2d,%2d)ms\t%d\t%.2f%%\n", ms, ms+1, cnt, float64(cnt)/float64(srv.bucketTotal)*100.0)
109+
}
110+
}
111+
112+
func TestSysmonReadyNetpollWaitersASAP(t *testing.T) {
113+
if runtime.GOOS == "netbsd" && runtime.NeedSysmonWorkaround {
114+
t.Skip("netbsd 9.2 earlier")
115+
}
116+
117+
srv := &busysrv{}
118+
srv.startListening()
119+
np := runtime.GOMAXPROCS(0)
120+
for i := 0; i < np*5; i++ {
121+
go srv.busy()
122+
}
123+
go srv.startDialing()
124+
125+
time.Sleep(time.Second)
126+
srv.stop()
127+
time.Sleep(time.Millisecond * 100)
128+
129+
// expect more than 80% dialings accomplished within 2ms.
130+
if !srv.expect(2, 80.0) {
131+
t.Fail()
132+
}
133+
srv.printf(t.Logf)
134+
}

0 commit comments

Comments
 (0)