github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/platform/systrap/shared_context.go

// Copyright 2023 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package systrap

import (
	"fmt"
	"runtime"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/log"
	"github.com/MerlinKodo/gvisor/pkg/sentry/platform"
	"github.com/MerlinKodo/gvisor/pkg/sentry/platform/systrap/sysmsg"
	"github.com/MerlinKodo/gvisor/pkg/syncevent"
	"golang.org/x/sys/unix"
)

const (
	ackReset uint32 = 0
)

// sharedContext is an abstraction for interactions that the sentry has to
// perform with memory shared between it and the stub threads used for contexts.
//
// Any access to shared memory should most likely have a getter/setter through
// this struct. This is due to the following reasons:
//   - The memory needs to be read or modified atomically because there is no
//     (trusted) synchronization between the sentry and the stub processes.
//   - Data read from shared memory may require validation before it can be used.
type sharedContext struct {
	contextEntry

	// subprocess is the subprocess that this sharedContext instance belongs to.
	subprocess *subprocess
	// contextID is the ID corresponding to the sysmsg.ThreadContext memory slot
	// that is used for this sharedContext.
	contextID uint32
	// shared is the handle to the shared memory that the sentry task go-routine
	// reads from and writes to.
	// NOTE: Using this handle directly without a getter from this struct should
	// most likely be avoided due to the concerns listed above.
	shared *sysmsg.ThreadContext

	// sync is used by the context go-routine to wait for events from the
	// dispatcher.
	sync           syncevent.Waiter
	startWaitingTS int64
	kicked         bool
	// sleeping is set when the task associated with the context fell asleep.
	sleeping bool
}

// String returns the ID of this shared context.
func (sc *sharedContext) String() string {
	return strconv.Itoa(int(sc.contextID))
}

const (
	// sharedContextReady indicates that a context has new events.
	sharedContextReady = syncevent.Set(1 << iota)
	// sharedContextKicked indicates that a new stub thread should be woken up.
	sharedContextKicked
	// sharedContextSlowPath indicates that a context has to be waited for in the
	// slow path.
	sharedContextSlowPath
	// sharedContextDispatch indicates that a context go-routine has to start the
	// wait loop.
	sharedContextDispatch
)
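// These events are delivered to a context's sync waiter by the
// fastPathDispatcher defined at the bottom of this file. A minimal sketch of
// how a task goroutine might react to them; the real call sites live outside
// this file, so the handling below is illustrative only:
//
//	events := dispatcher.waitFor(sc)
//	switch {
//	case events&sharedContextReady != 0:
//		// The stub published a new ThreadContext.State; read it via sc.state().
//	case events&sharedContextKicked != 0:
//		// The context was not acked within handshakeTimeout; a new stub
//		// thread should be woken up.
//	case events&sharedContextSlowPath != 0:
//		// Polling gave up; wait on the state futex via sc.sleepOnState().
//	}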
func (s *subprocess) getSharedContext() (*sharedContext, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	id, ok := s.threadContextPool.Get()
	if !ok {
		return nil, fmt.Errorf("subprocess has too many active tasks (%d); failed to create a new one", maxGuestContexts)
	}
	s.IncRef()
	sc := sharedContext{
		subprocess: s,
		contextID:  uint32(id),
		shared:     s.getThreadContextFromID(id),
	}
	sc.shared.Init(invalidThreadID)
	sc.sync.Init()
	sc.sleeping = true

	return &sc, nil
}

func (sc *sharedContext) release() {
	if sc == nil {
		return
	}
	if !sc.sleeping {
		sc.subprocess.decAwakeContexts()
	}
	sc.subprocess.threadContextPool.Put(uint64(sc.contextID))
	sc.subprocess.DecRef(sc.subprocess.release)
}

func (sc *sharedContext) isActiveInSubprocess(s *subprocess) bool {
	if sc == nil {
		return false
	}
	return sc.subprocess == s
}

// NotifyInterrupt implements interrupt.Receiver.NotifyInterrupt.
func (sc *sharedContext) NotifyInterrupt() {
	// If this context is not being worked on right now, we need to mark it as
	// interrupted so the next executor does not start working on it.
	atomic.StoreUint32(&sc.shared.Interrupt, 1)
	if sc.threadID() == invalidThreadID {
		return
	}
	sc.subprocess.sysmsgThreadsMu.Lock()
	defer sc.subprocess.sysmsgThreadsMu.Unlock()

	threadID := atomic.LoadUint32(&sc.shared.ThreadID)
	sysmsgThread, ok := sc.subprocess.sysmsgThreads[threadID]
	if !ok {
		// This is either invalidThreadID or another garbage value; either way we
		// don't know which thread to interrupt, so the best we can do is mark the
		// context.
		return
	}

	t := sysmsgThread.thread
	if _, _, e := unix.RawSyscall(unix.SYS_TGKILL, uintptr(t.tgid), uintptr(t.tid), uintptr(platform.SignalInterrupt)); e != 0 {
		panic(fmt.Sprintf("failed to interrupt the child process %d: %v", t.tid, e))
	}
}

func (sc *sharedContext) state() sysmsg.ContextState {
	return sc.shared.State.Get()
}

func (sc *sharedContext) setState(state sysmsg.ContextState) {
	sc.shared.State.Set(state)
}

func (sc *sharedContext) setInterrupt() {
	atomic.StoreUint32(&sc.shared.Interrupt, 1)
}

func (sc *sharedContext) clearInterrupt() {
	atomic.StoreUint32(&sc.shared.Interrupt, 0)
}

func (sc *sharedContext) setFPStateChanged() {
	atomic.StoreUint64(&sc.shared.FPStateChanged, 1)
}

func (sc *sharedContext) threadID() uint32 {
	return atomic.LoadUint32(&sc.shared.ThreadID)
}
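// The helpers above are thin wrappers around atomic loads and stores on
// sysmsg.ThreadContext fields that a stub thread may modify concurrently.
// A minimal sketch of reading a context's state through the getter rather
// than through sc.shared directly (which is what the NOTE on the shared
// field warns against); the validation step is only hinted at here:
//
//	switch sc.state() {
//	case sysmsg.ContextStateNone:
//		// The stub has not published an event for this context yet.
//	default:
//		// Any other value still has to be validated before use, because
//		// the stub process is not trusted.
//	}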
// enableSentryFastPath indicates that polling mode is enabled for the Sentry.
// It has to be called before putting the context into the context queue.
func (sc *sharedContext) enableSentryFastPath() {
	atomic.StoreUint32(&sc.shared.SentryFastPath, 1)
}

// disableSentryFastPath indicates that polling mode is disabled for the Sentry.
func (sc *sharedContext) disableSentryFastPath() {
	atomic.StoreUint32(&sc.shared.SentryFastPath, 0)
}

func (sc *sharedContext) isAcked() bool {
	return atomic.LoadUint32(&sc.shared.Acked) != ackReset
}

func (sc *sharedContext) resetAcked() {
	atomic.StoreUint32(&sc.shared.Acked, ackReset)
}

const (
	contextPreemptTimeoutNsec = 10 * 1000 * 1000 // 10ms
	contextCheckupTimeoutSec  = 5
	stuckContextTimeout       = 30 * time.Second
)

func (sc *sharedContext) sleepOnState(state sysmsg.ContextState) {
	timeout := unix.Timespec{
		Sec:  0,
		Nsec: contextPreemptTimeoutNsec,
	}
	sentInterruptOnce := false
	deadline := time.Now().Add(stuckContextTimeout)
	for sc.state() == state {
		errno := sc.shared.SleepOnState(state, &timeout)
		if errno == 0 {
			continue
		}
		if errno != unix.ETIMEDOUT {
			panic(fmt.Sprintf("error waiting for state: %v", errno))
		}
		if time.Now().After(deadline) {
			log.Warningf("Systrap task goroutine has been waiting on ThreadContext.State futex too long. ThreadContext: %v", sc)
		}
		if sentInterruptOnce {
			log.Warningf("The context is still running: %v", sc)
			continue
		}

		if !sc.isAcked() || sc.subprocess.contextQueue.isEmpty() {
			continue
		}
		sc.NotifyInterrupt()
		sentInterruptOnce = true
		timeout.Sec = contextCheckupTimeoutSec
		timeout.Nsec = 0
	}
}

type fastPathDispatcher struct {
	// list is used only from the loop method and so it isn't protected by
	// any lock.
	list contextList

	mu sync.Mutex

	// nr is the number of contexts in the queue.
	// +checklocks:mu
	nr int

	// entrants contains new contexts that haven't been added to `list` yet.
	// +checklocks:mu
	entrants contextList

	// fastPathDisabledTS is the timestamp when the stub fast path was
	// disabled. It is zero if the fast path is enabled.
	fastPathDisabledTS atomic.Uint64
}

var dispatcher fastPathDispatcher

// fastPathContextLimit is the maximum number of contexts after which the fast
// path in stub threads is disabled. Its value can be higher than the number of
// CPUs because the Sentry runs with a higher priority than stub threads and
// deepSleepTimeout is much shorter than the Linux scheduler timeslice, so the
// only thing that matters here is whether the Sentry handles syscalls faster
// than the overhead of scheduling another stub thread.
var fastPathContextLimit = uint32(runtime.GOMAXPROCS(0) * 2)

// fastPathDisabledTimeout is the timeout after which the fast path in stub
// processes will be re-enabled.
const fastPathDisabledTimeout = uint64(200 * 1000 * 1000) // 100ms for 2GHz.

// nrMaxAwakeStubThreads is the maximum number of awake stub threads over all
// subprocesses at this moment.
var nrMaxAwakeStubThreads atomic.Uint32
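// fastPathDisabledTimeout above and deepSleepTimeout below are expressed in
// cputicks() units rather than in nanoseconds. A minimal sketch of the
// conversion behind the "for 2GHz" comments (the 2GHz base frequency is an
// assumption carried over from those comments, not something measured at
// runtime):
//
//	const assumedTicksPerSecond = 2 * 1000 * 1000 * 1000 // 2GHz
//	// fastPathDisabledTimeout: 200*1000*1000 ticks / 2GHz = 100ms
//	// deepSleepTimeout:        80000 ticks / 2GHz         = 40µs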
// stubFastPathEnabled returns true if the fast path in stub processes is
// enabled. If the fast path is disabled, it checks whether it has to be
// re-enabled.
func (q *fastPathDispatcher) stubFastPathEnabled() bool {
	ts := q.fastPathDisabledTS.Load()
	if ts != 0 {
		if uint64(cputicks())-ts < fastPathDisabledTimeout {
			return false
		}
		if nrMaxAwakeStubThreads.Load() > fastPathContextLimit {
			q.fastPathDisabledTS.Store(uint64(cputicks()))
			return false
		}
		q.fastPathDisabledTS.Store(0)
	}
	return true
}

// disableStubFastPath disables the fast path over all subprocesses with active
// contexts.
func (q *fastPathDispatcher) disableStubFastPath() {
	q.fastPathDisabledTS.Store(uint64(cputicks()))
}

// deepSleepTimeout is the timeout after which the dispatcher stops polling and
// falls asleep.
//
// The value is 40µs for a 2GHz CPU. This timeout matches the sentry<->stub
// round trip in the pure deep sleep case.
const deepSleepTimeout = uint64(80000)
const handshakeTimeout = uint64(1000)

// loop processes contexts in the queue. Only one instance of it can be
// running, because it has exclusive access to the list.
//
// target is the context associated with the current go-routine.
func (q *fastPathDispatcher) loop(target *sharedContext) {
	done := false
	processed := 0
	slowPath := false
	start := cputicks()
	for {
		var ctx, next *sharedContext

		q.mu.Lock()
		if processed != 0 || !q.entrants.Empty() {
			start = cputicks()
			slowPath = false
		}
		q.nr -= processed
		// Add new contexts to the list.
		q.list.PushBackList(&q.entrants)
		ctx = q.list.Front()
		q.mu.Unlock()

		if done {
			if ctx != nil {
				// Wake up the next go-routine to run the loop.
				ctx.sync.Receiver().Notify(sharedContextDispatch)
			}
			break
		}

		processed = 0
		now := cputicks()
		for ctx = q.list.Front(); ctx != nil; ctx = next {
			next = ctx.Next()

			event := sharedContextReady
			if ctx.state() == sysmsg.ContextStateNone {
				if slowPath {
					event = sharedContextSlowPath
				} else if !ctx.kicked && uint64(now-ctx.startWaitingTS) > handshakeTimeout {
					if ctx.isAcked() {
						ctx.kicked = true
						continue
					}
					event = sharedContextKicked
				} else {
					continue
				}
			}
			processed++
			q.list.Remove(ctx)
			if ctx == target {
				done = true
			}
			ctx.sync.Receiver().Notify(event)
		}
		if processed == 0 {
			if uint64(cputicks()-start) > deepSleepTimeout {
				slowPath = true
				// Do one more run to notify all contexts.
				// q.list has to be empty at the end.
				continue
			}
			yield()
		}
	}
}

func (q *fastPathDispatcher) waitFor(ctx *sharedContext) syncevent.Set {
	events := syncevent.Set(0)

	q.mu.Lock()
	q.entrants.PushBack(ctx)
	q.nr++
	if q.nr == 1 {
		events = sharedContextDispatch
	}
	q.mu.Unlock()

	for {
		if events&sharedContextDispatch != 0 {
			ctx.sync.Ack(sharedContextDispatch)
			q.loop(ctx)
		}
		events = ctx.sync.WaitAndAckAll()
		if events&sharedContextDispatch == 0 {
			break
		}
	}
	return events
}
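// waitForEventsSketch is a hypothetical helper that is not called anywhere in
// this package; it only illustrates the dispatcher protocol above: a caller
// queues itself via waitFor, one waiting goroutine is drafted to run loop()
// on behalf of everyone, and a goroutine that receives sharedContextSlowPath
// falls back to the futex-based slow path. Real callers also have to queue
// the context into the subprocess contextQueue and handle sharedContextKicked
// (an assumption about code outside this file), which is elided here.
func waitForEventsSketch(sc *sharedContext) syncevent.Set {
	events := dispatcher.waitFor(sc)
	if events&sharedContextSlowPath != 0 {
		// Polling is off; wait until the stub publishes a state other
		// than ContextStateNone.
		sc.sleepOnState(sysmsg.ContextStateNone)
	}
	return events
}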