github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/platform/systrap/shared_context.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package systrap 16 17 import ( 18 "fmt" 19 "runtime" 20 "strconv" 21 "sync" 22 "sync/atomic" 23 "time" 24 25 "golang.org/x/sys/unix" 26 "github.com/nicocha30/gvisor-ligolo/pkg/log" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/platform/systrap/sysmsg" 29 "github.com/nicocha30/gvisor-ligolo/pkg/syncevent" 30 ) 31 32 const ( 33 ackReset uint32 = 0 34 ) 35 36 // sharedContext is an abstraction for interactions that the sentry has to 37 // perform with memory shared between it and the stub threads used for contexts. 38 // 39 // Any access to shared memory should most likely have a getter/setter through 40 // this struct. This is due to the following reasons: 41 // - The memory needs to be read or modified atomically because there is no 42 // (trusted) synchronization between the sentry and the stub processes. 43 // - Data read from shared memory may require validation before it can be used. 44 type sharedContext struct { 45 contextEntry 46 47 // subprocess is the subprocess that this sharedContext instance belongs to. 48 subprocess *subprocess 49 // contextID is the ID corresponding to the sysmsg.ThreadContext memory slot 50 // that is used for this sharedContext. 
51 contextID uint32 52 // shared is the handle to the shared memory that the sentry task go-routine 53 // reads from and writes to. 54 // NOTE: Using this handle directly without a getter from this function should 55 // most likely be avoided due to concerns listed above. 56 shared *sysmsg.ThreadContext 57 58 // sync is used by the context go-routine to wait for events from the 59 // dispatcher. 60 sync syncevent.Waiter 61 startWaitingTS int64 62 kicked bool 63 // The task associated with the context fell asleep. 64 sleeping bool 65 } 66 67 // String returns the ID of this shared context. 68 func (sc *sharedContext) String() string { 69 return strconv.Itoa(int(sc.contextID)) 70 } 71 72 const ( 73 // sharedContextReady indicates that a context has new events. 74 sharedContextReady = syncevent.Set(1 << iota) 75 // sharedContextKicked indicates that a new stub thread should be woken up. 76 sharedContextKicked 77 // sharedContextSlowPath indicates that a context has to be waited for in the 78 // slow path. 79 sharedContextSlowPath 80 // sharedContextDispatch indicates that a context go-routine has to start the wait loop. 
81 sharedContextDispatch 82 ) 83 84 func (s *subprocess) getSharedContext() (*sharedContext, error) { 85 s.mu.Lock() 86 defer s.mu.Unlock() 87 88 id, ok := s.threadContextPool.Get() 89 if !ok { 90 return nil, fmt.Errorf("subprocess has too many active tasks (%d); failed to create a new one", maxGuestContexts) 91 } 92 s.IncRef() 93 sc := sharedContext{ 94 subprocess: s, 95 contextID: uint32(id), 96 shared: s.getThreadContextFromID(id), 97 } 98 sc.shared.Init(invalidThreadID) 99 sc.sync.Init() 100 sc.sleeping = true 101 102 return &sc, nil 103 } 104 105 func (sc *sharedContext) release() { 106 if sc == nil { 107 return 108 } 109 if !sc.sleeping { 110 sc.subprocess.decAwakeContexts() 111 112 } 113 sc.subprocess.threadContextPool.Put(uint64(sc.contextID)) 114 sc.subprocess.DecRef(sc.subprocess.release) 115 } 116 117 func (sc *sharedContext) isActiveInSubprocess(s *subprocess) bool { 118 if sc == nil { 119 return false 120 } 121 return sc.subprocess == s 122 } 123 124 // NotifyInterrupt implements interrupt.Receiver.NotifyInterrupt. 125 func (sc *sharedContext) NotifyInterrupt() { 126 // If this context is not being worked on right now we need to mark it as 127 // interrupted so the next executor does not start working on it. 128 atomic.StoreUint32(&sc.shared.Interrupt, 1) 129 if sc.threadID() == invalidThreadID { 130 return 131 } 132 sc.subprocess.sysmsgThreadsMu.Lock() 133 defer sc.subprocess.sysmsgThreadsMu.Unlock() 134 135 threadID := atomic.LoadUint32(&sc.shared.ThreadID) 136 sysmsgThread, ok := sc.subprocess.sysmsgThreads[threadID] 137 if !ok { 138 // This is either an invalidThreadID or another garbage value; either way we 139 // don't know which thread to interrupt; best we can do is mark the context. 
140 return 141 } 142 143 t := sysmsgThread.thread 144 if _, _, e := unix.RawSyscall(unix.SYS_TGKILL, uintptr(t.tgid), uintptr(t.tid), uintptr(platform.SignalInterrupt)); e != 0 { 145 panic(fmt.Sprintf("failed to interrupt the child process %d: %v", t.tid, e)) 146 } 147 } 148 149 func (sc *sharedContext) state() sysmsg.ContextState { 150 return sc.shared.State.Get() 151 } 152 153 func (sc *sharedContext) setState(state sysmsg.ContextState) { 154 sc.shared.State.Set(state) 155 } 156 157 func (sc *sharedContext) setInterrupt() { 158 atomic.StoreUint32(&sc.shared.Interrupt, 1) 159 } 160 161 func (sc *sharedContext) clearInterrupt() { 162 atomic.StoreUint32(&sc.shared.Interrupt, 0) 163 } 164 165 func (sc *sharedContext) setFPStateChanged() { 166 atomic.StoreUint64(&sc.shared.FPStateChanged, 1) 167 } 168 169 func (sc *sharedContext) threadID() uint32 { 170 return atomic.LoadUint32(&sc.shared.ThreadID) 171 } 172 173 // EnableSentryFastPath indicates that the polling mode is enabled for the 174 // Sentry. It has to be called before putting the context into the context queue. 175 // This function is used if contextDecouplingExp=true because the fastpath 176 // is negotiated in ThreadContext. 177 func (sc *sharedContext) enableSentryFastPath() { 178 atomic.StoreUint32(&sc.shared.SentryFastPath, 1) 179 } 180 181 // DisableSentryFastPath indicates that the polling mode for the sentry is 182 // disabled for the Sentry. 183 // This function is used if contextDecouplingExp=true because the fastpath 184 // is negotiated in ThreadContext. 
185 func (sc *sharedContext) disableSentryFastPath() { 186 atomic.StoreUint32(&sc.shared.SentryFastPath, 0) 187 } 188 189 func (sc *sharedContext) isAcked() bool { 190 return atomic.LoadUint32(&sc.shared.Acked) != ackReset 191 } 192 193 func (sc *sharedContext) resetAcked() { 194 atomic.StoreUint32(&sc.shared.Acked, ackReset) 195 } 196 197 const ( 198 contextPreemptTimeoutNsec = 10 * 1000 * 1000 // 10ms 199 contextCheckupTimeoutSec = 5 200 stuckContextTimeout = 30 * time.Second 201 ) 202 203 func (sc *sharedContext) sleepOnState(state sysmsg.ContextState) { 204 timeout := unix.Timespec{ 205 Sec: 0, 206 Nsec: contextPreemptTimeoutNsec, 207 } 208 sentInterruptOnce := false 209 deadline := time.Now().Add(stuckContextTimeout) 210 for sc.state() == state { 211 errno := sc.shared.SleepOnState(state, &timeout) 212 if errno == 0 { 213 continue 214 } 215 if errno != unix.ETIMEDOUT { 216 panic(fmt.Sprintf("error waiting for state: %v", errno)) 217 } 218 if time.Now().After(deadline) { 219 log.Warningf("Systrap task goroutine has been waiting on ThreadContext.State futex too long. ThreadContext: %v", sc) 220 } 221 if sentInterruptOnce { 222 log.Warningf("The context is still running: %v", sc) 223 continue 224 } 225 226 if !sc.isAcked() || sc.subprocess.contextQueue.isEmpty() { 227 continue 228 } 229 sc.NotifyInterrupt() 230 sentInterruptOnce = true 231 timeout.Sec = contextCheckupTimeoutSec 232 timeout.Nsec = 0 233 } 234 } 235 236 type fastPathDispatcher struct { 237 // list is used only from the loop method and so it isn't protected by 238 // any lock. 239 list contextList 240 241 mu sync.Mutex 242 243 // nr is the number of contexts in the queue. 244 // +checklocks:mu 245 nr int 246 247 // entrants contains new contexts that haven't been added to `list` yet. 248 // +checklocks:mu 249 entrants contextList 250 251 // fastPathDisabledTS is the time stamp when the stub fast path was 252 // disabled. It is zero if the fast path is enabled. 
253 fastPathDisabledTS atomic.Uint64 254 } 255 256 var dispatcher fastPathDispatcher 257 258 // fastPathContextLimit is the maximum number of contexts after which the fast 259 // path in stub threads is disabled. Its value can be higher than the number of 260 // CPU-s, because the Sentry is running with higher priority than stub threads, 261 // deepSleepTimeout is much shorter than the Linux scheduler timeslice, so the 262 // only thing that matters here is whether the Sentry handles syscall faster 263 // than the overhead of scheduling another stub thread. 264 var fastPathContextLimit = uint32(runtime.GOMAXPROCS(0) * 2) 265 266 // fastPathDisabledTimeout is the timeout after which the fast path in stub 267 // processes will be re-enabled. 268 const fastPathDisabledTimeout = uint64(200 * 1000 * 1000) // 100ms for 2GHz. 269 270 // nrMaxAwakeStubThreads is the maximum number of awake stub threads over all 271 // subprocesses at the this moment. 272 var nrMaxAwakeStubThreads atomic.Uint32 273 274 // stubFastPathEnabled returns true if the fast path in stub processes is 275 // enabled. If the fast path is disabled, it revises whether it has to be 276 // re-enabled or not. 277 func (q *fastPathDispatcher) stubFastPathEnabled() bool { 278 ts := q.fastPathDisabledTS.Load() 279 if ts != 0 { 280 if uint64(cputicks())-ts < fastPathDisabledTimeout { 281 return false 282 } 283 if nrMaxAwakeStubThreads.Load() > fastPathContextLimit { 284 q.fastPathDisabledTS.Store(uint64(cputicks())) 285 return false 286 } 287 q.fastPathDisabledTS.Store(0) 288 } 289 return true 290 } 291 292 // disableStubFastPath disables the fast path over all subprocesses with active 293 // contexts. 294 func (q *fastPathDispatcher) disableStubFastPath() { 295 q.fastPathDisabledTS.Store(uint64(cputicks())) 296 } 297 298 // deep_sleep_timeout is the timeout after which we stops polling and fall asleep. 299 // 300 // The value is 40µs for 2GHz CPU. 
// This timeout matches the sentry<->stub round
// trip in the pure deep sleep case.
const deepSleepTimeout = uint64(80000)

// handshakeTimeout is the number of cputicks() a context may spend in
// ContextStateNone before loop checks whether it has been acked.
const handshakeTimeout = uint64(1000)

// loop is processing contexts in the queue. Only one instance of it can be
// running, because it has exclusive access to the list.
//
// target is the context associated with the current go-routine. loop returns
// after target has been removed from the list and notified; before returning
// it hands the dispatch role over to the first context still in the list, if
// there is one.
func (q *fastPathDispatcher) loop(target *sharedContext) {
	done := false
	processed := 0
	slowPath := false
	start := cputicks()
	for {
		var ctx, next *sharedContext

		q.mu.Lock()
		// Any progress in the previous pass, or any newly arrived context,
		// restarts the deep sleep timer and leaves the slow path.
		if processed != 0 || !q.entrants.Empty() {
			start = cputicks()
			slowPath = false
		}
		q.nr -= processed
		// Add new contexts to the list.
		q.list.PushBackList(&q.entrants)
		ctx = q.list.Front()
		q.mu.Unlock()

		if done {
			if ctx != nil {
				// Wake up the next go-routine to run the loop.
				ctx.sync.Receiver().Notify(sharedContextDispatch)
			}
			break
		}

		processed = 0
		now := cputicks()
		for ctx = q.list.Front(); ctx != nil; ctx = next {
			// Fetch the successor first: ctx may be removed from the list below.
			next = ctx.Next()

			event := sharedContextReady
			if ctx.state() == sysmsg.ContextStateNone {
				if slowPath {
					event = sharedContextSlowPath
				} else if !ctx.kicked && uint64(now-ctx.startWaitingTS) > handshakeTimeout {
					if ctx.isAcked() {
						// The context has been acked: remember that so the
						// handshake check is skipped next time, and keep
						// polling it.
						ctx.kicked = true
						continue
					}
					event = sharedContextKicked
				} else {
					continue
				}
			}
			processed++
			q.list.Remove(ctx)
			if ctx == target {
				done = true
			}
			ctx.sync.Receiver().Notify(event)
		}
		if processed == 0 {
			if uint64(cputicks()-start) > deepSleepTimeout {
				slowPath = true
				// Do one more run to notify all contexts.
				// q.list has to be empty at the end.
				continue
			}
			yield()
		}
	}
}

// waitFor queues ctx with the dispatcher and blocks until an event other than
// sharedContextDispatch is delivered to it, returning the received event set.
//
// If ctx is the only context in the queue, or whenever it is notified with
// sharedContextDispatch, the calling go-routine runs the dispatch loop itself.
func (q *fastPathDispatcher) waitFor(ctx *sharedContext) syncevent.Set {
	events := syncevent.Set(0)

	q.mu.Lock()
	q.entrants.PushBack(ctx)
	q.nr++
	if q.nr == 1 {
		// The queue was empty, so nobody is running the loop; run it here.
		events = sharedContextDispatch
	}
	q.mu.Unlock()

	for {
		if events&sharedContextDispatch != 0 {
			ctx.sync.Ack(sharedContextDispatch)
			q.loop(ctx)
		}
		events = ctx.sync.WaitAndAckAll()
		if events&sharedContextDispatch == 0 {
			break
		}
	}
	return events
}