github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/watchdog/watchdog.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package watchdog is responsible for monitoring the sentry for tasks that may
// potentially be stuck or looping indefinitely, causing hard-to-debug hangs in
// the untrusted app.
//
// It works by periodically querying all tasks to check whether they are in user
// mode (RunUser), kernel mode (RunSys), or blocked in the kernel (OffCPU). Tasks
// that have been running in kernel mode for a long time in the same syscall
// without blocking are considered stuck and are reported.
//
// When a stuck task is detected, the watchdog can take one of the following actions:
//  1. LogWarning: Logs a warning message followed by a stack dump of all goroutines.
//     If a task continues to be stuck, the message will repeat every minute, unless
//     a new stuck task is detected.
//  2. Panic: same as above, followed by panic().
package watchdog

import (
	"bytes"
	"fmt"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
	"github.com/MerlinKodo/gvisor/pkg/log"
	"github.com/MerlinKodo/gvisor/pkg/metric"
	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel"
	ktime "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/time"
	"github.com/MerlinKodo/gvisor/pkg/sync"
)

// Opts configures the watchdog.
type Opts struct {
	// TaskTimeout is the amount of time to allow a task to execute the
	// same syscall without blocking before it's declared stuck.
	TaskTimeout time.Duration

	// TaskTimeoutAction indicates what action to take when a stuck task
	// is detected.
	TaskTimeoutAction Action

	// StartupTimeout is the amount of time to allow between watchdog
	// creation and calling watchdog.Start.
	StartupTimeout time.Duration

	// StartupTimeoutAction indicates what action to take when
	// watchdog.Start is not called within the timeout.
	StartupTimeoutAction Action
}

// DefaultOpts is a default set of options for the watchdog.
var DefaultOpts = Opts{
	// Task timeout.
	TaskTimeout:       3 * time.Minute,
	TaskTimeoutAction: LogWarning,

	// Startup timeout.
	StartupTimeout:       30 * time.Second,
	StartupTimeoutAction: LogWarning,
}
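
// A minimal usage sketch from a caller's point of view (illustrative only; it
// assumes a *kernel.Kernel value named k that was created elsewhere):
//
//	w := watchdog.New(k, watchdog.DefaultOpts)
//	w.Start()          // begin monitoring tasks
//	// ... run the sandbox ...
//	w.Stop()           // stop monitoring before save/shutdown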

// descheduleThreshold is the amount of time scheduling needs to be off before the entire wait period
// is discounted from the task's last update time. It's set high enough that small scheduling delays won't
// trigger it.
const descheduleThreshold = 1 * time.Second

// Amount of time to wait before dumping the stack to the log again when the same task(s) remains stuck.
var stackDumpSameTaskPeriod = time.Minute

// Action defines what action to take when a stuck task is detected.
type Action int

const (
	// LogWarning logs a warning message followed by a stack trace.
	LogWarning Action = iota

	// Panic will do the same logging as LogWarning and then panic().
	Panic
)

// Set implements flag.Value.
func (a *Action) Set(v string) error {
	switch v {
	case "log", "logwarning":
		*a = LogWarning
	case "panic":
		*a = Panic
	default:
		return fmt.Errorf("invalid watchdog action %q", v)
	}
	return nil
}

// Get implements flag.Getter.
func (a *Action) Get() any {
	return *a
}

// String returns Action's string representation.
func (a Action) String() string {
	switch a {
	case LogWarning:
		return "logWarning"
	case Panic:
		return "panic"
	default:
		panic(fmt.Sprintf("Invalid watchdog action: %d", a))
	}
}
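
// Because *Action implements flag.Value (Set/String), it can be registered
// directly with a flag set. A sketch, assuming a hypothetical flag name
// "watchdog-action" that this package does not itself define:
//
//	action := watchdog.LogWarning
//	flag.Var(&action, "watchdog-action", `"log", "logwarning", or "panic"`)
//	flag.Parse()
//	opts := watchdog.DefaultOpts
//	opts.TaskTimeoutAction = action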

// Watchdog is the main watchdog class. It controls a goroutine that periodically
// analyses all tasks and reports if any of them appear to be stuck.
type Watchdog struct {
	// Configuration options are embedded.
	Opts

	// period indicates how often to check all tasks. It's calculated based on
	// opts.TaskTimeout.
	period time.Duration

	// k is where the tasks come from.
	k *kernel.Kernel

	// stop is used to notify the watchdog that it should stop.
	stop chan struct{}

	// done is used to notify when the watchdog has stopped.
	done chan struct{}

	// offenders map contains all tasks that are currently stuck.
	offenders map[*kernel.Task]*offender

	// lastStackDump tracks the last time a stack dump was generated to prevent
	// spamming the log.
	lastStackDump time.Time

	// lastRun is set to the last time the watchdog executed a monitoring loop.
	lastRun ktime.Time

	// mu protects the fields below.
	mu sync.Mutex

	// running is true if the watchdog is running.
	running bool

	// startCalled is true if Start has ever been called. It remains true
	// even if Stop is called.
	startCalled bool
}

type offender struct {
	lastUpdateTime ktime.Time
}

// New creates a new watchdog.
func New(k *kernel.Kernel, opts Opts) *Watchdog {
	// 4 is arbitrary, just don't want to prolong 'TaskTimeout' too much.
	period := opts.TaskTimeout / 4
	w := &Watchdog{
		Opts:      opts,
		k:         k,
		period:    period,
		offenders: make(map[*kernel.Task]*offender),
		stop:      make(chan struct{}),
		done:      make(chan struct{}),
	}

	// Handle StartupTimeout if it is set.
	if w.StartupTimeout > 0 {
		log.Infof("Watchdog waiting %v for startup", w.StartupTimeout)
		go w.waitForStart() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
	}

	return w
}

// Start starts the watchdog.
func (w *Watchdog) Start() {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.startCalled = true

	if w.running {
		return
	}

	if w.TaskTimeout == 0 {
		log.Infof("Watchdog task timeout disabled")
		return
	}
	w.lastRun = w.k.MonotonicClock().Now()

	log.Infof("Starting watchdog, period: %v, timeout: %v, action: %v", w.period, w.TaskTimeout, w.TaskTimeoutAction)
	go w.loop() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
	w.running = true
}

// Stop requests the watchdog to stop and waits for it.
func (w *Watchdog) Stop() {
	if w.TaskTimeout == 0 {
		return
	}

	w.mu.Lock()
	defer w.mu.Unlock()
	if !w.running {
		return
	}
	log.Infof("Stopping watchdog")
	w.stop <- struct{}{}
	<-w.done
	w.running = false
	log.Infof("Watchdog stopped")
}

// waitForStart waits for Start to be called and takes action if it does not
// happen within the startup timeout.
func (w *Watchdog) waitForStart() {
	<-time.After(w.StartupTimeout)
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.startCalled {
		// We are fine.
		return
	}

	metric.WeirdnessMetric.Increment(&metric.WeirdnessTypeWatchdogStuckStartup)

	var buf bytes.Buffer
	buf.WriteString(fmt.Sprintf("Watchdog.Start() not called within %s", w.StartupTimeout))
	w.doAction(w.StartupTimeoutAction, false, &buf)
}

// loop is the main watchdog routine. It only returns when 'Stop()' is called.
func (w *Watchdog) loop() {
	// Loop until someone stops it.
	for {
		select {
		case <-w.stop:
			w.done <- struct{}{}
			return
		case <-time.After(w.period):
			w.runTurn()
		}
	}
}

// runTurn runs a single pass over all tasks and reports anything it finds.
func (w *Watchdog) runTurn() {
	// Someone needs to watch the watchdog. The call below can get stuck if there
	// is a deadlock affecting root's PID namespace mutex. Run it in a goroutine
	// and report if it takes too long to return.
	var tasks []*kernel.Task
	done := make(chan struct{})
	go func() { // S/R-SAFE: watchdog is stopped and restarted during S/R.
		tasks = w.k.TaskSet().Root.Tasks()
		close(done)
	}()

	select {
	case <-done:
	case <-time.After(w.TaskTimeout):
		// Report if the watchdog is not making progress.
		// No one is watching the watchdog watcher though.
		w.reportStuckWatchdog()
		<-done
	}

	newOffenders := make(map[*kernel.Task]*offender)
	newTaskFound := false
	now := ktime.FromNanoseconds(int64(w.k.CPUClockNow() * uint64(linux.ClockTick)))

	// The process may be running with a low CPU limit, making tasks appear stuck
	// because they are starved of CPU cycles. The estimate is that tasks could have
	// been starved since the last time the watchdog ran. If the watchdog detects
	// that scheduling is off, it will discount the entire duration since the last
	// run from 'lastUpdateTime'.
	discount := time.Duration(0)
	if now.Sub(w.lastRun.Add(w.period)) > descheduleThreshold {
		discount = now.Sub(w.lastRun)
	}
	w.lastRun = now

	log.Infof("Watchdog starting loop, tasks: %d, discount: %v", len(tasks), discount)
	for _, t := range tasks {
		tsched := t.TaskGoroutineSchedInfo()

		// An offender is a task running inside the kernel for longer than the specified timeout.
		if tsched.State == kernel.TaskGoroutineRunningSys {
			lastUpdateTime := ktime.FromNanoseconds(int64(tsched.Timestamp * uint64(linux.ClockTick)))
			elapsed := now.Sub(lastUpdateTime) - discount
			if elapsed > w.TaskTimeout {
				tc, ok := w.offenders[t]
				if !ok {
					// New stuck task detected.
					//
					// Note that tasks blocked doing IO may be considered stuck in kernel,
					// unless they are surrounded by
					// Task.UninterruptibleSleepStart/Finish.
					tc = &offender{lastUpdateTime: lastUpdateTime}
					metric.WeirdnessMetric.Increment(&metric.WeirdnessTypeWatchdogStuckTasks)
					newTaskFound = true
				}
				newOffenders[t] = tc
			}
		}
	}
	if len(newOffenders) > 0 {
		w.report(newOffenders, newTaskFound, now)
	}

	// Remember which tasks have been reported.
	w.offenders = newOffenders
}
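
// A worked example of the deschedule discount applied in runTurn above (the
// numbers are illustrative): with DefaultOpts, TaskTimeout is 3 minutes, so the
// period is 45 seconds. If a turn runs 5 minutes after the previous one, it is
// more than descheduleThreshold (1s) late, so discount is set to the full
// 5 minute gap. A task that entered the kernel 6 minutes ago is then counted as
// stuck for only 6m - 5m = 1m, which is below TaskTimeout, so it is not
// reported on this turn.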

// report takes appropriate action when a stuck task is detected.
func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound bool, now ktime.Time) {
	var buf bytes.Buffer
	buf.WriteString(fmt.Sprintf("Sentry detected %d stuck task(s):\n", len(offenders)))
	for t, o := range offenders {
		tid := w.k.TaskSet().Root.IDOfTask(t)
		buf.WriteString(fmt.Sprintf("\tTask tid: %v (goroutine %d), entered RunSys state %v ago.\n", tid, t.GoroutineID(), now.Sub(o.lastUpdateTime)))
	}
	buf.WriteString("Search for 'goroutine <id>' in the stack dump to find the offending goroutine(s)")

	// Force stack dump only if a new task is detected.
	w.doAction(w.TaskTimeoutAction, newTaskFound, &buf)
}

func (w *Watchdog) reportStuckWatchdog() {
	var buf bytes.Buffer
	buf.WriteString("Watchdog goroutine is stuck")
	w.doAction(w.TaskTimeoutAction, false, &buf)
}

// doAction will take the given action. If the action is LogWarning, the stack
// is not always dumped to the log to prevent log flooding. "forceStack"
// guarantees that the stack will be dumped regardless.
func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
	switch action {
	case LogWarning:
		// Dump the stack only if forced or if some time has passed since the
		// last time a stack dump was generated.
		if !forceStack && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
			msg.WriteString("\n...[stack dump skipped]...")
			log.Warningf(msg.String())
			return
		}
		log.TracebackAll(msg.String())
		w.lastStackDump = time.Now()

	case Panic:
		// Panic will skip over running tasks, which are likely the culprits here,
		// so manually dump all stacks before panicking.
		log.TracebackAll(msg.String())

		// Attempt to flush metrics; time out and move on in case metrics are stuck as well.
		metricsEmitted := make(chan struct{}, 1)
		go func() { // S/R-SAFE: watchdog is stopped during save and restarted after restore.
			// Flush metrics before killing process.
			metric.EmitMetricUpdate()
			metricsEmitted <- struct{}{}
		}()
		select {
		case <-metricsEmitted:
		case <-time.After(1 * time.Second):
		}
		panic(fmt.Sprintf("%s\nStack for running G's are skipped while panicking.", msg.String()))

	default:
		panic(fmt.Sprintf("Unknown watchdog action %v", action))
	}
}
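
// Sketch of the startup-timeout path described in Opts (values are
// illustrative; k stands for an already-initialized *kernel.Kernel):
//
//	opts := watchdog.DefaultOpts
//	opts.StartupTimeout = 5 * time.Second
//	opts.StartupTimeoutAction = watchdog.Panic
//	w := watchdog.New(k, opts) // arms waitForStart().
//	// If w.Start() is not called within 5s, the watchdog dumps all goroutine
//	// stacks and panics, per StartupTimeoutAction.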