github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/watchdog/watchdog.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package watchdog is responsible for monitoring the sentry for tasks that may
// potentially be stuck or looping indefinitely, causing hard-to-debug hangs in
// the untrusted app.
//
// It works by periodically querying all tasks to check whether they are in user
// mode (RunUser), kernel mode (RunSys), or blocked in the kernel (OffCPU). Tasks
// that have been running in kernel mode for a long time in the same syscall
// without blocking are considered stuck and are reported.
//
// When a stuck task is detected, the watchdog can take one of the following actions:
//  1. LogWarning: Logs a warning message followed by a stack dump of all goroutines.
//     If a task continues to be stuck, the message repeats every minute unless a
//     new stuck task is detected.
//  2. Panic: Same as above, followed by panic().
package watchdog

import (
	"bytes"
	"fmt"
	"time"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/metric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
	"github.com/SagerNet/gvisor/pkg/sync"
)

// Opts configures the watchdog.
type Opts struct {
	// TaskTimeout is the amount of time to allow a task to execute the
	// same syscall without blocking before it's declared stuck.
	TaskTimeout time.Duration

	// TaskTimeoutAction indicates what action to take when a stuck task
	// is detected.
	TaskTimeoutAction Action

	// StartupTimeout is the amount of time to allow between watchdog
	// creation and calling watchdog.Start.
	StartupTimeout time.Duration

	// StartupTimeoutAction indicates what action to take when
	// watchdog.Start is not called within the timeout.
	StartupTimeoutAction Action
}

// DefaultOpts is a default set of options for the watchdog.
var DefaultOpts = Opts{
	// Task timeout.
	TaskTimeout:       3 * time.Minute,
	TaskTimeoutAction: LogWarning,

	// Startup timeout.
	StartupTimeout:       30 * time.Second,
	StartupTimeoutAction: LogWarning,
}

// descheduleThreshold is the amount of time scheduling needs to be off before
// the entire wait period is discounted from the task's last update time. It's
// set high enough that small scheduling delays won't trigger it.
const descheduleThreshold = 1 * time.Second

// stackDumpSameTaskPeriod is the amount of time to wait before dumping the
// stack to the log again when the same task(s) remains stuck.
var stackDumpSameTaskPeriod = time.Minute

// Action defines what action to take when a stuck task is detected.
type Action int

const (
	// LogWarning logs a warning message followed by a stack trace.
	LogWarning Action = iota

	// Panic will do the same logging as LogWarning and panic().
	Panic
)
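// Because *Action implements flag.Value (see Set, Get, and String below), it can
// be bound directly to a command-line flag. A minimal sketch of how a caller
// might wire this up; the flag name "watchdog-action" and the surrounding flag
// handling are illustrative, not part of this package:
//
//	action := watchdog.LogWarning
//	flag.Var(&action, "watchdog-action", "action on a stuck task: log|logwarning|panic")
//	flag.Parse()
//	opts := watchdog.DefaultOpts
//	opts.TaskTimeoutAction = action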
// Set implements flag.Value.
func (a *Action) Set(v string) error {
	switch v {
	case "log", "logwarning":
		*a = LogWarning
	case "panic":
		*a = Panic
	default:
		return fmt.Errorf("invalid watchdog action %q", v)
	}
	return nil
}

// Get implements flag.Value.
func (a *Action) Get() interface{} {
	return *a
}

// String returns Action's string representation.
func (a Action) String() string {
	switch a {
	case LogWarning:
		return "logWarning"
	case Panic:
		return "panic"
	default:
		panic(fmt.Sprintf("Invalid watchdog action: %d", a))
	}
}

// Watchdog is the main watchdog type. It controls a goroutine that periodically
// analyses all tasks and reports if any of them appear to be stuck.
type Watchdog struct {
	// Configuration options are embedded.
	Opts

	// period indicates how often to check all tasks. It's calculated based on
	// opts.TaskTimeout.
	period time.Duration

	// k is where the tasks come from.
	k *kernel.Kernel

	// stop is used to notify the watchdog that it should stop.
	stop chan struct{}

	// done is used to notify when the watchdog has stopped.
	done chan struct{}

	// offenders contains all tasks that are currently stuck.
	offenders map[*kernel.Task]*offender

	// lastStackDump tracks the last time a stack dump was generated to prevent
	// spamming the log.
	lastStackDump time.Time

	// lastRun is set to the last time the watchdog executed a monitoring loop.
	lastRun ktime.Time

	// mu protects the fields below.
	mu sync.Mutex

	// running is true if the watchdog is running.
	running bool

	// startCalled is true if Start has ever been called. It remains true
	// even if Stop is called.
	startCalled bool
}

type offender struct {
	lastUpdateTime ktime.Time
}

// New creates a new watchdog.
func New(k *kernel.Kernel, opts Opts) *Watchdog {
	// 4 is arbitrary; we just don't want to prolong 'TaskTimeout' too much.
	period := opts.TaskTimeout / 4
	w := &Watchdog{
		Opts:      opts,
		k:         k,
		period:    period,
		offenders: make(map[*kernel.Task]*offender),
		stop:      make(chan struct{}),
		done:      make(chan struct{}),
	}

	// Handle StartupTimeout if it is set.
	if w.StartupTimeout > 0 {
		log.Infof("Watchdog waiting %v for startup", w.StartupTimeout)
		go w.waitForStart() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
	}

	return w
}

// Start starts the watchdog.
func (w *Watchdog) Start() {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.startCalled = true

	if w.running {
		return
	}

	if w.TaskTimeout == 0 {
		log.Infof("Watchdog task timeout disabled")
		return
	}
	w.lastRun = w.k.MonotonicClock().Now()

	log.Infof("Starting watchdog, period: %v, timeout: %v, action: %v", w.period, w.TaskTimeout, w.TaskTimeoutAction)
	go w.loop() // S/R-SAFE: watchdog is stopped during save and restarted after restore.
	w.running = true
}
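// A minimal lifecycle sketch from the caller's side; the *kernel.Kernel value k
// is assumed to come from the surrounding loader and is not shown here:
//
//	w := watchdog.New(k, watchdog.DefaultOpts)
//	w.Start()
//	// ... run the sandboxed workload ...
//	w.Stop()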
// Stop requests the watchdog to stop and waits for it.
func (w *Watchdog) Stop() {
	if w.TaskTimeout == 0 {
		return
	}

	w.mu.Lock()
	defer w.mu.Unlock()
	if !w.running {
		return
	}
	log.Infof("Stopping watchdog")
	w.stop <- struct{}{}
	<-w.done
	w.running = false
	log.Infof("Watchdog stopped")
}

// waitForStart waits for Start to be called and takes action if it does not
// happen within the startup timeout.
func (w *Watchdog) waitForStart() {
	<-time.After(w.StartupTimeout)
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.startCalled {
		// We are fine.
		return
	}

	metric.WeirdnessMetric.Increment("watchdog_stuck_startup")

	var buf bytes.Buffer
	buf.WriteString(fmt.Sprintf("Watchdog.Start() not called within %s", w.StartupTimeout))
	w.doAction(w.StartupTimeoutAction, false, &buf)
}

// loop is the main watchdog routine. It only returns when 'Stop()' is called.
func (w *Watchdog) loop() {
	// Loop until someone stops it.
	for {
		select {
		case <-w.stop:
			w.done <- struct{}{}
			return
		case <-time.After(w.period):
			w.runTurn()
		}
	}
}

// runTurn runs a single pass over all tasks and reports anything it finds.
func (w *Watchdog) runTurn() {
	// Someone needs to watch the watchdog. The call below can get stuck if there
	// is a deadlock affecting root's PID namespace mutex. Run it in a goroutine
	// and report if it takes too long to return.
	var tasks []*kernel.Task
	done := make(chan struct{})
	go func() { // S/R-SAFE: watchdog is stopped and restarted during S/R.
		tasks = w.k.TaskSet().Root.Tasks()
		close(done)
	}()

	select {
	case <-done:
	case <-time.After(w.TaskTimeout):
		// Report if the watchdog is not making progress.
		// No one is watching the watchdog watcher though.
		w.reportStuckWatchdog()
		<-done
	}

	newOffenders := make(map[*kernel.Task]*offender)
	newTaskFound := false
	now := ktime.FromNanoseconds(int64(w.k.CPUClockNow() * uint64(linux.ClockTick)))

	// The process may be running with a low CPU limit, making tasks appear stuck
	// because they are starved of CPU cycles. The estimate is that tasks could
	// have been starved since the last time the watchdog ran. If the watchdog
	// detects that scheduling is off, it discounts the entire duration since the
	// last run from 'lastUpdateTime'.
	discount := time.Duration(0)
	if now.Sub(w.lastRun.Add(w.period)) > descheduleThreshold {
		discount = now.Sub(w.lastRun)
	}
	w.lastRun = now

	log.Infof("Watchdog starting loop, tasks: %d, discount: %v", len(tasks), discount)
	for _, t := range tasks {
		tsched := t.TaskGoroutineSchedInfo()

		// An offender is a task running inside the kernel for longer than the specified timeout.
		if tsched.State == kernel.TaskGoroutineRunningSys {
			lastUpdateTime := ktime.FromNanoseconds(int64(tsched.Timestamp * uint64(linux.ClockTick)))
			elapsed := now.Sub(lastUpdateTime) - discount
			if elapsed > w.TaskTimeout {
				tc, ok := w.offenders[t]
				if !ok {
					// New stuck task detected.
					//
					// Note that tasks blocked doing IO may be considered stuck in the
					// kernel, unless they are surrounded by
					// Task.UninterruptibleSleepStart/Finish.
					tc = &offender{lastUpdateTime: lastUpdateTime}
					metric.WeirdnessMetric.Increment("watchdog_stuck_tasks")
					newTaskFound = true
				}
				newOffenders[t] = tc
			}
		}
	}
	if len(newOffenders) > 0 {
		w.report(newOffenders, newTaskFound, now)
	}

	// Remember which tasks have been reported.
	w.offenders = newOffenders
}
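// A worked example of the de-scheduling discount in runTurn above, with
// illustrative numbers: with TaskTimeout = 3m the check period is 45s. If the
// gap between two passes turns out to be 50s, then 50s - 45s = 5s exceeds
// descheduleThreshold (1s), so the entire 50s gap is subtracted from every
// task's elapsed time on that pass:
//
//	period := 45 * time.Second          // TaskTimeout / 4
//	gapSinceLastRun := 50 * time.Second // now.Sub(lastRun)
//	discount := time.Duration(0)
//	if gapSinceLastRun-period > descheduleThreshold {
//		discount = gapSinceLastRun // the whole gap is discounted from 'elapsed'
//	}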
// report takes appropriate action when a stuck task is detected.
func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound bool, now ktime.Time) {
	var buf bytes.Buffer
	buf.WriteString(fmt.Sprintf("Sentry detected %d stuck task(s):\n", len(offenders)))
	for t, o := range offenders {
		tid := w.k.TaskSet().Root.IDOfTask(t)
		buf.WriteString(fmt.Sprintf("\tTask tid: %v (goroutine %d), entered RunSys state %v ago.\n", tid, t.GoroutineID(), now.Sub(o.lastUpdateTime)))
	}
	buf.WriteString("Search for 'goroutine <id>' in the stack dump to find the offending goroutine(s)")

	// Force stack dump only if a new task is detected.
	w.doAction(w.TaskTimeoutAction, newTaskFound, &buf)
}

func (w *Watchdog) reportStuckWatchdog() {
	var buf bytes.Buffer
	buf.WriteString("Watchdog goroutine is stuck")
	w.doAction(w.TaskTimeoutAction, false, &buf)
}

// doAction takes the given action. If the action is LogWarning, the stack is
// not always dumped to the log, to prevent log flooding. "forceStack"
// guarantees that the stack will be dumped regardless.
func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
	switch action {
	case LogWarning:
		// Dump the stack only if forced or some time has passed since the last
		// time a stack dump was generated.
		if !forceStack && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
			msg.WriteString("\n...[stack dump skipped]...")
			log.Warningf(msg.String())
			return
		}
		log.TracebackAll(msg.String())
		w.lastStackDump = time.Now()

	case Panic:
		// Panic will skip over running tasks, which are likely the culprits here,
		// so manually dump all stacks before panicking.
		log.TracebackAll(msg.String())

		// Attempt to flush metrics; time out and move on in case metrics are stuck as well.
		metricsEmitted := make(chan struct{}, 1)
		go func() { // S/R-SAFE: watchdog is stopped during save and restarted after restore.
			// Flush metrics before killing the process.
			metric.EmitMetricUpdate()
			metricsEmitted <- struct{}{}
		}()
		select {
		case <-metricsEmitted:
		case <-time.After(1 * time.Second):
		}
		panic(fmt.Sprintf("%s\nStack for running G's are skipped while panicking.", msg.String()))

	default:
		panic(fmt.Sprintf("Unknown watchdog action %v", action))
	}
}
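// waitForStart and doAction together also cover the case where Start is never
// called. A sketch of configuring that path from the caller's side; the kernel
// value k and the choice of Panic are illustrative:
//
//	opts := watchdog.DefaultOpts
//	opts.StartupTimeout = 10 * time.Second
//	opts.StartupTimeoutAction = watchdog.Panic
//	w := watchdog.New(k, opts) // panics with a stack dump unless w.Start() runs within 10s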