// Source listing: github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/mgclimit.go

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "runtime/internal/atomic"

// gcCPULimiter is a mechanism to limit GC CPU utilization in situations
// where it might become excessive and inhibit application progress (e.g.
// a death spiral).
//
// The core of the limiter is a leaky bucket mechanism that fills with GC
// CPU time and drains with mutator time. Because the bucket fills and
// drains with time directly (i.e. without any weighting), this effectively
// sets a very conservative limit of 50%. This limit could be enforced
// directly; however, the purpose of the bucket is to accommodate spikes in
// GC CPU utilization without hurting throughput.
//
// Note that the bucket in the leaky bucket mechanism can never go negative,
// so the GC never gets credit for a lot of CPU time spent without the GC
// running. This is intentional, as an application that stays idle for, say,
// an entire day, could build up enough credit to fail to prevent a death
// spiral the following day. The bucket's capacity is the GC's only leeway.
//
// The capacity thus also sets the window the limiter considers. For example,
// if the capacity of the bucket is 1 cpu-second, then the limiter will not
// kick in until at least 1 full cpu-second in the last 2 cpu-second window
// is spent on GC CPU time.
var gcCPULimiter gcCPULimiterState

// gcCPULimiterState is the state behind gcCPULimiter: a try-lock, the leaky
// bucket itself, and the pools of time that accumulate between updates.
type gcCPULimiterState struct {
	// lock is a try-lock: tryLock moves it from 0 to 1 with a compare-and-swap
	// and unlock swaps it back to 0. There is no blocking acquire. It is held
	// while the bucket is updated (see updateLocked and accumulate) and for
	// the whole of a GC transition.
	lock atomic.Uint32

	// enabled is the limiter's on/off state, as reported by limiting. It is
	// written by accumulate and resetCapacity.
	enabled atomic.Bool
	// bucket is the leaky bucket. fill is how much GC CPU time it currently
	// holds and capacity is how much it can hold; capacity is set by
	// resetCapacity to capacityPerProc (in nanoseconds) times nprocs.
	bucket struct {
		// Invariants:
		// - fill >= 0
		// - capacity >= 0
		// - fill <= capacity
		fill, capacity uint64
	}
	// overflow is the cumulative amount of GC CPU time that we tried to fill the
	// bucket with but exceeded its capacity.
	overflow uint64

	// gcEnabled is an internal copy of gcBlackenEnabled that determines
	// whether the limiter tracks total assist time.
	//
	// gcBlackenEnabled isn't used directly so as to keep this structure
	// unit-testable.
	gcEnabled bool

	// transitioning is true when the GC is in a STW and transitioning between
	// the mark and sweep phases.
	transitioning bool

	// assistTimePool is the accumulated assist time since the last update.
	assistTimePool atomic.Int64

	// idleMarkTimePool is the accumulated idle mark time since the last update.
	//
	// NOTE(review): nothing in the visible part of this file reads or writes
	// this field; confirm whether it is still used elsewhere.
	idleMarkTimePool atomic.Int64

	// idleTimePool is the accumulated time Ps spent on the idle list since the last update.
	idleTimePool atomic.Int64

	// lastUpdate is the nanotime timestamp of the last time update was called.
	//
	// Updated under lock, but may be read concurrently.
	lastUpdate atomic.Int64

	// lastEnabledCycle is the GC cycle that last had the limiter enabled.
	// It is stored as memstats.numgc + 1 at the moment the limiter turns on.
	lastEnabledCycle atomic.Uint32

	// nprocs is an internal copy of gomaxprocs, used to determine total available
	// CPU time.
	//
	// gomaxprocs isn't used directly so as to keep this structure unit-testable.
	nprocs int32

	// test indicates whether this instance of the struct was made for testing purposes.
	// When set, updateLocked skips consuming in-flight per-P limiter events.
	test bool
}

// limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
// should take action to limit CPU utilization.
//
// It is safe to call concurrently with other operations.
func (l *gcCPULimiterState) limiting() bool {
	return l.enabled.Load()
}

// startGCTransition notifies the limiter of a GC transition.
//
// This call takes ownership of the limiter and disables all other means of
// updating the limiter. Release ownership by calling finishGCTransition.
//
// It is safe to call concurrently with other operations.
99 func (l *gcCPULimiterState) startGCTransition(enableGC bool, now int64) { 100 if !l.tryLock() { 101 // This must happen during a STW, so we can't fail to acquire the lock. 102 // If we did, something went wrong. Throw. 103 throw("failed to acquire lock to start a GC transition") 104 } 105 if l.gcEnabled == enableGC { 106 throw("transitioning GC to the same state as before?") 107 } 108 // Flush whatever was left between the last update and now. 109 l.updateLocked(now) 110 l.gcEnabled = enableGC 111 l.transitioning = true 112 // N.B. finishGCTransition releases the lock. 113 // 114 // We don't release here to increase the chance that if there's a failure 115 // to finish the transition, that we throw on failing to acquire the lock. 116 } 117 118 // finishGCTransition notifies the limiter that the GC transition is complete 119 // and releases ownership of it. It also accumulates STW time in the bucket. 120 // now must be the timestamp from the end of the STW pause. 121 func (l *gcCPULimiterState) finishGCTransition(now int64) { 122 if !l.transitioning { 123 throw("finishGCTransition called without starting one?") 124 } 125 // Count the full nprocs set of CPU time because the world is stopped 126 // between startGCTransition and finishGCTransition. Even though the GC 127 // isn't running on all CPUs, it is preventing user code from doing so, 128 // so it might as well be. 129 if lastUpdate := l.lastUpdate.Load(); now >= lastUpdate { 130 l.accumulate(0, (now-lastUpdate)*int64(l.nprocs)) 131 } 132 l.lastUpdate.Store(now) 133 l.transitioning = false 134 l.unlock() 135 } 136 137 // gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time 138 // we can go before updating the limiter. 139 const gcCPULimiterUpdatePeriod = 10e6 // 10ms 140 141 // needUpdate returns true if the limiter's maximum update period has been 142 // exceeded, and so would benefit from an update. 
143 func (l *gcCPULimiterState) needUpdate(now int64) bool { 144 return now-l.lastUpdate.Load() > gcCPULimiterUpdatePeriod 145 } 146 147 // addAssistTime notifies the limiter of additional assist time. It will be 148 // included in the next update. 149 func (l *gcCPULimiterState) addAssistTime(t int64) { 150 l.assistTimePool.Add(t) 151 } 152 153 // addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be 154 // subtracted from the total CPU time in the next update. 155 func (l *gcCPULimiterState) addIdleTime(t int64) { 156 l.idleTimePool.Add(t) 157 } 158 159 // update updates the bucket given runtime-specific information. now is the 160 // current monotonic time in nanoseconds. 161 // 162 // This is safe to call concurrently with other operations, except *GCTransition. 163 func (l *gcCPULimiterState) update(now int64) { 164 if !l.tryLock() { 165 // We failed to acquire the lock, which means something else is currently 166 // updating. Just drop our update, the next one to update will include 167 // our total assist time. 168 return 169 } 170 if l.transitioning { 171 throw("update during transition") 172 } 173 l.updateLocked(now) 174 l.unlock() 175 } 176 177 // updateLocked is the implementation of update. l.lock must be held. 178 func (l *gcCPULimiterState) updateLocked(now int64) { 179 lastUpdate := l.lastUpdate.Load() 180 if now < lastUpdate { 181 // Defensively avoid overflow. This isn't even the latest update anyway. 182 return 183 } 184 windowTotalTime := (now - lastUpdate) * int64(l.nprocs) 185 l.lastUpdate.Store(now) 186 187 // Drain the pool of assist time. 188 assistTime := l.assistTimePool.Load() 189 if assistTime != 0 { 190 l.assistTimePool.Add(-assistTime) 191 } 192 193 // Drain the pool of idle time. 194 idleTime := l.idleTimePool.Load() 195 if idleTime != 0 { 196 l.idleTimePool.Add(-idleTime) 197 } 198 199 if !l.test { 200 // Consume time from in-flight events. Make sure we're not preemptible so allp can't change. 
201 // 202 // The reason we do this instead of just waiting for those events to finish and push updates 203 // is to ensure that all the time we're accounting for happened sometime between lastUpdate 204 // and now. This dramatically simplifies reasoning about the limiter because we're not at 205 // risk of extra time being accounted for in this window than actually happened in this window, 206 // leading to all sorts of weird transient behavior. 207 mp := acquirem() 208 for _, pp := range allp { 209 typ, duration := pp.limiterEvent.consume(now) 210 switch typ { 211 case limiterEventIdleMarkWork: 212 fallthrough 213 case limiterEventIdle: 214 idleTime += duration 215 case limiterEventMarkAssist: 216 fallthrough 217 case limiterEventScavengeAssist: 218 assistTime += duration 219 case limiterEventNone: 220 break 221 default: 222 throw("invalid limiter event type found") 223 } 224 } 225 releasem(mp) 226 } 227 228 // Compute total GC time. 229 windowGCTime := assistTime 230 if l.gcEnabled { 231 windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization) 232 } 233 234 // Subtract out all idle time from the total time. Do this after computing 235 // GC time, because the background utilization is dependent on the *real* 236 // total time, not the total time after idle time is subtracted. 237 // 238 // Idle time is counted as any time that a P is on the P idle list plus idle mark 239 // time. Idle mark workers soak up time that the application spends idle. 240 // 241 // On a heavily undersubscribed system, any additional idle time can skew GC CPU 242 // utilization, because the GC might be executing continuously and thrashing, 243 // yet the CPU utilization with respect to GOMAXPROCS will be quite low, so 244 // the limiter fails to turn on. By subtracting idle time, we're removing time that 245 // we know the application was idle giving a more accurate picture of whether 246 // the GC is thrashing. 
247 // 248 // Note that this can cause the limiter to turn on even if it's not needed. For 249 // instance, on a system with 32 Ps but only 1 running goroutine, each GC will have 250 // 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep 251 // phase, then the GC CPU utilization over that cycle, with idle time removed, will 252 // be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be 253 // unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's 254 // running. 255 windowTotalTime -= idleTime 256 257 l.accumulate(windowTotalTime-windowGCTime, windowGCTime) 258 } 259 260 // accumulate adds time to the bucket and signals whether the limiter is enabled. 261 // 262 // This is an internal function that deals just with the bucket. Prefer update. 263 // l.lock must be held. 264 func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) { 265 headroom := l.bucket.capacity - l.bucket.fill 266 enabled := headroom == 0 267 268 // Let's be careful about three things here: 269 // 1. The addition and subtraction, for the invariants. 270 // 2. Overflow. 271 // 3. Excessive mutation of l.enabled, which is accessed 272 // by all assists, potentially more than once. 273 change := gcTime - mutatorTime 274 275 // Handle limiting case. 276 if change > 0 && headroom <= uint64(change) { 277 l.overflow += uint64(change) - headroom 278 l.bucket.fill = l.bucket.capacity 279 if !enabled { 280 l.enabled.Store(true) 281 l.lastEnabledCycle.Store(memstats.numgc + 1) 282 } 283 return 284 } 285 286 // Handle non-limiting cases. 287 if change < 0 && l.bucket.fill <= uint64(-change) { 288 // Bucket emptied. 289 l.bucket.fill = 0 290 } else { 291 // All other cases. 292 l.bucket.fill -= uint64(-change) 293 } 294 if change != 0 && enabled { 295 l.enabled.Store(false) 296 } 297 } 298 299 // tryLock attempts to lock l. Returns true on success. 
300 func (l *gcCPULimiterState) tryLock() bool { 301 return l.lock.CompareAndSwap(0, 1) 302 } 303 304 // unlock releases the lock on l. Must be called if tryLock returns true. 305 func (l *gcCPULimiterState) unlock() { 306 old := l.lock.Swap(0) 307 if old != 1 { 308 throw("double unlock") 309 } 310 } 311 312 // capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS. 313 const capacityPerProc = 1e9 // 1 second in nanoseconds 314 315 // resetCapacity updates the capacity based on GOMAXPROCS. Must not be called 316 // while the GC is enabled. 317 // 318 // It is safe to call concurrently with other operations. 319 func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) { 320 if !l.tryLock() { 321 // This must happen during a STW, so we can't fail to acquire the lock. 322 // If we did, something went wrong. Throw. 323 throw("failed to acquire lock to reset capacity") 324 } 325 // Flush the rest of the time for this period. 326 l.updateLocked(now) 327 l.nprocs = nprocs 328 329 l.bucket.capacity = uint64(nprocs) * capacityPerProc 330 if l.bucket.fill > l.bucket.capacity { 331 l.bucket.fill = l.bucket.capacity 332 l.enabled.Store(true) 333 l.lastEnabledCycle.Store(memstats.numgc + 1) 334 } else if l.bucket.fill < l.bucket.capacity { 335 l.enabled.Store(false) 336 } 337 l.unlock() 338 } 339 340 // limiterEventType indicates the type of an event occurring on some P. 341 // 342 // These events represent the full set of events that the GC CPU limiter tracks 343 // to execute its function. 344 // 345 // This type may use no more than limiterEventBits bits of information. 346 type limiterEventType uint8 347 348 const ( 349 limiterEventNone limiterEventType = iota // None of the following events. 350 limiterEventIdleMarkWork // Refers to an idle mark worker (see gcMarkWorkerMode). 351 limiterEventMarkAssist // Refers to mark assist (see gcAssistAlloc). 352 limiterEventScavengeAssist // Refers to a scavenge assist (see allocSpan). 
353 limiterEventIdle // Refers to time a P spent on the idle list. 354 355 limiterEventBits = 3 356 ) 357 358 // limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent 359 // the event type. The rest of the bits of that field represent a timestamp. 360 const ( 361 limiterEventTypeMask = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits) 362 limiterEventStampNone = limiterEventStamp(0) 363 ) 364 365 // limiterEventStamp is a nanotime timestamp packed with a limiterEventType. 366 type limiterEventStamp uint64 367 368 // makeLimiterEventStamp creates a new stamp from the event type and the current timestamp. 369 func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp { 370 return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask)) 371 } 372 373 // duration computes the difference between now and the start time stored in the stamp. 374 // 375 // Returns 0 if the difference is negative, which may happen if now is stale or if the 376 // before and after timestamps cross a 2^(64-limiterEventBits) boundary. 377 func (s limiterEventStamp) duration(now int64) int64 { 378 // The top limiterEventBits bits of the timestamp are derived from the current time 379 // when computing a duration. 380 start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask)) 381 if now < start { 382 return 0 383 } 384 return now - start 385 } 386 387 // type extracts the event type from the stamp. 388 func (s limiterEventStamp) typ() limiterEventType { 389 return limiterEventType(s >> (64 - limiterEventBits)) 390 } 391 392 // limiterEvent represents tracking state for an event tracked by the GC CPU limiter. 393 type limiterEvent struct { 394 stamp atomic.Uint64 // Stores a limiterEventStamp. 395 } 396 397 // start begins tracking a new limiter event of the current type. 
If an event 398 // is already in flight, then a new event cannot begin because the current time is 399 // already being attributed to that event. In this case, this function returns false. 400 // Otherwise, it returns true. 401 // 402 // The caller must be non-preemptible until at least stop is called or this function 403 // returns false. Because this is trying to measure "on-CPU" time of some event, getting 404 // scheduled away during it can mean that whatever we're measuring isn't a reflection 405 // of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as 406 // close of an approximation as we can. 407 func (e *limiterEvent) start(typ limiterEventType, now int64) bool { 408 if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone { 409 return false 410 } 411 e.stamp.Store(uint64(makeLimiterEventStamp(typ, now))) 412 return true 413 } 414 415 // consume acquires the partial event CPU time from any in-flight event. 416 // It achieves this by storing the current time as the new event time. 417 // 418 // Returns the type of the in-flight event, as well as how long it's currently been 419 // executing for. Returns limiterEventNone if no event is active. 420 func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) { 421 // Read the limiter event timestamp and update it to now. 422 for { 423 old := limiterEventStamp(e.stamp.Load()) 424 typ = old.typ() 425 if typ == limiterEventNone { 426 // There's no in-flight event, so just push that up. 427 return 428 } 429 duration = old.duration(now) 430 if duration == 0 { 431 // We might have a stale now value, or this crossed the 432 // 2^(64-limiterEventBits) boundary in the clock readings. 433 // Just ignore it. 434 return limiterEventNone, 0 435 } 436 new := makeLimiterEventStamp(typ, now) 437 if e.stamp.CompareAndSwap(uint64(old), uint64(new)) { 438 break 439 } 440 } 441 return 442 } 443 444 // stop stops the active limiter event. 
Throws if the 445 // 446 // The caller must be non-preemptible across the event. See start as to why. 447 func (e *limiterEvent) stop(typ limiterEventType, now int64) { 448 var stamp limiterEventStamp 449 for { 450 stamp = limiterEventStamp(e.stamp.Load()) 451 if stamp.typ() != typ { 452 print("runtime: want=", typ, " got=", stamp.typ(), "\n") 453 throw("limiterEvent.stop: found wrong event in p's limiter event slot") 454 } 455 if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) { 456 break 457 } 458 } 459 duration := stamp.duration(now) 460 if duration == 0 { 461 // It's possible that we're missing time because we crossed a 462 // 2^(64-limiterEventBits) boundary between the start and end. 463 // In this case, we're dropping that information. This is OK because 464 // at worst it'll cause a transient hiccup that will quickly resolve 465 // itself as all new timestamps begin on the other side of the boundary. 466 // Such a hiccup should be incredibly rare. 467 return 468 } 469 // Account for the event. 470 switch typ { 471 case limiterEventIdleMarkWork: 472 gcCPULimiter.addIdleTime(duration) 473 case limiterEventIdle: 474 gcCPULimiter.addIdleTime(duration) 475 sched.idleTime.Add(duration) 476 case limiterEventMarkAssist: 477 fallthrough 478 case limiterEventScavengeAssist: 479 gcCPULimiter.addAssistTime(duration) 480 default: 481 throw("limiterEvent.stop: invalid limiter event type found") 482 } 483 }