github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/mgclimit.go

github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/mgclimit.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import "runtime/internal/atomic"
     8  
     9  // gcCPULimiter is a mechanism to limit GC CPU utilization in situations
    10  // where it might become excessive and inhibit application progress (e.g.
    11  // a death spiral).
    12  //
    13  // The core of the limiter is a leaky bucket mechanism that fills with GC
    14  // CPU time and drains with mutator time. Because the bucket fills and
    15  // drains with time directly (i.e. without any weighting), this effectively
    16  // sets a very conservative limit of 50%. This limit could be enforced directly,
    17  // but the purpose of the bucket is to accommodate spikes in GC CPU
    18  // utilization without hurting throughput.
    19  //
    20  // Note that the bucket in the leaky bucket mechanism can never go negative,
    21  // so the GC never gets credit for a lot of CPU time spent without the GC
    22  // running. This is intentional, as an application that stays idle for, say,
    23  // an entire day, could build up enough credit to fail to prevent a death
    24  // spiral the following day. The bucket's capacity is the GC's only leeway.
    25  //
    26  // The capacity thus also sets the window the limiter considers. For example,
    27  // if the capacity of the bucket is 1 cpu-second, then the limiter will not
    28  // kick in until at least 1 full cpu-second in the last 2 cpu-second window
    29  // is spent on GC CPU time.
    30  var gcCPULimiter gcCPULimiterState
    31  
    32  type gcCPULimiterState struct {
    33  	lock atomic.Uint32
    34  
    35  	enabled atomic.Bool
    36  	bucket  struct {
    37  		// Invariants:
    38  		// - fill >= 0
    39  		// - capacity >= 0
    40  		// - fill <= capacity
    41  		fill, capacity uint64
    42  	}
    43  	// overflow is the cumulative amount of GC CPU time that we tried to fill the
    44  	// bucket with but exceeded its capacity.
    45  	overflow uint64
    46  
    47  	// gcEnabled is an internal copy of gcBlackenEnabled that determines
    48  	// whether the limiter tracks total assist time.
    49  	//
    50  	// gcBlackenEnabled isn't used directly so as to keep this structure
    51  	// unit-testable.
    52  	gcEnabled bool
    53  
    54  	// transitioning is true when the GC is in a STW and transitioning between
    55  	// the mark and sweep phases.
    56  	transitioning bool
    57  
    58  	// assistTimePool is the accumulated assist time since the last update.
    59  	assistTimePool atomic.Int64
    60  
    61  	// idleMarkTimePool is the accumulated idle mark time since the last update.
    62  	idleMarkTimePool atomic.Int64
    63  
    64  	// idleTimePool is the accumulated time Ps spent on the idle list since the last update.
    65  	idleTimePool atomic.Int64
    66  
    67  	// lastUpdate is the nanotime timestamp of the last time update was called.
    68  	//
    69  	// Updated under lock, but may be read concurrently.
    70  	lastUpdate atomic.Int64
    71  
    72  	// lastEnabledCycle is the GC cycle that last had the limiter enabled.
    73  	lastEnabledCycle atomic.Uint32
    74  
    75  	// nprocs is an internal copy of gomaxprocs, used to determine total available
    76  	// CPU time.
    77  	//
    78  	// gomaxprocs isn't used directly so as to keep this structure unit-testable.
    79  	nprocs int32
    80  
    81  	// test indicates whether this instance of the struct was made for testing purposes.
    82  	test bool
    83  }
    84  
    85  // limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
    86  // should take action to limit CPU utilization.
    87  //
    88  // It is safe to call concurrently with other operations.
    89  func (l *gcCPULimiterState) limiting() bool {
    90  	return l.enabled.Load()
    91  }
    92  
    93  // startGCTransition notifies the limiter of a GC transition.
    94  //
    95  // This call takes ownership of the limiter and disables all other means of
    96  // updating the limiter. Release ownership by calling finishGCTransition.
    97  //
    98  // It is safe to call concurrently with other operations.
    99  func (l *gcCPULimiterState) startGCTransition(enableGC bool, now int64) {
   100  	if !l.tryLock() {
   101  		// This must happen during a STW, so we can't fail to acquire the lock.
   102  		// If we did, something went wrong. Throw.
   103  		throw("failed to acquire lock to start a GC transition")
   104  	}
   105  	if l.gcEnabled == enableGC {
   106  		throw("transitioning GC to the same state as before?")
   107  	}
   108  	// Flush whatever was left between the last update and now.
   109  	l.updateLocked(now)
   110  	l.gcEnabled = enableGC
   111  	l.transitioning = true
   112  	// N.B. finishGCTransition releases the lock.
   113  	//
   114  	// We don't release here to increase the chance that if there's a failure
   115  	// to finish the transition, that we throw on failing to acquire the lock.
   116  }
   117  
   118  // finishGCTransition notifies the limiter that the GC transition is complete
   119  // and releases ownership of it. It also accumulates STW time in the bucket.
   120  // now must be the timestamp from the end of the STW pause.
   121  func (l *gcCPULimiterState) finishGCTransition(now int64) {
   122  	if !l.transitioning {
   123  		throw("finishGCTransition called without starting one?")
   124  	}
   125  	// Count the full nprocs set of CPU time because the world is stopped
   126  	// between startGCTransition and finishGCTransition. Even though the GC
   127  	// isn't running on all CPUs, it is preventing user code from doing so,
   128  	// so it might as well be.
   129  	if lastUpdate := l.lastUpdate.Load(); now >= lastUpdate {
   130  		l.accumulate(0, (now-lastUpdate)*int64(l.nprocs))
   131  	}
   132  	l.lastUpdate.Store(now)
   133  	l.transitioning = false
   134  	l.unlock()
   135  }
   136  
   137  // gcCPULimiterUpdatePeriod dictates the maximum amount of wall-clock time,
   138  // in nanoseconds, that we can go before updating the limiter.
   139  const gcCPULimiterUpdatePeriod = 10e6 // 10ms
   140  
   141  // needUpdate returns true if the limiter's maximum update period has been
   142  // exceeded, and so would benefit from an update.
   143  func (l *gcCPULimiterState) needUpdate(now int64) bool {
   144  	return now-l.lastUpdate.Load() > gcCPULimiterUpdatePeriod
   145  }
   146  
   147  // addAssistTime notifies the limiter of additional assist time. It will be
   148  // included in the next update.
   149  func (l *gcCPULimiterState) addAssistTime(t int64) {
   150  	l.assistTimePool.Add(t)
   151  }
   152  
   153  // addIdleTime notifies the limiter of additional time a P spent on the idle list. It will be
   154  // subtracted from the total CPU time in the next update.
   155  func (l *gcCPULimiterState) addIdleTime(t int64) {
   156  	l.idleTimePool.Add(t)
   157  }
   158  
   159  // update updates the bucket given runtime-specific information. now is the
   160  // current monotonic time in nanoseconds.
   161  //
   162  // This is safe to call concurrently with other operations, except *GCTransition.
   163  func (l *gcCPULimiterState) update(now int64) {
   164  	if !l.tryLock() {
   165  		// We failed to acquire the lock, which means something else is currently
   166  		// updating. Just drop our update, the next one to update will include
   167  		// our total assist time.
   168  		return
   169  	}
   170  	if l.transitioning {
   171  		throw("update during transition")
   172  	}
   173  	l.updateLocked(now)
   174  	l.unlock()
   175  }
   176  
   177  // updateLocked is the implementation of update. l.lock must be held.
   178  func (l *gcCPULimiterState) updateLocked(now int64) {
   179  	lastUpdate := l.lastUpdate.Load()
   180  	if now < lastUpdate {
   181  		// Defensively avoid overflow. This isn't even the latest update anyway.
   182  		return
   183  	}
   184  	windowTotalTime := (now - lastUpdate) * int64(l.nprocs)
   185  	l.lastUpdate.Store(now)
   186  
   187  	// Drain the pool of assist time.
   188  	assistTime := l.assistTimePool.Load()
   189  	if assistTime != 0 {
   190  		l.assistTimePool.Add(-assistTime)
   191  	}
   192  
   193  	// Drain the pool of idle time.
   194  	idleTime := l.idleTimePool.Load()
   195  	if idleTime != 0 {
   196  		l.idleTimePool.Add(-idleTime)
   197  	}
   198  
   199  	if !l.test {
   200  		// Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
   201  		//
   202  		// The reason we do this instead of just waiting for those events to finish and push updates
   203  		// is to ensure that all the time we're accounting for happened sometime between lastUpdate
   204  		// and now. This dramatically simplifies reasoning about the limiter because we're not at
   205  		// risk of extra time being accounted for in this window than actually happened in this window,
   206  		// leading to all sorts of weird transient behavior.
   207  		mp := acquirem()
   208  		for _, pp := range allp {
   209  			typ, duration := pp.limiterEvent.consume(now)
   210  			switch typ {
   211  			case limiterEventIdleMarkWork:
   212  				fallthrough
   213  			case limiterEventIdle:
   214  				idleTime += duration
   215  			case limiterEventMarkAssist:
   216  				fallthrough
   217  			case limiterEventScavengeAssist:
   218  				assistTime += duration
   219  			case limiterEventNone:
   220  				break
   221  			default:
   222  				throw("invalid limiter event type found")
   223  			}
   224  		}
   225  		releasem(mp)
   226  	}
   227  
   228  	// Compute total GC time.
   229  	windowGCTime := assistTime
   230  	if l.gcEnabled {
   231  		windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization)
   232  	}
   233  
   234  	// Subtract out all idle time from the total time. Do this after computing
   235  	// GC time, because the background utilization is dependent on the *real*
   236  	// total time, not the total time after idle time is subtracted.
   237  	//
   238  	// Idle time is counted as any time that a P is on the P idle list plus idle mark
   239  	// time. Idle mark workers soak up time that the application spends idle.
   240  	//
   241  	// On a heavily undersubscribed system, any additional idle time can skew GC CPU
   242  	// utilization, because the GC might be executing continuously and thrashing,
   243  	// yet the CPU utilization with respect to GOMAXPROCS will be quite low, so
   244  	// the limiter fails to turn on. By subtracting idle time, we're removing time that
   245  	// we know the application was idle giving a more accurate picture of whether
   246  	// the GC is thrashing.
   247  	//
   248  	// Note that this can cause the limiter to turn on even if it's not needed. For
   249  	// instance, on a system with 32 Ps but only 1 running goroutine, each GC will have
   250  	// 8 dedicated GC workers. Assuming the GC cycle is half mark phase and half sweep
   251  	// phase, then the GC CPU utilization over that cycle, with idle time removed, will
   252  	// be 8/(8+2) = 80%. Even though the limiter turns on, though, assist should be
   253  	// unnecessary, as the GC has way more CPU time to outpace the 1 goroutine that's
   254  	// running.
   255  	windowTotalTime -= idleTime
   256  
   257  	l.accumulate(windowTotalTime-windowGCTime, windowGCTime)
   258  }
   259  
   260  // accumulate adds time to the bucket and signals whether the limiter is enabled.
   261  //
   262  // This is an internal function that deals just with the bucket. Prefer update.
   263  // l.lock must be held.
   264  func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
   265  	headroom := l.bucket.capacity - l.bucket.fill
   266  	enabled := headroom == 0
   267  
   268  	// Let's be careful about three things here:
   269  	// 1. The addition and subtraction, for the invariants.
   270  	// 2. Overflow.
   271  	// 3. Excessive mutation of l.enabled, which is accessed
   272  	//    by all assists, potentially more than once.
   273  	change := gcTime - mutatorTime
   274  
   275  	// Handle limiting case.
   276  	if change > 0 && headroom <= uint64(change) {
   277  		l.overflow += uint64(change) - headroom
   278  		l.bucket.fill = l.bucket.capacity
   279  		if !enabled {
   280  			l.enabled.Store(true)
   281  			l.lastEnabledCycle.Store(memstats.numgc + 1)
   282  		}
   283  		return
   284  	}
   285  
   286  	// Handle non-limiting cases.
   287  	if change < 0 && l.bucket.fill <= uint64(-change) {
   288  		// Bucket emptied.
   289  		l.bucket.fill = 0
   290  	} else {
   291  		// All other cases.
   292  		l.bucket.fill -= uint64(-change)
   293  	}
   294  	if change != 0 && enabled {
   295  		l.enabled.Store(false)
   296  	}
   297  }
   298  
   299  // tryLock attempts to lock l. Returns true on success.
   300  func (l *gcCPULimiterState) tryLock() bool {
   301  	return l.lock.CompareAndSwap(0, 1)
   302  }
   303  
   304  // unlock releases the lock on l. Must be called if tryLock returns true.
   305  func (l *gcCPULimiterState) unlock() {
   306  	old := l.lock.Swap(0)
   307  	if old != 1 {
   308  		throw("double unlock")
   309  	}
   310  }
   311  
   312  // capacityPerProc is the limiter's bucket capacity for each P in GOMAXPROCS; resetCapacity scales it by the number of Ps.
   313  const capacityPerProc = 1e9 // 1 second in nanoseconds
   314  
   315  // resetCapacity updates the capacity based on GOMAXPROCS. Must not be called
   316  // while the GC is enabled.
   317  //
   318  // It is safe to call concurrently with other operations.
   319  func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
   320  	if !l.tryLock() {
   321  		// This must happen during a STW, so we can't fail to acquire the lock.
   322  		// If we did, something went wrong. Throw.
   323  		throw("failed to acquire lock to reset capacity")
   324  	}
   325  	// Flush the rest of the time for this period.
   326  	l.updateLocked(now)
   327  	l.nprocs = nprocs
   328  
   329  	l.bucket.capacity = uint64(nprocs) * capacityPerProc
   330  	if l.bucket.fill > l.bucket.capacity {
   331  		l.bucket.fill = l.bucket.capacity
   332  		l.enabled.Store(true)
   333  		l.lastEnabledCycle.Store(memstats.numgc + 1)
   334  	} else if l.bucket.fill < l.bucket.capacity {
   335  		l.enabled.Store(false)
   336  	}
   337  	l.unlock()
   338  }
   339  
   340  // limiterEventType indicates the type of an event occurring on some P.
   341  //
   342  // These events represent the full set of events that the GC CPU limiter tracks
   343  // to execute its function.
   344  //
   345  // This type may use no more than limiterEventBits bits of information.
   346  type limiterEventType uint8
   347  
   348  const (
   349  	limiterEventNone           limiterEventType = iota // None of the following events.
   350  	limiterEventIdleMarkWork                           // Refers to an idle mark worker (see gcMarkWorkerMode).
   351  	limiterEventMarkAssist                             // Refers to mark assist (see gcAssistAlloc).
   352  	limiterEventScavengeAssist                         // Refers to a scavenge assist (see allocSpan).
   353  	limiterEventIdle                                   // Refers to time a P spent on the idle list.
   354  
   355  	limiterEventBits = 3
   356  )
   357  
   358  // limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent
   359  // the event type. The rest of the bits of that field represent a timestamp.
   360  const (
   361  	limiterEventTypeMask  = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
   362  	limiterEventStampNone = limiterEventStamp(0)
   363  )
   364  
   365  // limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
   366  type limiterEventStamp uint64
   367  
   368  // makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
   369  func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
   370  	return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
   371  }
   372  
   373  // duration computes the difference between now and the start time stored in the stamp.
   374  //
   375  // Returns 0 if the difference is negative, which may happen if now is stale or if the
   376  // before and after timestamps cross a 2^(64-limiterEventBits) boundary.
   377  func (s limiterEventStamp) duration(now int64) int64 {
   378  	// The top limiterEventBits bits of the timestamp are derived from the current time
   379  	// when computing a duration.
   380  	start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
   381  	if now < start {
   382  		return 0
   383  	}
   384  	return now - start
   385  }
   386  
   387  // type extracts the event type from the stamp.
   388  func (s limiterEventStamp) typ() limiterEventType {
   389  	return limiterEventType(s >> (64 - limiterEventBits))
   390  }
   391  
   392  // limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
   393  type limiterEvent struct {
   394  	stamp atomic.Uint64 // Stores a limiterEventStamp.
   395  }
   396  
   397  // start begins tracking a new limiter event of the current type. If an event
   398  // is already in flight, then a new event cannot begin because the current time is
   399  // already being attributed to that event. In this case, this function returns false.
   400  // Otherwise, it returns true.
   401  //
   402  // The caller must be non-preemptible until at least stop is called or this function
   403  // returns false. Because this is trying to measure "on-CPU" time of some event, getting
   404  // scheduled away during it can mean that whatever we're measuring isn't a reflection
   405  // of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
   406  // close of an approximation as we can.
   407  func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
   408  	if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
   409  		return false
   410  	}
   411  	e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
   412  	return true
   413  }
   414  
   415  // consume acquires the partial event CPU time from any in-flight event.
   416  // It achieves this by storing the current time as the new event time.
   417  //
   418  // Returns the type of the in-flight event, as well as how long it's currently been
   419  // executing for. Returns limiterEventNone if no event is active.
   420  func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
   421  	// Read the limiter event timestamp and update it to now.
   422  	for {
   423  		old := limiterEventStamp(e.stamp.Load())
   424  		typ = old.typ()
   425  		if typ == limiterEventNone {
   426  			// There's no in-flight event, so just push that up.
   427  			return
   428  		}
   429  		duration = old.duration(now)
   430  		if duration == 0 {
   431  			// We might have a stale now value, or this crossed the
   432  			// 2^(64-limiterEventBits) boundary in the clock readings.
   433  			// Just ignore it.
   434  			return limiterEventNone, 0
   435  		}
   436  		new := makeLimiterEventStamp(typ, now)
   437  		if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
   438  			break
   439  		}
   440  	}
   441  	return
   442  }
   443  
   444  // stop stops the active limiter event. Throws if the
   445  //
   446  // The caller must be non-preemptible across the event. See start as to why.
   447  func (e *limiterEvent) stop(typ limiterEventType, now int64) {
   448  	var stamp limiterEventStamp
   449  	for {
   450  		stamp = limiterEventStamp(e.stamp.Load())
   451  		if stamp.typ() != typ {
   452  			print("runtime: want=", typ, " got=", stamp.typ(), "\n")
   453  			throw("limiterEvent.stop: found wrong event in p's limiter event slot")
   454  		}
   455  		if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
   456  			break
   457  		}
   458  	}
   459  	duration := stamp.duration(now)
   460  	if duration == 0 {
   461  		// It's possible that we're missing time because we crossed a
   462  		// 2^(64-limiterEventBits) boundary between the start and end.
   463  		// In this case, we're dropping that information. This is OK because
   464  		// at worst it'll cause a transient hiccup that will quickly resolve
   465  		// itself as all new timestamps begin on the other side of the boundary.
   466  		// Such a hiccup should be incredibly rare.
   467  		return
   468  	}
   469  	// Account for the event.
   470  	switch typ {
   471  	case limiterEventIdleMarkWork:
   472  		gcCPULimiter.addIdleTime(duration)
   473  	case limiterEventIdle:
   474  		gcCPULimiter.addIdleTime(duration)
   475  		sched.idleTime.Add(duration)
   476  	case limiterEventMarkAssist:
   477  		fallthrough
   478  	case limiterEventScavengeAssist:
   479  		gcCPULimiter.addAssistTime(duration)
   480  	default:
   481  		throw("limiterEvent.stop: invalid limiter event type found")
   482  	}
   483  }