github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/trace/gc.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  import (
     8  	"container/heap"
     9  	"math"
    10  	"sort"
    11  	"strings"
    12  	"time"
    13  
    14  	tracev2 "github.com/go-asm/go/trace/v2"
    15  )
    16  
// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	// Time is the trace timestamp at which this utilization
	// value takes effect.
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}
    26  
// UtilFlags controls the behavior of MutatorUtilization.
// The flags are a bit set; combine them with bitwise OR.
type UtilFlags int

const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep

	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)
    48  
// MutatorUtilization returns a set of mutator utilization functions
// for the given trace. Each function will always end with 0
// utilization. The bounds of each function are implicit in the first
// and last event; outside of these bounds each function is undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
	if len(events) == 0 {
		return nil
	}

	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	ps := []perP{}
	// stw is the depth of nested stop-the-world sections; any
	// value > 0 means all Ps count as doing GC work.
	stw := 0

	out := [][]MutatorUtil{}
	// assists tracks goroutines currently in a GC mark assist.
	assists := map[uint64]bool{}
	// block maps each running goroutine to the linked event at
	// which it next stops running; the default case below uses it
	// to detect that stop.
	block := map[uint64]*Event{}
	// bgMark tracks goroutines running as background mark workers.
	bgMark := map[uint64]bool{}

	for _, ev := range events {
		switch ev.Type {
		case EvGomaxprocs:
			gomaxprocs := int(ev.Args[0])
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{ev.Ts, 1}})
				}
				ps = append(ps, perP{series: series})
			}
		case EvSTWStart:
			if flags&UtilSTW != 0 {
				stw++
			}
		case EvSTWDone:
			if flags&UtilSTW != 0 {
				stw--
			}
		case EvGCMarkAssistStart:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc++
				assists[ev.G] = true
			}
		case EvGCMarkAssistDone:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc--
				delete(assists, ev.G)
			}
		case EvGCSweepStart:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc++
			}
		case EvGCSweepDone:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc--
			}
		case EvGoStartLabel:
			if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
					bgMark[ev.G] = true
					ps[ev.P].gc++
				}
			}
			fallthrough
		case EvGoStart:
			if assists[ev.G] {
				// Unblocked during assist.
				ps[ev.P].gc++
			}
			block[ev.G] = ev.Link
		default:
			// Only the event that stops goroutine ev.G's
			// current run (recorded in block) is of interest
			// here; skip anything else.
			if ev != block[ev.G] {
				continue
			}

			if assists[ev.G] {
				// Blocked during assist.
				ps[ev.P].gc--
			}
			if bgMark[ev.G] {
				// Background mark worker done.
				ps[ev.P].gc--
				delete(bgMark, ev.G)
			}
			delete(block, ev.G)
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
			}
		}
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{events[len(events)-1].Ts, 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}
   206  
// MutatorUtilizationV2 returns a set of mutator utilization functions
// for the given v2 trace, passed as an io.Reader. Each function will
// always end with 0 utilization. The bounds of each function are implicit
// in the first and last event; outside of these bounds each function is
// undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilizationV2(events []tracev2.Event, flags UtilFlags) [][]MutatorUtil {
	// Set up a bunch of analysis state.
	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	type procsCount struct {
		// time at which procs changed.
		time int64
		// n is the number of procs at that point.
		n int
	}
	out := [][]MutatorUtil{}
	// stw is the depth of nested GC stop-the-world ranges.
	stw := 0
	ps := []perP{}
	// inGC tracks goroutines currently in a GC mark assist.
	inGC := make(map[tracev2.GoID]bool)
	// states records the last observed state of each goroutine, so
	// EventRangeActive can tell whether a goroutine was executing.
	states := make(map[tracev2.GoID]tracev2.GoState)
	// bgMark tracks goroutines running as background mark workers.
	bgMark := make(map[tracev2.GoID]bool)
	// procs is the history of GOMAXPROCS changes; the
	// EventRangeActive fix-up below uses it to scale corrections.
	procs := []procsCount{}
	// seenSync becomes true once a sync event has been observed;
	// after that, "active" ranges carry no new information.
	seenSync := false

	// Helpers.
	handleSTW := func(r tracev2.Range) bool {
		return flags&UtilSTW != 0 && isGCSTW(r)
	}
	handleMarkAssist := func(r tracev2.Range) bool {
		return flags&UtilAssist != 0 && isGCMarkAssist(r)
	}
	handleSweep := func(r tracev2.Range) bool {
		return flags&UtilSweep != 0 && isGCSweep(r)
	}

	// Iterate through the trace, tracking mutator utilization.
	var lastEv *tracev2.Event
	for i := range events {
		ev := &events[i]
		lastEv = ev

		// Process the event.
		switch ev.Kind() {
		case tracev2.EventSync:
			seenSync = true
		case tracev2.EventMetric:
			m := ev.Metric()
			if m.Name != "/sched/gomaxprocs:threads" {
				break
			}
			gomaxprocs := int(m.Value.Uint64())
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
				}
				ps = append(ps, perP{series: series})
			}
			if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
				procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
			}
		}
		if len(ps) == 0 {
			// We can't start doing any analysis until we see what GOMAXPROCS is.
			// It will show up very early in the trace, but we need to be robust to
			// something else being emitted beforehand.
			continue
		}

		switch ev.Kind() {
		case tracev2.EventRangeActive:
			if seenSync {
				// If we've seen a sync, then we can be sure we're not finding out about
				// something late; we have complete information after that point, and these
				// active events will just be redundant.
				break
			}
			// This range is active back to the start of the trace. We're failing to account
			// for this since we just found out about it now. Fix up the mutator utilization.
			//
			// N.B. A trace can't start during a STW, so we don't handle it here.
			r := ev.Range()
			switch {
			case handleMarkAssist(r):
				if !states[ev.Goroutine()].Executing() {
					// If the goroutine isn't executing, then the fact that it was in mark
					// assist doesn't actually count.
					break
				}
				// This G has been in a mark assist *and running on its P* since the start
				// of the trace.
				fallthrough
			case handleSweep(r):
				// This P has been in sweep (or mark assist, from above) in the start of the trace.
				//
				// We don't need to do anything if UtilPerProc is set. If we get an event like
				// this for a running P, it must show up the first time a P is mentioned. Therefore,
				// this P won't actually have any MutatorUtils on its list yet.
				//
				// However, if UtilPerProc isn't set, then we probably have data from other procs
				// and from previous events. We need to fix that up.
				if flags&UtilPerProc != 0 {
					break
				}
				// Subtract out 1/gomaxprocs mutator utilization for all time periods
				// from the beginning of the trace until now.
				mi, pi := 0, 0
				for mi < len(out[0]) {
					if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
						pi++
						continue
					}
					out[0][mi].Util -= float64(1) / float64(procs[pi].n)
					if out[0][mi].Util < 0 {
						// Clamp: utilization can never be negative.
						out[0][mi].Util = 0
					}
					mi++
				}
			}
			// After accounting for the portion we missed, this just acts like the
			// beginning of a new range.
			fallthrough
		case tracev2.EventRangeBegin:
			r := ev.Range()
			if handleSTW(r) {
				stw++
			} else if handleSweep(r) {
				ps[ev.Proc()].gc++
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc++
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					inGC[g] = true
				}
			}
		case tracev2.EventRangeEnd:
			r := ev.Range()
			if handleSTW(r) {
				stw--
			} else if handleSweep(r) {
				ps[ev.Proc()].gc--
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc--
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					delete(inGC, g)
				}
			}
		case tracev2.EventStateTransition:
			st := ev.StateTransition()
			if st.Resource.Kind != tracev2.ResourceGoroutine {
				break
			}
			old, new := st.Goroutine()
			g := st.Resource.Goroutine()
			if inGC[g] || bgMark[g] {
				if !old.Executing() && new.Executing() {
					// Started running while doing GC things.
					ps[ev.Proc()].gc++
				} else if old.Executing() && !new.Executing() {
					// Stopped running while doing GC things.
					ps[ev.Proc()].gc--
				}
			}
			states[g] = new
		case tracev2.EventLabel:
			l := ev.Label()
			if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
					bgMark[ev.Goroutine()] = true
					ps[ev.Proc()].gc++
				}
			}
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
			}
		}
	}

	// No events in the stream.
	if lastEv == nil {
		return nil
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{int64(lastEv.Time()), 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}
   455  
   456  func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
   457  	if len(util) > 0 {
   458  		if mu.Util == util[len(util)-1].Util {
   459  			// No change.
   460  			return util
   461  		}
   462  		if mu.Time == util[len(util)-1].Time {
   463  			// Take the lowest utilization at a time stamp.
   464  			if mu.Util < util[len(util)-1].Util {
   465  				util[len(util)-1] = mu
   466  			}
   467  			return util
   468  		}
   469  	}
   470  	return append(util, mu)
   471  }
   472  
   473  // totalUtil is total utilization, measured in nanoseconds. This is a
   474  // separate type primarily to distinguish it from mean utilization,
   475  // which is also a float64.
   476  type totalUtil float64
   477  
   478  func totalUtilOf(meanUtil float64, dur int64) totalUtil {
   479  	return totalUtil(meanUtil * float64(dur))
   480  }
   481  
   482  // mean returns the mean utilization over dur.
   483  func (u totalUtil) mean(dur time.Duration) float64 {
   484  	return float64(u) / float64(dur)
   485  }
   486  
// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	// series holds one precomputed summary per utilization
	// function (one per P when UtilPerProc was used).
	series []mmuSeries
}
   492  
// mmuSeries is the precomputed analysis state for a single mutator
// utilization function.
type mmuSeries struct {
	// util is the time-ordered mutator utilization function.
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}
   503  
// mmuBand summarizes one fixed-duration band of a utilization series.
type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}
   516  
   517  // NewMMUCurve returns an MMU curve for the given mutator utilization
   518  // function.
   519  func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
   520  	series := make([]mmuSeries, len(utils))
   521  	for i, util := range utils {
   522  		series[i] = newMMUSeries(util)
   523  	}
   524  	return &MMUCurve{series}
   525  }
   526  
// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests. newMMUSeries may use fewer bands
// when the series has fewer points than this.
var bandsPerSeries = 1000
   530  
// newMMUSeries precomputes the analysis state (cumulative sums and
// band summaries) for a single mutator utilization function.
//
// NOTE(review): this indexes util[len(util)-1], so it appears to
// require a non-empty util — confirm callers guarantee that.
func newMMUSeries(util []MutatorUtil) mmuSeries {
	// Compute cumulative sum.
	sums := make([]totalUtil, len(util))
	var prev MutatorUtil
	var sum totalUtil
	for j, u := range util {
		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
		sums[j] = sum
		prev = u
	}

	// Divide the utilization curve up into equal size
	// non-overlapping "bands" and compute a summary for each of
	// these bands.
	//
	// Compute the duration of each band.
	numBands := bandsPerSeries
	if numBands > len(util) {
		// There's no point in having lots of bands if there
		// aren't many events.
		numBands = len(util)
	}
	dur := util[len(util)-1].Time - util[0].Time
	// Round the band duration up so numBands bands cover dur.
	bandDur := (dur + int64(numBands) - 1) / int64(numBands)
	if bandDur < 1 {
		bandDur = 1
	}
	// Compute the bands. There are numBands+1 bands in order to
	// record the final cumulative sum.
	bands := make([]mmuBand, numBands+1)
	s := mmuSeries{util, sums, bands, bandDur}
	leftSum := integrator{&s, 0}
	for i := range bands {
		startTime, endTime := s.bandTime(i)
		cumUtil := leftSum.advance(startTime)
		predIdx := leftSum.pos
		minUtil := 1.0
		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
			minUtil = math.Min(minUtil, util[i].Util)
		}
		// Each band snapshots a copy of leftSum positioned at
		// its left edge.
		bands[i] = mmuBand{minUtil, cumUtil, leftSum}
	}

	return s
}
   576  
   577  func (s *mmuSeries) bandTime(i int) (start, end int64) {
   578  	start = int64(i)*s.bandDur + s.util[0].Time
   579  	end = start + s.bandDur
   580  	return
   581  }
   582  
   583  type bandUtil struct {
   584  	// Utilization series index
   585  	series int
   586  	// Band index
   587  	i int
   588  	// Lower bound of mutator utilization for all windows
   589  	// with a left edge in this band.
   590  	utilBound float64
   591  }
   592  
   593  type bandUtilHeap []bandUtil
   594  
   595  func (h bandUtilHeap) Len() int {
   596  	return len(h)
   597  }
   598  
   599  func (h bandUtilHeap) Less(i, j int) bool {
   600  	return h[i].utilBound < h[j].utilBound
   601  }
   602  
   603  func (h bandUtilHeap) Swap(i, j int) {
   604  	h[i], h[j] = h[j], h[i]
   605  }
   606  
   607  func (h *bandUtilHeap) Push(x any) {
   608  	*h = append(*h, x.(bandUtil))
   609  }
   610  
   611  func (h *bandUtilHeap) Pop() any {
   612  	x := (*h)[len(*h)-1]
   613  	*h = (*h)[:len(*h)-1]
   614  	return x
   615  }
   616  
   617  // UtilWindow is a specific window at Time.
   618  type UtilWindow struct {
   619  	Time int64
   620  	// MutatorUtil is the mean mutator utilization in this window.
   621  	MutatorUtil float64
   622  }
   623  
   624  type utilHeap []UtilWindow
   625  
   626  func (h utilHeap) Len() int {
   627  	return len(h)
   628  }
   629  
   630  func (h utilHeap) Less(i, j int) bool {
   631  	if h[i].MutatorUtil != h[j].MutatorUtil {
   632  		return h[i].MutatorUtil > h[j].MutatorUtil
   633  	}
   634  	return h[i].Time > h[j].Time
   635  }
   636  
   637  func (h utilHeap) Swap(i, j int) {
   638  	h[i], h[j] = h[j], h[i]
   639  }
   640  
   641  func (h *utilHeap) Push(x any) {
   642  	*h = append(*h, x.(UtilWindow))
   643  }
   644  
   645  func (h *utilHeap) Pop() any {
   646  	x := (*h)[len(*h)-1]
   647  	*h = (*h)[:len(*h)-1]
   648  	return x
   649  }
   650  
// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	// mmu is the minimum mutator utilization seen so far.
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function. lastTime is
	// math.MaxInt64 when there is no valid previous point (see
	// resetTime).
	lastTime int64
	lastMU   float64
}
   675  
// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	//
	// math.MaxInt64 is the sentinel addMU checks before updating
	// the distribution.
	acc.lastTime = math.MaxInt64
}
   684  
// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// If the minimum has reached zero, it can't go any
		// lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			// Evict the least-worst window to keep only nWorst.
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		if acc.lastTime != math.MaxInt64 {
			// Update distribution.
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}
   759  
   760  // MMU returns the minimum mutator utilization for the given time
   761  // window. This is the minimum utilization for all windows of this
   762  // duration across the execution. The returned value is in the range
   763  // [0, 1].
   764  func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
   765  	acc := accumulator{mmu: 1.0, bound: 1.0}
   766  	c.mmu(window, &acc)
   767  	return acc.mmu
   768  }
   769  
   770  // Examples returns n specific examples of the lowest mutator
   771  // utilization for the given window size. The returned windows will be
   772  // disjoint (otherwise there would be a huge number of
   773  // mostly-overlapping windows at the single lowest point). There are
   774  // no guarantees on which set of disjoint windows this returns.
   775  func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
   776  	acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
   777  	c.mmu(window, &acc)
   778  	sort.Sort(sort.Reverse(acc.wHeap))
   779  	return ([]UtilWindow)(acc.wHeap)
   780  }
   781  
// MUD returns mutator utilization distribution quantiles for the
// given window size.
//
// The mutator utilization distribution is the distribution of mean
// mutator utilization across all windows of the given window size in
// the trace.
//
// The minimum mutator utilization is the minimum (0th percentile) of
// this distribution. (However, if only the minimum is desired, it's
// more efficient to use the MMU method.)
func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
	if len(quantiles) == 0 {
		return []float64{}
	}

	// Each unrefined band contributes a known total mass to the
	// distribution (bandDur except at the end), but in an unknown
	// way. However, we know that all the mass it contributes must
	// be at or above its worst-case mean mutator utilization.
	//
	// Hence, we refine bands until the highest desired
	// distribution quantile is less than the next worst-case mean
	// mutator utilization. At this point, all further
	// contributions to the distribution must be beyond the
	// desired quantile and hence cannot affect it.
	//
	// First, find the highest desired distribution quantile.
	maxQ := quantiles[0]
	for _, q := range quantiles {
		if q > maxQ {
			maxQ = q
		}
	}
	// The distribution's mass is in units of time (it's not
	// normalized because this would make it more annoying to
	// account for future contributions of unrefined bands). The
	// total final mass will be the duration of the trace itself
	// minus the window size. Using this, we can compute the mass
	// corresponding to quantile maxQ.
	var duration int64
	for _, s := range c.series {
		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
		if duration1 >= int64(window) {
			duration += duration1 - int64(window)
		}
	}
	qMass := float64(duration) * maxQ

	// Accumulate the MUD until we have precise information for
	// everything to the left of qMass.
	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
	acc.mud.setTrackMass(qMass)
	c.mmu(window, &acc)

	// Evaluate the quantiles on the accumulated MUD.
	out := make([]float64, len(quantiles))
	for i := range out {
		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
		if math.IsNaN(mu) {
			// There are a few legitimate ways this can
			// happen:
			//
			// 1. If the window is the full trace
			// duration, then the windowed MU function is
			// only defined at a single point, so the MU
			// distribution is not well-defined.
			//
			// 2. If there are no events, then the MU
			// distribution has no mass.
			//
			// Either way, all of the quantiles will have
			// converged toward the MMU at this point.
			mu = acc.mmu
		}
		out[i] = mu
	}
	return out
}
   860  
// mmu computes the statistics requested by acc (MMU, worst windows,
// and/or distribution) for the given window size, refining bands
// lazily in order of increasing utilization lower bound until further
// refinement cannot change the accumulated result.
func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
	if window <= 0 {
		acc.mmu = 0
		return
	}

	var bandU bandUtilHeap
	windows := make([]time.Duration, len(c.series))
	for i, s := range c.series {
		// Clamp the window to each series' total duration.
		windows[i] = window
		if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
			windows[i] = max
		}

		bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
		if bandU == nil {
			bandU = bandU1
		} else {
			bandU = append(bandU, bandU1...)
		}
	}

	// Process bands from lowest utilization bound to highest.
	heap.Init(&bandU)

	// Refine each band into a precise window and MMU until
	// refining the next lowest band can no longer affect the MMU
	// or windows.
	for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
		i := bandU[0].series
		c.series[i].bandMMU(bandU[0].i, windows[i], acc)
		heap.Pop(&bandU)
	}
}
   895  
   896  func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
   897  	// For each band, compute the worst-possible total mutator
   898  	// utilization for all windows that start in that band.
   899  
   900  	// minBands is the minimum number of bands a window can span
   901  	// and maxBands is the maximum number of bands a window can
   902  	// span in any alignment.
   903  	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
   904  	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
   905  	if window > 1 && maxBands < 2 {
   906  		panic("maxBands < 2")
   907  	}
   908  	tailDur := int64(window) % c.bandDur
   909  	nUtil := len(c.bands) - maxBands + 1
   910  	if nUtil < 0 {
   911  		nUtil = 0
   912  	}
   913  	bandU := make([]bandUtil, nUtil)
   914  	for i := range bandU {
   915  		// To compute the worst-case MU, we assume the minimum
   916  		// for any bands that are only partially overlapped by
   917  		// some window and the mean for any bands that are
   918  		// completely covered by all windows.
   919  		var util totalUtil
   920  
   921  		// Find the lowest and second lowest of the partial
   922  		// bands.
   923  		l := c.bands[i].minUtil
   924  		r1 := c.bands[i+minBands-1].minUtil
   925  		r2 := c.bands[i+maxBands-1].minUtil
   926  		minBand := math.Min(l, math.Min(r1, r2))
   927  		// Assume the worst window maximally overlaps the
   928  		// worst minimum and then the rest overlaps the second
   929  		// worst minimum.
   930  		if minBands == 1 {
   931  			util += totalUtilOf(minBand, int64(window))
   932  		} else {
   933  			util += totalUtilOf(minBand, c.bandDur)
   934  			midBand := 0.0
   935  			switch {
   936  			case minBand == l:
   937  				midBand = math.Min(r1, r2)
   938  			case minBand == r1:
   939  				midBand = math.Min(l, r2)
   940  			case minBand == r2:
   941  				midBand = math.Min(l, r1)
   942  			}
   943  			util += totalUtilOf(midBand, tailDur)
   944  		}
   945  
   946  		// Add the total mean MU of bands that are completely
   947  		// overlapped by all windows.
   948  		if minBands > 2 {
   949  			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
   950  		}
   951  
   952  		bandU[i] = bandUtil{series, i, util.mean(window)}
   953  	}
   954  
   955  	return bandU
   956  }
   957  
   958  // bandMMU computes the precise minimum mutator utilization for
   959  // windows with a left edge in band bandIdx.
   960  func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
   961  	util := c.util
   962  
   963  	// We think of the mutator utilization over time as the
   964  	// box-filtered utilization function, which we call the
   965  	// "windowed mutator utilization function". The resulting
   966  	// function is continuous and piecewise linear (unless
   967  	// window==0, which we handle elsewhere), where the boundaries
   968  	// between segments occur when either edge of the window
   969  	// encounters a change in the instantaneous mutator
   970  	// utilization function. Hence, the minimum of this function
   971  	// will always occur when one of the edges of the window
   972  	// aligns with a utilization change, so these are the only
   973  	// points we need to consider.
   974  	//
   975  	// We compute the mutator utilization function incrementally
   976  	// by tracking the integral from t=0 to the left edge of the
   977  	// window and to the right edge of the window.
   978  	left := c.bands[bandIdx].integrator
   979  	right := left
   980  	time, endTime := c.bandTime(bandIdx)
   981  	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
   982  		endTime = utilEnd
   983  	}
   984  	acc.resetTime()
   985  	for {
   986  		// Advance edges to time and time+window.
   987  		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
   988  		if acc.addMU(time, mu, window) {
   989  			break
   990  		}
   991  		if time == endTime {
   992  			break
   993  		}
   994  
   995  		// The maximum slope of the windowed mutator
   996  		// utilization function is 1/window, so we can always
   997  		// advance the time by at least (mu - mmu) * window
   998  		// without dropping below mmu.
   999  		minTime := time + int64((mu-acc.bound)*float64(window))
  1000  
  1001  		// Advance the window to the next time where either
  1002  		// the left or right edge of the window encounters a
  1003  		// change in the utilization curve.
  1004  		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
  1005  			time = t1
  1006  		} else {
  1007  			time = t2
  1008  		}
  1009  		if time < minTime {
  1010  			time = minTime
  1011  		}
  1012  		if time >= endTime {
  1013  			// For MMUs we could stop here, but for MUDs
  1014  			// it's important that we span the entire
  1015  			// band.
  1016  			time = endTime
  1017  		}
  1018  	}
  1019  }
  1020  
// An integrator tracks a position in a utilization function and
// integrates it. Copying an integrator forks the position, which is
// how bandMMU tracks both window edges independently.
type integrator struct {
	// u is the series whose utilization function is integrated.
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}
  1029  
  1030  // advance returns the integral of the utilization function from 0 to
  1031  // time. advance must be called on monotonically increasing values of
  1032  // times.
  1033  func (in *integrator) advance(time int64) totalUtil {
  1034  	util, pos := in.u.util, in.pos
  1035  	// Advance pos until pos+1 is time's strict successor (making
  1036  	// pos time's non-strict predecessor).
  1037  	//
  1038  	// Very often, this will be nearby, so we optimize that case,
  1039  	// but it may be arbitrarily far away, so we handled that
  1040  	// efficiently, too.
  1041  	const maxSeq = 8
  1042  	if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
  1043  		// Nearby. Use a linear scan.
  1044  		for pos+1 < len(util) && util[pos+1].Time <= time {
  1045  			pos++
  1046  		}
  1047  	} else {
  1048  		// Far. Binary search for time's strict successor.
  1049  		l, r := pos, len(util)
  1050  		for l < r {
  1051  			h := int(uint(l+r) >> 1)
  1052  			if util[h].Time <= time {
  1053  				l = h + 1
  1054  			} else {
  1055  				r = h
  1056  			}
  1057  		}
  1058  		pos = l - 1 // Non-strict predecessor.
  1059  	}
  1060  	in.pos = pos
  1061  	var partial totalUtil
  1062  	if time != util[pos].Time {
  1063  		partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
  1064  	}
  1065  	return in.u.sums[pos] + partial
  1066  }
  1067  
  1068  // next returns the smallest time t' > time of a change in the
  1069  // utilization function.
  1070  func (in *integrator) next(time int64) int64 {
  1071  	for _, u := range in.u.util[in.pos:] {
  1072  		if u.Time > time {
  1073  			return u.Time
  1074  		}
  1075  	}
  1076  	return 1<<63 - 1
  1077  }
  1078  
  1079  func isGCSTW(r tracev2.Range) bool {
  1080  	return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC")
  1081  }
  1082  
  1083  func isGCMarkAssist(r tracev2.Range) bool {
  1084  	return r.Name == "GC mark assist"
  1085  }
  1086  
  1087  func isGCSweep(r tracev2.Range) bool {
  1088  	return r.Name == "GC incremental sweep"
  1089  }