// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	"container/heap"
	"math"
	"sort"
	"strings"
	"time"

	tracev2 "github.com/go-asm/go/trace/v2"
)

// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	// Time is the timestamp (trace nanoseconds) at which this
	// utilization value takes effect.
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}

// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int

const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep

	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)

// MutatorUtilization returns a set of mutator utilization functions
// for the given trace. Each function will always end with 0
// utilization. The bounds of each function are implicit in the first
// and last event; outside of these bounds each function is undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
	if len(events) == 0 {
		return nil
	}

	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	ps := []perP{}
	stw := 0

	out := [][]MutatorUtil{}
	// assists tracks goroutines currently in a mark assist.
	assists := map[uint64]bool{}
	// block maps a goroutine to the event that will unblock it;
	// used to detect when an assist/worker goroutine blocks.
	block := map[uint64]*Event{}
	// bgMark tracks goroutines running as background mark workers.
	bgMark := map[uint64]bool{}

	for _, ev := range events {
		switch ev.Type {
		case EvGomaxprocs:
			gomaxprocs := int(ev.Args[0])
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{ev.Ts, 1}})
				}
				ps = append(ps, perP{series: series})
			}
		case EvSTWStart:
			if flags&UtilSTW != 0 {
				stw++
			}
		case EvSTWDone:
			if flags&UtilSTW != 0 {
				stw--
			}
		case EvGCMarkAssistStart:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc++
				assists[ev.G] = true
			}
		case EvGCMarkAssistDone:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc--
				delete(assists, ev.G)
			}
		case EvGCSweepStart:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc++
			}
		case EvGCSweepDone:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc--
			}
		case EvGoStartLabel:
			if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
					bgMark[ev.G] = true
					ps[ev.P].gc++
				}
			}
			fallthrough
		case EvGoStart:
			if assists[ev.G] {
				// Unblocked during assist.
				ps[ev.P].gc++
			}
			block[ev.G] = ev.Link
		default:
			if ev != block[ev.G] {
				continue
			}

			if assists[ev.G] {
				// Blocked during assist.
				ps[ev.P].gc--
			}
			if bgMark[ev.G] {
				// Background mark worker done.
				ps[ev.P].gc--
				delete(bgMark, ev.G)
			}
			delete(block, ev.G)
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
			}
		}
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{events[len(events)-1].Ts, 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}

// MutatorUtilizationV2 returns a set of mutator utilization functions
// for the given v2 trace, passed as an io.Reader. Each function will
// always end with 0 utilization. The bounds of each function are implicit
// in the first and last event; outside of these bounds each function is
// undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilizationV2(events []tracev2.Event, flags UtilFlags) [][]MutatorUtil {
	// Set up a bunch of analysis state.
	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	type procsCount struct {
		// time at which procs changed.
		time int64
		// n is the number of procs at that point.
		n int
	}
	out := [][]MutatorUtil{}
	stw := 0
	ps := []perP{}
	inGC := make(map[tracev2.GoID]bool)
	states := make(map[tracev2.GoID]tracev2.GoState)
	bgMark := make(map[tracev2.GoID]bool)
	procs := []procsCount{}
	seenSync := false

	// Helpers.
	handleSTW := func(r tracev2.Range) bool {
		return flags&UtilSTW != 0 && isGCSTW(r)
	}
	handleMarkAssist := func(r tracev2.Range) bool {
		return flags&UtilAssist != 0 && isGCMarkAssist(r)
	}
	handleSweep := func(r tracev2.Range) bool {
		return flags&UtilSweep != 0 && isGCSweep(r)
	}

	// Iterate through the trace, tracking mutator utilization.
	var lastEv *tracev2.Event
	for i := range events {
		ev := &events[i]
		lastEv = ev

		// Process the event.
		switch ev.Kind() {
		case tracev2.EventSync:
			seenSync = true
		case tracev2.EventMetric:
			m := ev.Metric()
			if m.Name != "/sched/gomaxprocs:threads" {
				break
			}
			gomaxprocs := int(m.Value.Uint64())
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
				}
				ps = append(ps, perP{series: series})
			}
			if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
				procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
			}
		}
		if len(ps) == 0 {
			// We can't start doing any analysis until we see what GOMAXPROCS is.
			// It will show up very early in the trace, but we need to be robust to
			// something else being emitted beforehand.
			continue
		}

		switch ev.Kind() {
		case tracev2.EventRangeActive:
			if seenSync {
				// If we've seen a sync, then we can be sure we're not finding out about
				// something late; we have complete information after that point, and these
				// active events will just be redundant.
				break
			}
			// This range is active back to the start of the trace. We're failing to account
			// for this since we just found out about it now. Fix up the mutator utilization.
			//
			// N.B. A trace can't start during a STW, so we don't handle it here.
			r := ev.Range()
			switch {
			case handleMarkAssist(r):
				if !states[ev.Goroutine()].Executing() {
					// If the goroutine isn't executing, then the fact that it was in mark
					// assist doesn't actually count.
					break
				}
				// This G has been in a mark assist *and running on its P* since the start
				// of the trace.
				fallthrough
			case handleSweep(r):
				// This P has been in sweep (or mark assist, from above) in the start of the trace.
				//
				// We don't need to do anything if UtilPerProc is set. If we get an event like
				// this for a running P, it must show up the first time a P is mentioned. Therefore,
				// this P won't actually have any MutatorUtils on its list yet.
				//
				// However, if UtilPerProc isn't set, then we probably have data from other procs
				// and from previous events. We need to fix that up.
				if flags&UtilPerProc != 0 {
					break
				}
				// Subtract out 1/gomaxprocs mutator utilization for all time periods
				// from the beginning of the trace until now.
				mi, pi := 0, 0
				for mi < len(out[0]) {
					if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
						pi++
						continue
					}
					out[0][mi].Util -= float64(1) / float64(procs[pi].n)
					if out[0][mi].Util < 0 {
						out[0][mi].Util = 0
					}
					mi++
				}
			}
			// After accounting for the portion we missed, this just acts like the
			// beginning of a new range.
			fallthrough
		case tracev2.EventRangeBegin:
			r := ev.Range()
			if handleSTW(r) {
				stw++
			} else if handleSweep(r) {
				ps[ev.Proc()].gc++
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc++
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					inGC[g] = true
				}
			}
		case tracev2.EventRangeEnd:
			r := ev.Range()
			if handleSTW(r) {
				stw--
			} else if handleSweep(r) {
				ps[ev.Proc()].gc--
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc--
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					delete(inGC, g)
				}
			}
		case tracev2.EventStateTransition:
			st := ev.StateTransition()
			if st.Resource.Kind != tracev2.ResourceGoroutine {
				break
			}
			old, new := st.Goroutine()
			g := st.Resource.Goroutine()
			if inGC[g] || bgMark[g] {
				if !old.Executing() && new.Executing() {
					// Started running while doing GC things.
					ps[ev.Proc()].gc++
				} else if old.Executing() && !new.Executing() {
					// Stopped running while doing GC things.
					ps[ev.Proc()].gc--
				}
			}
			states[g] = new
		case tracev2.EventLabel:
			l := ev.Label()
			if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
					bgMark[ev.Goroutine()] = true
					ps[ev.Proc()].gc++
				}
			}
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
			}
		}
	}

	// No events in the stream.
	if lastEv == nil {
		return nil
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{int64(lastEv.Time()), 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}

// addUtil appends the utilization change mu to util, keeping the
// series minimal: points whose utilization equals the last point are
// dropped, and when two points share a timestamp only the lower
// utilization is kept.
func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
	if len(util) > 0 {
		if mu.Util == util[len(util)-1].Util {
			// No change.
			return util
		}
		if mu.Time == util[len(util)-1].Time {
			// Take the lowest utilization at a time stamp.
			if mu.Util < util[len(util)-1].Util {
				util[len(util)-1] = mu
			}
			return util
		}
	}
	return append(util, mu)
}

// totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization,
// which is also a float64.
type totalUtil float64

// totalUtilOf converts a mean utilization over a duration of dur
// nanoseconds into a total utilization.
func totalUtilOf(meanUtil float64, dur int64) totalUtil {
	return totalUtil(meanUtil * float64(dur))
}

// mean returns the mean utilization over dur.
func (u totalUtil) mean(dur time.Duration) float64 {
	return float64(u) / float64(dur)
}

// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	series []mmuSeries
}

// mmuSeries is the pre-processed form of a single mutator
// utilization function, with cumulative sums and band summaries for
// fast window queries.
type mmuSeries struct {
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}

type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}

// NewMMUCurve returns an MMU curve for the given mutator utilization
// function.
func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
	series := make([]mmuSeries, len(utils))
	for i, util := range utils {
		series[i] = newMMUSeries(util)
	}
	return &MMUCurve{series}
}

// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests.
var bandsPerSeries = 1000

// newMMUSeries pre-processes util into an mmuSeries: it computes the
// cumulative utilization sums and summarizes the curve into bands.
func newMMUSeries(util []MutatorUtil) mmuSeries {
	// Compute cumulative sum.
	sums := make([]totalUtil, len(util))
	var prev MutatorUtil
	var sum totalUtil
	for j, u := range util {
		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
		sums[j] = sum
		prev = u
	}

	// Divide the utilization curve up into equal size
	// non-overlapping "bands" and compute a summary for each of
	// these bands.
	//
	// Compute the duration of each band.
	numBands := bandsPerSeries
	if numBands > len(util) {
		// There's no point in having lots of bands if there
		// aren't many events.
		numBands = len(util)
	}
	dur := util[len(util)-1].Time - util[0].Time
	bandDur := (dur + int64(numBands) - 1) / int64(numBands)
	if bandDur < 1 {
		bandDur = 1
	}
	// Compute the bands. There are numBands+1 bands in order to
	// record the final cumulative sum.
	bands := make([]mmuBand, numBands+1)
	s := mmuSeries{util, sums, bands, bandDur}
	leftSum := integrator{&s, 0}
	for i := range bands {
		startTime, endTime := s.bandTime(i)
		cumUtil := leftSum.advance(startTime)
		predIdx := leftSum.pos
		minUtil := 1.0
		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
			minUtil = math.Min(minUtil, util[i].Util)
		}
		bands[i] = mmuBand{minUtil, cumUtil, leftSum}
	}

	return s
}

// bandTime returns the time span [start, end) covered by band i.
func (s *mmuSeries) bandTime(i int) (start, end int64) {
	start = int64(i)*s.bandDur + s.util[0].Time
	end = start + s.bandDur
	return
}

type bandUtil struct {
	// Utilization series index
	series int
	// Band index
	i int
	// Lower bound of mutator utilization for all windows
	// with a left edge in this band.
	utilBound float64
}

// bandUtilHeap is a min-heap of bandUtil ordered by utilBound,
// implementing container/heap's heap.Interface.
type bandUtilHeap []bandUtil

func (h bandUtilHeap) Len() int {
	return len(h)
}

func (h bandUtilHeap) Less(i, j int) bool {
	return h[i].utilBound < h[j].utilBound
}

func (h bandUtilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *bandUtilHeap) Push(x any) {
	*h = append(*h, x.(bandUtil))
}

func (h *bandUtilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// UtilWindow is a specific window at Time.
type UtilWindow struct {
	Time int64
	// MutatorUtil is the mean mutator utilization in this window.
	MutatorUtil float64
}

// utilHeap is a max-heap of UtilWindow ordered by MutatorUtil (ties
// broken by Time), so the root is the least-worst tracked window.
type utilHeap []UtilWindow

func (h utilHeap) Len() int {
	return len(h)
}

func (h utilHeap) Less(i, j int) bool {
	if h[i].MutatorUtil != h[j].MutatorUtil {
		return h[i].MutatorUtil > h[j].MutatorUtil
	}
	return h[i].Time > h[j].Time
}

func (h utilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *utilHeap) Push(x any) {
	*h = append(*h, x.(UtilWindow))
}

func (h *utilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function.
	lastTime int64
	lastMU   float64
}

// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	acc.lastTime = math.MaxInt64
}

// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// If the minimum has reached zero, it can't go any
		// lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		if acc.lastTime != math.MaxInt64 {
			// Update distribution.
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}

// MMU returns the minimum mutator utilization for the given time
// window. This is the minimum utilization for all windows of this
// duration across the execution. The returned value is in the range
// [0, 1].
func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
	acc := accumulator{mmu: 1.0, bound: 1.0}
	c.mmu(window, &acc)
	return acc.mmu
}

// Examples returns n specific examples of the lowest mutator
// utilization for the given window size. The returned windows will be
// disjoint (otherwise there would be a huge number of
// mostly-overlapping windows at the single lowest point). There are
// no guarantees on which set of disjoint windows this returns.
func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
	acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
	c.mmu(window, &acc)
	sort.Sort(sort.Reverse(acc.wHeap))
	return ([]UtilWindow)(acc.wHeap)
}

// MUD returns mutator utilization distribution quantiles for the
// given window size.
//
// The mutator utilization distribution is the distribution of mean
// mutator utilization across all windows of the given window size in
// the trace.
//
// The minimum mutator utilization is the minimum (0th percentile) of
// this distribution. (However, if only the minimum is desired, it's
// more efficient to use the MMU method.)
func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
	if len(quantiles) == 0 {
		return []float64{}
	}

	// Each unrefined band contributes a known total mass to the
	// distribution (bandDur except at the end), but in an unknown
	// way. However, we know that all the mass it contributes must
	// be at or above its worst-case mean mutator utilization.
	//
	// Hence, we refine bands until the highest desired
	// distribution quantile is less than the next worst-case mean
	// mutator utilization. At this point, all further
	// contributions to the distribution must be beyond the
	// desired quantile and hence cannot affect it.
	//
	// First, find the highest desired distribution quantile.
	maxQ := quantiles[0]
	for _, q := range quantiles {
		if q > maxQ {
			maxQ = q
		}
	}
	// The distribution's mass is in units of time (it's not
	// normalized because this would make it more annoying to
	// account for future contributions of unrefined bands). The
	// total final mass will be the duration of the trace itself
	// minus the window size. Using this, we can compute the mass
	// corresponding to quantile maxQ.
	var duration int64
	for _, s := range c.series {
		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
		if duration1 >= int64(window) {
			duration += duration1 - int64(window)
		}
	}
	qMass := float64(duration) * maxQ

	// Accumulate the MUD until we have precise information for
	// everything to the left of qMass.
	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
	acc.mud.setTrackMass(qMass)
	c.mmu(window, &acc)

	// Evaluate the quantiles on the accumulated MUD.
	out := make([]float64, len(quantiles))
	for i := range out {
		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
		if math.IsNaN(mu) {
			// There are a few legitimate ways this can
			// happen:
			//
			// 1. If the window is the full trace
			// duration, then the windowed MU function is
			// only defined at a single point, so the MU
			// distribution is not well-defined.
			//
			// 2. If there are no events, then the MU
			// distribution has no mass.
			//
			// Either way, all of the quantiles will have
			// converged toward the MMU at this point.
			mu = acc.mmu
		}
		out[i] = mu
	}
	return out
}

// mmu refines the MMU statistics in acc for the given window size,
// processing bands in order of increasing worst-case utilization and
// stopping once no remaining band can affect the result.
func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
	if window <= 0 {
		acc.mmu = 0
		return
	}

	var bandU bandUtilHeap
	windows := make([]time.Duration, len(c.series))
	for i, s := range c.series {
		windows[i] = window
		if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
			windows[i] = max
		}

		bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
		if bandU == nil {
			bandU = bandU1
		} else {
			bandU = append(bandU, bandU1...)
		}
	}

	// Process bands from lowest utilization bound to highest.
	heap.Init(&bandU)

	// Refine each band into a precise window and MMU until
	// refining the next lowest band can no longer affect the MMU
	// or windows.
	for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
		i := bandU[0].series
		c.series[i].bandMMU(bandU[0].i, windows[i], acc)
		heap.Pop(&bandU)
	}
}

// mkBandUtil computes, for each band, a lower bound on the mean
// mutator utilization of every window whose left edge falls in that
// band.
func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
	// For each band, compute the worst-possible total mutator
	// utilization for all windows that start in that band.

	// minBands is the minimum number of bands a window can span
	// and maxBands is the maximum number of bands a window can
	// span in any alignment.
	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
	if window > 1 && maxBands < 2 {
		panic("maxBands < 2")
	}
	tailDur := int64(window) % c.bandDur
	nUtil := len(c.bands) - maxBands + 1
	if nUtil < 0 {
		nUtil = 0
	}
	bandU := make([]bandUtil, nUtil)
	for i := range bandU {
		// To compute the worst-case MU, we assume the minimum
		// for any bands that are only partially overlapped by
		// some window and the mean for any bands that are
		// completely covered by all windows.
		var util totalUtil

		// Find the lowest and second lowest of the partial
		// bands.
		l := c.bands[i].minUtil
		r1 := c.bands[i+minBands-1].minUtil
		r2 := c.bands[i+maxBands-1].minUtil
		minBand := math.Min(l, math.Min(r1, r2))
		// Assume the worst window maximally overlaps the
		// worst minimum and then the rest overlaps the second
		// worst minimum.
		if minBands == 1 {
			util += totalUtilOf(minBand, int64(window))
		} else {
			util += totalUtilOf(minBand, c.bandDur)
			midBand := 0.0
			switch {
			case minBand == l:
				midBand = math.Min(r1, r2)
			case minBand == r1:
				midBand = math.Min(l, r2)
			case minBand == r2:
				midBand = math.Min(l, r1)
			}
			util += totalUtilOf(midBand, tailDur)
		}

		// Add the total mean MU of bands that are completely
		// overlapped by all windows.
		if minBands > 2 {
			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
		}

		bandU[i] = bandUtil{series, i, util.mean(window)}
	}

	return bandU
}

// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
	util := c.util

	// We think of the mutator utilization over time as the
	// box-filtered utilization function, which we call the
	// "windowed mutator utilization function". The resulting
	// function is continuous and piecewise linear (unless
	// window==0, which we handle elsewhere), where the boundaries
	// between segments occur when either edge of the window
	// encounters a change in the instantaneous mutator
	// utilization function. Hence, the minimum of this function
	// will always occur when one of the edges of the window
	// aligns with a utilization change, so these are the only
	// points we need to consider.
	//
	// We compute the mutator utilization function incrementally
	// by tracking the integral from t=0 to the left edge of the
	// window and to the right edge of the window.
	left := c.bands[bandIdx].integrator
	right := left
	time, endTime := c.bandTime(bandIdx)
	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
		endTime = utilEnd
	}
	acc.resetTime()
	for {
		// Advance edges to time and time+window.
		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
		if acc.addMU(time, mu, window) {
			break
		}
		if time == endTime {
			break
		}

		// The maximum slope of the windowed mutator
		// utilization function is 1/window, so we can always
		// advance the time by at least (mu - mmu) * window
		// without dropping below mmu.
		minTime := time + int64((mu-acc.bound)*float64(window))

		// Advance the window to the next time where either
		// the left or right edge of the window encounters a
		// change in the utilization curve.
		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
			time = t1
		} else {
			time = t2
		}
		if time < minTime {
			time = minTime
		}
		if time >= endTime {
			// For MMUs we could stop here, but for MUDs
			// it's important that we span the entire
			// band.
			time = endTime
		}
	}
}

// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}

// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
1033 func (in *integrator) advance(time int64) totalUtil { 1034 util, pos := in.u.util, in.pos 1035 // Advance pos until pos+1 is time's strict successor (making 1036 // pos time's non-strict predecessor). 1037 // 1038 // Very often, this will be nearby, so we optimize that case, 1039 // but it may be arbitrarily far away, so we handled that 1040 // efficiently, too. 1041 const maxSeq = 8 1042 if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time { 1043 // Nearby. Use a linear scan. 1044 for pos+1 < len(util) && util[pos+1].Time <= time { 1045 pos++ 1046 } 1047 } else { 1048 // Far. Binary search for time's strict successor. 1049 l, r := pos, len(util) 1050 for l < r { 1051 h := int(uint(l+r) >> 1) 1052 if util[h].Time <= time { 1053 l = h + 1 1054 } else { 1055 r = h 1056 } 1057 } 1058 pos = l - 1 // Non-strict predecessor. 1059 } 1060 in.pos = pos 1061 var partial totalUtil 1062 if time != util[pos].Time { 1063 partial = totalUtilOf(util[pos].Util, time-util[pos].Time) 1064 } 1065 return in.u.sums[pos] + partial 1066 } 1067 1068 // next returns the smallest time t' > time of a change in the 1069 // utilization function. 1070 func (in *integrator) next(time int64) int64 { 1071 for _, u := range in.u.util[in.pos:] { 1072 if u.Time > time { 1073 return u.Time 1074 } 1075 } 1076 return 1<<63 - 1 1077 } 1078 1079 func isGCSTW(r tracev2.Range) bool { 1080 return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC") 1081 } 1082 1083 func isGCMarkAssist(r tracev2.Range) bool { 1084 return r.Name == "GC mark assist" 1085 } 1086 1087 func isGCSweep(r tracev2.Range) bool { 1088 return r.Name == "GC incremental sweep" 1089 }