// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	"container/heap"
	tracev2 "github.com/bir3/gocompiler/src/internal/trace/v2"
	"math"
	"sort"
	"strings"
	"time"
)

// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
	// Time is the timestamp (trace-relative nanoseconds) at which
	// this utilization takes effect.
	Time int64
	// Util is the mean mutator utilization starting at Time. This
	// is in the range [0, 1].
	Util float64
}

// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int

const (
	// UtilSTW means utilization should account for STW events.
	// This includes non-GC STW events, which are typically user-requested.
	UtilSTW UtilFlags = 1 << iota
	// UtilBackground means utilization should account for
	// background mark workers.
	UtilBackground
	// UtilAssist means utilization should account for mark
	// assists.
	UtilAssist
	// UtilSweep means utilization should account for sweeping.
	UtilSweep

	// UtilPerProc means each P should be given a separate
	// utilization function. Otherwise, there is a single function
	// and each P is given a fraction of the utilization.
	UtilPerProc
)

// MutatorUtilization returns a set of mutator utilization functions
// for the given trace. Each function will always end with 0
// utilization. The bounds of each function are implicit in the first
// and last event; outside of these bounds each function is undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
	if len(events) == 0 {
		return nil
	}

	// perP is the per-P GC accounting state.
	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series is the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	ps := []perP{}
	// stw counts nested stop-the-world events currently in effect.
	stw := 0

	out := [][]MutatorUtil{}
	// assists tracks Gs currently in a mark assist, keyed by G ID.
	assists := map[uint64]bool{}
	// block maps a running G to the event that will next block it
	// (ev.Link), so blocking of GC-working Gs can be detected below.
	block := map[uint64]*Event{}
	// bgMark tracks Gs currently running as background mark workers.
	bgMark := map[uint64]bool{}

	for _, ev := range events {
		switch ev.Type {
		case EvGomaxprocs:
			gomaxprocs := int(ev.Args[0])
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series. In aggregate mode
				// (UtilPerProc unset) all Ps share series 0.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{ev.Ts, 1}})
				}
				ps = append(ps, perP{series: series})
			}
		case EvSTWStart:
			if flags&UtilSTW != 0 {
				stw++
			}
		case EvSTWDone:
			if flags&UtilSTW != 0 {
				stw--
			}
		case EvGCMarkAssistStart:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc++
				assists[ev.G] = true
			}
		case EvGCMarkAssistDone:
			if flags&UtilAssist != 0 {
				ps[ev.P].gc--
				delete(assists, ev.G)
			}
		case EvGCSweepStart:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc++
			}
		case EvGCSweepDone:
			if flags&UtilSweep != 0 {
				ps[ev.P].gc--
			}
		case EvGoStartLabel:
			if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
					bgMark[ev.G] = true
					ps[ev.P].gc++
				}
			}
			fallthrough
		case EvGoStart:
			if assists[ev.G] {
				// Unblocked during assist.
				ps[ev.P].gc++
			}
			block[ev.G] = ev.Link
		default:
			// Only the event recorded as this G's blocking
			// event is interesting here; skip everything else.
			if ev != block[ev.G] {
				continue
			}

			if assists[ev.G] {
				// Blocked during assist.
				ps[ev.P].gc--
			}
			if bgMark[ev.G] {
				// Background mark worker done.
				ps[ev.P].gc--
				delete(bgMark, ev.G)
			}
			delete(block, ev.G)
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				// STW stops every P, so all Ps count as GC.
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
			}
		}
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{events[len(events)-1].Ts, 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}

// MutatorUtilizationV2 returns a set of mutator utilization functions
// for the given v2 trace, passed as an io.Reader. Each function will
// always end with 0 utilization. The bounds of each function are implicit
// in the first and last event; outside of these bounds each function is
// undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilizationV2(events []tracev2.Event, flags UtilFlags) [][]MutatorUtil {
	// Set up a bunch of analysis state.
	type perP struct {
		// gc > 0 indicates that GC is active on this P.
		gc int
		// series is the logical series number for this P. This
		// is necessary because Ps may be removed and then
		// re-added, and then the new P needs a new series.
		series int
	}
	type procsCount struct {
		// time at which procs changed.
		time int64
		// n is the number of procs at that point.
		n int
	}
	out := [][]MutatorUtil{}
	stw := 0
	ps := []perP{}
	// inGC tracks Gs currently in a mark assist.
	inGC := make(map[tracev2.GoID]bool)
	// states records the last-seen scheduling state of each G.
	states := make(map[tracev2.GoID]tracev2.GoState)
	// bgMark tracks Gs running as background mark workers.
	bgMark := make(map[tracev2.GoID]bool)
	// procs is the history of GOMAXPROCS changes, used to patch up
	// utilization retroactively for late-discovered active ranges.
	procs := []procsCount{}
	seenSync := false

	// Helpers.
	handleSTW := func(r tracev2.Range) bool {
		return flags&UtilSTW != 0 && isGCSTW(r)
	}
	handleMarkAssist := func(r tracev2.Range) bool {
		return flags&UtilAssist != 0 && isGCMarkAssist(r)
	}
	handleSweep := func(r tracev2.Range) bool {
		return flags&UtilSweep != 0 && isGCSweep(r)
	}

	// Iterate through the trace, tracking mutator utilization.
	var lastEv *tracev2.Event
	for i := range events {
		ev := &events[i]
		lastEv = ev

		// Process the event.
		switch ev.Kind() {
		case tracev2.EventSync:
			seenSync = true
		case tracev2.EventMetric:
			m := ev.Metric()
			if m.Name != "/sched/gomaxprocs:threads" {
				break
			}
			gomaxprocs := int(m.Value.Uint64())
			if len(ps) > gomaxprocs {
				if flags&UtilPerProc != 0 {
					// End each P's series.
					for _, p := range ps[gomaxprocs:] {
						out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0})
					}
				}
				ps = ps[:gomaxprocs]
			}
			for len(ps) < gomaxprocs {
				// Start new P's series.
				series := 0
				if flags&UtilPerProc != 0 || len(out) == 0 {
					series = len(out)
					out = append(out, []MutatorUtil{{int64(ev.Time()), 1}})
				}
				ps = append(ps, perP{series: series})
			}
			if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n {
				procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs})
			}
		}
		if len(ps) == 0 {
			// We can't start doing any analysis until we see what GOMAXPROCS is.
			// It will show up very early in the trace, but we need to be robust to
			// something else being emitted beforehand.
			continue
		}

		switch ev.Kind() {
		case tracev2.EventRangeActive:
			if seenSync {
				// If we've seen a sync, then we can be sure we're not finding out about
				// something late; we have complete information after that point, and these
				// active events will just be redundant.
				break
			}
			// This range is active back to the start of the trace. We're failing to account
			// for this since we just found out about it now. Fix up the mutator utilization.
			//
			// N.B. A trace can't start during a STW, so we don't handle it here.
			r := ev.Range()
			switch {
			case handleMarkAssist(r):
				if !states[ev.Goroutine()].Executing() {
					// If the goroutine isn't executing, then the fact that it was in mark
					// assist doesn't actually count.
					break
				}
				// This G has been in a mark assist *and running on its P* since the start
				// of the trace.
				fallthrough
			case handleSweep(r):
				// This P has been in sweep (or mark assist, from above) in the start of the trace.
				//
				// We don't need to do anything if UtilPerProc is set. If we get an event like
				// this for a running P, it must show up the first time a P is mentioned. Therefore,
				// this P won't actually have any MutatorUtils on its list yet.
				//
				// However, if UtilPerProc isn't set, then we probably have data from other procs
				// and from previous events. We need to fix that up.
				if flags&UtilPerProc != 0 {
					break
				}
				// Subtract out 1/gomaxprocs mutator utilization for all time periods
				// from the beginning of the trace until now.
				mi, pi := 0, 0
				for mi < len(out[0]) {
					if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time {
						pi++
						continue
					}
					out[0][mi].Util -= float64(1) / float64(procs[pi].n)
					if out[0][mi].Util < 0 {
						out[0][mi].Util = 0
					}
					mi++
				}
			}
			// After accounting for the portion we missed, this just acts like the
			// beginning of a new range.
			fallthrough
		case tracev2.EventRangeBegin:
			r := ev.Range()
			if handleSTW(r) {
				stw++
			} else if handleSweep(r) {
				ps[ev.Proc()].gc++
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc++
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					inGC[g] = true
				}
			}
		case tracev2.EventRangeEnd:
			r := ev.Range()
			if handleSTW(r) {
				stw--
			} else if handleSweep(r) {
				ps[ev.Proc()].gc--
			} else if handleMarkAssist(r) {
				ps[ev.Proc()].gc--
				if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine {
					delete(inGC, g)
				}
			}
		case tracev2.EventStateTransition:
			st := ev.StateTransition()
			if st.Resource.Kind != tracev2.ResourceGoroutine {
				break
			}
			old, new := st.Goroutine()
			g := st.Resource.Goroutine()
			if inGC[g] || bgMark[g] {
				if !old.Executing() && new.Executing() {
					// Started running while doing GC things.
					ps[ev.Proc()].gc++
				} else if old.Executing() && !new.Executing() {
					// Stopped running while doing GC things.
					ps[ev.Proc()].gc--
				}
			}
			states[g] = new
		case tracev2.EventLabel:
			l := ev.Label()
			if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" {
				// Background mark worker.
				//
				// If we're in per-proc mode, we don't
				// count dedicated workers because
				// they kick all of the goroutines off
				// that P, so don't directly
				// contribute to goroutine latency.
				if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") {
					bgMark[ev.Goroutine()] = true
					ps[ev.Proc()].gc++
				}
			}
		}

		if flags&UtilPerProc == 0 {
			// Compute the current average utilization.
			if len(ps) == 0 {
				continue
			}
			gcPs := 0
			if stw > 0 {
				// STW stops every P, so all Ps count as GC.
				gcPs = len(ps)
			} else {
				for i := range ps {
					if ps[i].gc > 0 {
						gcPs++
					}
				}
			}
			mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))}

			// Record the utilization change. (Since
			// len(ps) == len(out), we know len(out) > 0.)
			out[0] = addUtil(out[0], mu)
		} else {
			// Check for per-P utilization changes.
			for i := range ps {
				p := &ps[i]
				util := 1.0
				if stw > 0 || p.gc > 0 {
					util = 0.0
				}
				out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util})
			}
		}
	}

	// No events in the stream.
	if lastEv == nil {
		return nil
	}

	// Add final 0 utilization event to any remaining series. This
	// is important to mark the end of the trace. The exact value
	// shouldn't matter since no window should extend beyond this,
	// but using 0 is symmetric with the start of the trace.
	mu := MutatorUtil{int64(lastEv.Time()), 0}
	for i := range ps {
		out[ps[i].series] = addUtil(out[ps[i].series], mu)
	}
	return out
}

// addUtil appends mu to util, coalescing redundant points: a point
// whose utilization equals the previous point's is dropped, and when
// two points share a timestamp only the lower utilization is kept.
func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
	if len(util) > 0 {
		if mu.Util == util[len(util)-1].Util {
			// No change.
			return util
		}
		if mu.Time == util[len(util)-1].Time {
			// Take the lowest utilization at a time stamp.
			if mu.Util < util[len(util)-1].Util {
				util[len(util)-1] = mu
			}
			return util
		}
	}
	return append(util, mu)
}

// totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization,
// which is also a float64.
type totalUtil float64

// totalUtilOf converts a mean utilization over dur nanoseconds into a
// total utilization.
func totalUtilOf(meanUtil float64, dur int64) totalUtil {
	return totalUtil(meanUtil * float64(dur))
}

// mean returns the mean utilization over dur.
func (u totalUtil) mean(dur time.Duration) float64 {
	return float64(u) / float64(dur)
}

// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
	series []mmuSeries
}

// mmuSeries is the precomputed analysis state for one utilization series.
type mmuSeries struct {
	util []MutatorUtil
	// sums[j] is the cumulative sum of util[:j].
	sums []totalUtil
	// bands summarizes util in non-overlapping bands of duration
	// bandDur.
	bands []mmuBand
	// bandDur is the duration of each band.
	bandDur int64
}

type mmuBand struct {
	// minUtil is the minimum instantaneous mutator utilization in
	// this band.
	minUtil float64
	// cumUtil is the cumulative total mutator utilization between
	// time 0 and the left edge of this band.
	cumUtil totalUtil

	// integrator is the integrator for the left edge of this
	// band.
	integrator integrator
}

// NewMMUCurve returns an MMU curve for the given mutator utilization
// function.
func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
	series := make([]mmuSeries, len(utils))
	for i, util := range utils {
		series[i] = newMMUSeries(util)
	}
	return &MMUCurve{series}
}

// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests.
var bandsPerSeries = 1000

// newMMUSeries builds the cumulative sums and band summaries for a
// single mutator utilization function.
func newMMUSeries(util []MutatorUtil) mmuSeries {
	// Compute cumulative sum.
	sums := make([]totalUtil, len(util))
	var prev MutatorUtil
	var sum totalUtil
	for j, u := range util {
		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
		sums[j] = sum
		prev = u
	}

	// Divide the utilization curve up into equal size
	// non-overlapping "bands" and compute a summary for each of
	// these bands.
	//
	// Compute the duration of each band.
	numBands := bandsPerSeries
	if numBands > len(util) {
		// There's no point in having lots of bands if there
		// aren't many events.
		numBands = len(util)
	}
	dur := util[len(util)-1].Time - util[0].Time
	bandDur := (dur + int64(numBands) - 1) / int64(numBands)
	if bandDur < 1 {
		bandDur = 1
	}
	// Compute the bands. There are numBands+1 bands in order to
	// record the final cumulative sum.
	bands := make([]mmuBand, numBands+1)
	s := mmuSeries{util, sums, bands, bandDur}
	leftSum := integrator{&s, 0}
	for i := range bands {
		startTime, endTime := s.bandTime(i)
		// Snapshot the integrator at the band's left edge; the
		// copy stored in the band preserves its position.
		cumUtil := leftSum.advance(startTime)
		predIdx := leftSum.pos
		minUtil := 1.0
		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
			minUtil = math.Min(minUtil, util[i].Util)
		}
		bands[i] = mmuBand{minUtil, cumUtil, leftSum}
	}

	return s
}

// bandTime returns the start and end times of band i.
func (s *mmuSeries) bandTime(i int) (start, end int64) {
	start = int64(i)*s.bandDur + s.util[0].Time
	end = start + s.bandDur
	return
}

type bandUtil struct {
	// Utilization series index
	series int
	// Band index
	i int
	// Lower bound of mutator utilization for all windows
	// with a left edge in this band.
	utilBound float64
}

// bandUtilHeap is a min-heap of bandUtil by utilBound, implementing
// heap.Interface.
type bandUtilHeap []bandUtil

func (h bandUtilHeap) Len() int {
	return len(h)
}

func (h bandUtilHeap) Less(i, j int) bool {
	return h[i].utilBound < h[j].utilBound
}

func (h bandUtilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *bandUtilHeap) Push(x any) {
	*h = append(*h, x.(bandUtil))
}

func (h *bandUtilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// UtilWindow is a specific window at Time.
type UtilWindow struct {
	Time int64
	// MutatorUtil is the mean mutator utilization in this window.
	MutatorUtil float64
}

// utilHeap orders windows highest-utilization-first (ties broken by
// later Time), so the heap root is the least-worst tracked window.
type utilHeap []UtilWindow

func (h utilHeap) Len() int {
	return len(h)
}

func (h utilHeap) Less(i, j int) bool {
	if h[i].MutatorUtil != h[j].MutatorUtil {
		return h[i].MutatorUtil > h[j].MutatorUtil
	}
	return h[i].Time > h[j].Time
}

func (h utilHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *utilHeap) Push(x any) {
	*h = append(*h, x.(UtilWindow))
}

func (h *utilHeap) Pop() any {
	x := (*h)[len(*h)-1]
	*h = (*h)[:len(*h)-1]
	return x
}

// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
	mmu float64

	// bound is the mutator utilization bound where adding any
	// mutator utilization above this bound cannot affect the
	// accumulated statistics.
	bound float64

	// Worst N window tracking
	nWorst int
	wHeap  utilHeap

	// Mutator utilization distribution tracking
	mud *mud
	// preciseMass is the distribution mass that must be precise
	// before accumulation is stopped.
	preciseMass float64
	// lastTime and lastMU are the previous point added to the
	// windowed mutator utilization function.
	lastTime int64
	lastMU   float64
}

// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
	// This only matters for distribution collection, since that's
	// the only thing that depends on the progression of the
	// windowed mutator utilization function.
	acc.lastTime = math.MaxInt64
}

// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
	if mu < acc.mmu {
		acc.mmu = mu
	}
	acc.bound = acc.mmu

	if acc.nWorst == 0 {
		// If the minimum has reached zero, it can't go any
		// lower, so we can stop early.
		return mu == 0
	}

	// Consider adding this window to the n worst.
	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
		// This window is lower than the K'th worst window.
		//
		// Check if there's any overlapping window
		// already in the heap and keep whichever is
		// worse.
		for i, ui := range acc.wHeap {
			if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
				if ui.MutatorUtil <= mu {
					// Keep the first window.
					goto keep
				} else {
					// Replace it with this window.
					heap.Remove(&acc.wHeap, i)
					break
				}
			}
		}

		heap.Push(&acc.wHeap, UtilWindow{time, mu})
		if len(acc.wHeap) > acc.nWorst {
			heap.Pop(&acc.wHeap)
		}
	keep:
	}

	if len(acc.wHeap) < acc.nWorst {
		// We don't have N windows yet, so keep accumulating.
		acc.bound = 1.0
	} else {
		// Anything above the least worst window has no effect.
		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
	}

	if acc.mud != nil {
		if acc.lastTime != math.MaxInt64 {
			// Update distribution.
			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
		}
		acc.lastTime, acc.lastMU = time, mu
		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
			acc.bound = math.Max(acc.bound, mudBound)
		} else {
			// We haven't accumulated enough total precise
			// mass yet to even reach our goal, so keep
			// accumulating.
			acc.bound = 1
		}
		// It's not worth checking percentiles every time, so
		// just keep accumulating this band.
		return false
	}

	// If we've found enough 0 utilizations, we can stop immediately.
	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}

// MMU returns the minimum mutator utilization for the given time
// window. This is the minimum utilization for all windows of this
// duration across the execution. The returned value is in the range
// [0, 1].
func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
	acc := accumulator{mmu: 1.0, bound: 1.0}
	c.mmu(window, &acc)
	return acc.mmu
}

// Examples returns n specific examples of the lowest mutator
// utilization for the given window size. The returned windows will be
// disjoint (otherwise there would be a huge number of
// mostly-overlapping windows at the single lowest point). There are
// no guarantees on which set of disjoint windows this returns.
func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
	acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
	c.mmu(window, &acc)
	sort.Sort(sort.Reverse(acc.wHeap))
	return ([]UtilWindow)(acc.wHeap)
}

// MUD returns mutator utilization distribution quantiles for the
// given window size.
//
// The mutator utilization distribution is the distribution of mean
// mutator utilization across all windows of the given window size in
// the trace.
//
// The minimum mutator utilization is the minimum (0th percentile) of
// this distribution. (However, if only the minimum is desired, it's
// more efficient to use the MMU method.)
func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
	if len(quantiles) == 0 {
		return []float64{}
	}

	// Each unrefined band contributes a known total mass to the
	// distribution (bandDur except at the end), but in an unknown
	// way. However, we know that all the mass it contributes must
	// be at or above its worst-case mean mutator utilization.
	//
	// Hence, we refine bands until the highest desired
	// distribution quantile is less than the next worst-case mean
	// mutator utilization. At this point, all further
	// contributions to the distribution must be beyond the
	// desired quantile and hence cannot affect it.
	//
	// First, find the highest desired distribution quantile.
	maxQ := quantiles[0]
	for _, q := range quantiles {
		if q > maxQ {
			maxQ = q
		}
	}
	// The distribution's mass is in units of time (it's not
	// normalized because this would make it more annoying to
	// account for future contributions of unrefined bands). The
	// total final mass will be the duration of the trace itself
	// minus the window size. Using this, we can compute the mass
	// corresponding to quantile maxQ.
	var duration int64
	for _, s := range c.series {
		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
		if duration1 >= int64(window) {
			duration += duration1 - int64(window)
		}
	}
	qMass := float64(duration) * maxQ

	// Accumulate the MUD until we have precise information for
	// everything to the left of qMass.
	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
	acc.mud.setTrackMass(qMass)
	c.mmu(window, &acc)

	// Evaluate the quantiles on the accumulated MUD.
	out := make([]float64, len(quantiles))
	for i := range out {
		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
		if math.IsNaN(mu) {
			// There are a few legitimate ways this can
			// happen:
			//
			// 1. If the window is the full trace
			// duration, then the windowed MU function is
			// only defined at a single point, so the MU
			// distribution is not well-defined.
			//
			// 2. If there are no events, then the MU
			// distribution has no mass.
			//
			// Either way, all of the quantiles will have
			// converged toward the MMU at this point.
			mu = acc.mmu
		}
		out[i] = mu
	}
	return out
}

// mmu refines the MMU curve into acc, processing bands in order of
// increasing worst-case utilization bound until no remaining band can
// affect acc's statistics.
func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
	if window <= 0 {
		acc.mmu = 0
		return
	}

	var bandU bandUtilHeap
	windows := make([]time.Duration, len(c.series))
	for i, s := range c.series {
		windows[i] = window
		if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
			// Clamp the window to the series duration.
			windows[i] = max
		}

		bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
		if bandU == nil {
			bandU = bandU1
		} else {
			bandU = append(bandU, bandU1...)
		}
	}

	// Process bands from lowest utilization bound to highest.
	heap.Init(&bandU)

	// Refine each band into a precise window and MMU until
	// refining the next lowest band can no longer affect the MMU
	// or windows.
	for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
		i := bandU[0].series
		c.series[i].bandMMU(bandU[0].i, windows[i], acc)
		heap.Pop(&bandU)
	}
}

// mkBandUtil returns, for every band, a lower bound on the mean
// mutator utilization of any window of the given duration whose left
// edge falls in that band.
func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
	// For each band, compute the worst-possible total mutator
	// utilization for all windows that start in that band.

	// minBands is the minimum number of bands a window can span
	// and maxBands is the maximum number of bands a window can
	// span in any alignment.
	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
	if window > 1 && maxBands < 2 {
		panic("maxBands < 2")
	}
	tailDur := int64(window) % c.bandDur
	nUtil := len(c.bands) - maxBands + 1
	if nUtil < 0 {
		nUtil = 0
	}
	bandU := make([]bandUtil, nUtil)
	for i := range bandU {
		// To compute the worst-case MU, we assume the minimum
		// for any bands that are only partially overlapped by
		// some window and the mean for any bands that are
		// completely covered by all windows.
		var util totalUtil

		// Find the lowest and second lowest of the partial
		// bands.
		l := c.bands[i].minUtil
		r1 := c.bands[i+minBands-1].minUtil
		r2 := c.bands[i+maxBands-1].minUtil
		minBand := math.Min(l, math.Min(r1, r2))
		// Assume the worst window maximally overlaps the
		// worst minimum and then the rest overlaps the second
		// worst minimum.
		if minBands == 1 {
			util += totalUtilOf(minBand, int64(window))
		} else {
			util += totalUtilOf(minBand, c.bandDur)
			midBand := 0.0
			switch {
			case minBand == l:
				midBand = math.Min(r1, r2)
			case minBand == r1:
				midBand = math.Min(l, r2)
			case minBand == r2:
				midBand = math.Min(l, r1)
			}
			util += totalUtilOf(midBand, tailDur)
		}

		// Add the total mean MU of bands that are completely
		// overlapped by all windows.
		if minBands > 2 {
			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
		}

		bandU[i] = bandUtil{series, i, util.mean(window)}
	}

	return bandU
}

// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
	util := c.util

	// We think of the mutator utilization over time as the
	// box-filtered utilization function, which we call the
	// "windowed mutator utilization function". The resulting
	// function is continuous and piecewise linear (unless
	// window==0, which we handle elsewhere), where the boundaries
	// between segments occur when either edge of the window
	// encounters a change in the instantaneous mutator
	// utilization function. Hence, the minimum of this function
	// will always occur when one of the edges of the window
	// aligns with a utilization change, so these are the only
	// points we need to consider.
	//
	// We compute the mutator utilization function incrementally
	// by tracking the integral from t=0 to the left edge of the
	// window and to the right edge of the window.
	left := c.bands[bandIdx].integrator
	right := left
	time, endTime := c.bandTime(bandIdx)
	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
		endTime = utilEnd
	}
	acc.resetTime()
	for {
		// Advance edges to time and time+window.
		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
		if acc.addMU(time, mu, window) {
			break
		}
		if time == endTime {
			break
		}

		// The maximum slope of the windowed mutator
		// utilization function is 1/window, so we can always
		// advance the time by at least (mu - mmu) * window
		// without dropping below mmu.
		minTime := time + int64((mu-acc.bound)*float64(window))

		// Advance the window to the next time where either
		// the left or right edge of the window encounters a
		// change in the utilization curve.
		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
			time = t1
		} else {
			time = t2
		}
		if time < minTime {
			time = minTime
		}
		if time >= endTime {
			// For MMUs we could stop here, but for MUDs
			// it's important that we span the entire
			// band.
			time = endTime
		}
	}
}

// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
	u *mmuSeries
	// pos is the index in u.util of the current time's non-strict
	// predecessor.
	pos int
}

// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
1032 func (in *integrator) advance(time int64) totalUtil { 1033 util, pos := in.u.util, in.pos 1034 // Advance pos until pos+1 is time's strict successor (making 1035 // pos time's non-strict predecessor). 1036 // 1037 // Very often, this will be nearby, so we optimize that case, 1038 // but it may be arbitrarily far away, so we handled that 1039 // efficiently, too. 1040 const maxSeq = 8 1041 if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time { 1042 // Nearby. Use a linear scan. 1043 for pos+1 < len(util) && util[pos+1].Time <= time { 1044 pos++ 1045 } 1046 } else { 1047 // Far. Binary search for time's strict successor. 1048 l, r := pos, len(util) 1049 for l < r { 1050 h := int(uint(l+r) >> 1) 1051 if util[h].Time <= time { 1052 l = h + 1 1053 } else { 1054 r = h 1055 } 1056 } 1057 pos = l - 1 // Non-strict predecessor. 1058 } 1059 in.pos = pos 1060 var partial totalUtil 1061 if time != util[pos].Time { 1062 partial = totalUtilOf(util[pos].Util, time-util[pos].Time) 1063 } 1064 return in.u.sums[pos] + partial 1065 } 1066 1067 // next returns the smallest time t' > time of a change in the 1068 // utilization function. 1069 func (in *integrator) next(time int64) int64 { 1070 for _, u := range in.u.util[in.pos:] { 1071 if u.Time > time { 1072 return u.Time 1073 } 1074 } 1075 return 1<<63 - 1 1076 } 1077 1078 func isGCSTW(r tracev2.Range) bool { 1079 return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC") 1080 } 1081 1082 func isGCMarkAssist(r tracev2.Range) bool { 1083 return r.Name == "GC mark assist" 1084 } 1085 1086 func isGCSweep(r tracev2.Range) bool { 1087 return r.Name == "GC incremental sweep" 1088 }