github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/runtime/trace/trace_test.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace_test

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"internal/profile"
	"internal/race"
	"internal/trace"
	"io"
	"net"
	"os"
	"runtime"
	"runtime/pprof"
	. "runtime/trace"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"
)

var (
	saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
)

// TestEventBatch tests that Flush calls that happen during Start
// don't produce corrupted traces.
func TestEventBatch(t *testing.T) {
	if race.Enabled {
		t.Skip("skipping in race mode")
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	if testing.Short() {
		t.Skip("skipping in short mode")
	}
	// During Start, a bunch of records are written to reflect the current
	// snapshot of the program, including the state of each goroutine.
	// Some string constants are also written to the trace to aid trace
	// parsing. This test checks that a Flush of the buffer occurring
	// during this process doesn't corrupt the trace.
	// Exactly when a Flush happens during Start is hard to predict,
	// so we test with a range of goroutine counts, hoping that one
	// of them triggers a Flush.
	// This range was chosen to fill up a ~64KB buffer with traceEvGoCreate
	// and traceEvGoWaiting events (12~13 bytes per goroutine).
	for g := 4950; g < 5050; g++ {
		n := g
		t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
			var wg sync.WaitGroup
			wg.Add(n)

			in := make(chan bool, 1000)
			for i := 0; i < n; i++ {
				go func() {
					<-in
					wg.Done()
				}()
			}
			buf := new(bytes.Buffer)
			if err := Start(buf); err != nil {
				t.Fatalf("failed to start tracing: %v", err)
			}

			for i := 0; i < n; i++ {
				in <- true
			}
			wg.Wait()
			Stop()

			_, err := trace.Parse(buf, "")
			if err == trace.ErrTimeOrder {
				t.Skipf("skipping trace: %v", err)
			}

			if err != nil {
				t.Fatalf("failed to parse trace: %v", err)
			}
		})
	}
}

func TestTraceStartStop(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	Stop()
	size := buf.Len()
	if size == 0 {
		t.Fatalf("trace is empty")
	}
	time.Sleep(100 * time.Millisecond)
	if size != buf.Len() {
		t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len())
	}
	saveTrace(t, buf, "TestTraceStartStop")
}

func TestTraceDoubleStart(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	Stop()
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	if err := Start(buf); err == nil {
		t.Fatalf("starting tracing a second time unexpectedly succeeded")
	}
	Stop()
	Stop()
}

func TestTrace(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	Stop()
	saveTrace(t, buf, "TestTrace")
	_, err := trace.Parse(buf, "")
	if err == trace.ErrTimeOrder {
		t.Skipf("skipping trace: %v", err)
	}

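	// Any other parse error means the trace itself is corrupted.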
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
}

func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
	res, err := trace.Parse(r, "")
	if err == trace.ErrTimeOrder {
		t.Skipf("skipping trace: %v", err)
	}
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
	gs := trace.GoroutineStats(res.Events)
	for goid := range gs {
		// We don't do any particular checks on the result at the moment.
		// But still check that RelatedGoroutines does not crash, hang, etc.
		_ = trace.RelatedGoroutines(res.Events, goid)
	}
	return res.Events, gs
}

func testBrokenTimestamps(t *testing.T, data []byte) {
	// On some processors cputicks (used to generate trace timestamps)
	// produces non-monotonic timestamps. It is important that the parser
	// distinguishes logically inconsistent traces (e.g. missing, excessive
	// or misordered events) from broken timestamps. The former is a bug
	// in the tracer, the latter is a machine issue.
	// So now that we have a consistent trace, test that (1) the parser does
	// not return a logical error in the case of broken timestamps
	// and (2) broken timestamps are eventually detected and reported.
	trace.BreakTimestampsForTesting = true
	defer func() {
		trace.BreakTimestampsForTesting = false
	}()
	for i := 0; i < 1e4; i++ {
		_, err := trace.Parse(bytes.NewReader(data), "")
		if err == trace.ErrTimeOrder {
			return
		}
		if err != nil {
			t.Fatalf("failed to parse trace: %v", err)
		}
	}
}

func TestTraceStress(t *testing.T) {
	switch runtime.GOOS {
	case "js", "wasip1":
		t.Skip("no os.Pipe on " + runtime.GOOS)
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	if testing.Short() {
		t.Skip("skipping in -short mode")
	}

	var wg sync.WaitGroup
	done := make(chan bool)

	// Create a goroutine blocked before tracing.
	wg.Add(1)
	go func() {
		<-done
		wg.Done()
	}()

	// Create a goroutine blocked in syscall before tracing.
	rp, wp, err := os.Pipe()
	if err != nil {
		t.Fatalf("failed to create pipe: %v", err)
	}
	defer func() {
		rp.Close()
		wp.Close()
	}()
	wg.Add(1)
	go func() {
		var tmp [1]byte
		rp.Read(tmp[:])
		<-done
		wg.Done()
	}()
	time.Sleep(time.Millisecond) // give the goroutine above time to block

	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	procs := runtime.GOMAXPROCS(10)
	time.Sleep(50 * time.Millisecond) // test proc stop/start events

	go func() {
		runtime.LockOSThread()
		for {
			select {
			case <-done:
				return
			default:
				runtime.Gosched()
			}
		}
	}()

	runtime.GC()
	// Trigger GC from malloc.
	n := int(1e3)
	if isMemoryConstrained() {
		// Reduce allocation to avoid running out of
		// memory on the builder - see issue/12032.
		n = 512
	}
	for i := 0; i < n; i++ {
		_ = make([]byte, 1<<20)
	}

	// Create a bunch of busy goroutines to load all Ps.
	for p := 0; p < 10; p++ {
		wg.Add(1)
		go func() {
			// Do something useful.
			tmp := make([]byte, 1<<16)
			for i := range tmp {
				tmp[i]++
			}
			_ = tmp
			<-done
			wg.Done()
		}()
	}

	// Block in syscall.
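	// The pipe read parks this goroutine in a syscall until the pipe is
	// written to when the helpers are unblocked below.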
	wg.Add(1)
	go func() {
		var tmp [1]byte
		rp.Read(tmp[:])
		<-done
		wg.Done()
	}()

	// Test timers.
	timerDone := make(chan bool)
	go func() {
		time.Sleep(time.Millisecond)
		timerDone <- true
	}()
	<-timerDone

	// A bit of network.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen failed: %v", err)
	}
	defer ln.Close()
	go func() {
		c, err := ln.Accept()
		if err != nil {
			return
		}
		time.Sleep(time.Millisecond)
		var buf [1]byte
		c.Write(buf[:])
		c.Close()
	}()
	c, err := net.Dial("tcp", ln.Addr().String())
	if err != nil {
		t.Fatalf("dial failed: %v", err)
	}
	var tmp [1]byte
	c.Read(tmp[:])
	c.Close()

	go func() {
		runtime.Gosched()
		select {}
	}()

	// Unblock helper goroutines and wait for them to finish.
	wp.Write(tmp[:])
	wp.Write(tmp[:])
	close(done)
	wg.Wait()

	runtime.GOMAXPROCS(procs)

	Stop()
	saveTrace(t, buf, "TestTraceStress")
	trace := buf.Bytes()
	parseTrace(t, buf)
	testBrokenTimestamps(t, trace)
}

// isMemoryConstrained reports whether the current machine is likely
// to be memory constrained.
// This was originally for the openbsd/arm builder (Issue 12032).
// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME?
func isMemoryConstrained() bool {
	if runtime.GOOS == "plan9" {
		return true
	}
	switch runtime.GOARCH {
	case "arm", "mips", "mipsle":
		return true
	}
	return false
}

// Do a bunch of various stuff (timers, GC, network, etc.) in a separate
// goroutine, and concurrently with all that, start/stop the trace 3 times.
func TestTraceStressStartStop(t *testing.T) {
	switch runtime.GOOS {
	case "js", "wasip1":
		t.Skip("no os.Pipe on " + runtime.GOOS)
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
	outerDone := make(chan bool)

	go func() {
		defer func() {
			outerDone <- true
		}()

		var wg sync.WaitGroup
		done := make(chan bool)

		wg.Add(1)
		go func() {
			<-done
			wg.Done()
		}()

		rp, wp, err := os.Pipe()
		if err != nil {
			t.Errorf("failed to create pipe: %v", err)
			return
		}
		defer func() {
			rp.Close()
			wp.Close()
		}()
		wg.Add(1)
		go func() {
			var tmp [1]byte
			rp.Read(tmp[:])
			<-done
			wg.Done()
		}()
		time.Sleep(time.Millisecond)

		go func() {
			runtime.LockOSThread()
			for {
				select {
				case <-done:
					return
				default:
					runtime.Gosched()
				}
			}
		}()

		runtime.GC()
		// Trigger GC from malloc.
		n := int(1e3)
		if isMemoryConstrained() {
			// Reduce allocation to avoid running out of
			// memory on the builder.
			n = 512
		}
		for i := 0; i < n; i++ {
			_ = make([]byte, 1<<20)
		}

		// Create a bunch of busy goroutines to load all Ps.
		for p := 0; p < 10; p++ {
			wg.Add(1)
			go func() {
				// Do something useful.
				tmp := make([]byte, 1<<16)
				for i := range tmp {
					tmp[i]++
				}
				_ = tmp
				<-done
				wg.Done()
			}()
		}

		// Block in syscall.
		wg.Add(1)
		go func() {
			var tmp [1]byte
			rp.Read(tmp[:])
			<-done
			wg.Done()
		}()

		runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))

		// Test timers.
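		// A sleeping goroutine exercises timer start/fire trace events.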
		timerDone := make(chan bool)
		go func() {
			time.Sleep(time.Millisecond)
			timerDone <- true
		}()
		<-timerDone

		// A bit of network.
		ln, err := net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			t.Errorf("listen failed: %v", err)
			return
		}
		defer ln.Close()
		go func() {
			c, err := ln.Accept()
			if err != nil {
				return
			}
			time.Sleep(time.Millisecond)
			var buf [1]byte
			c.Write(buf[:])
			c.Close()
		}()
		c, err := net.Dial("tcp", ln.Addr().String())
		if err != nil {
			t.Errorf("dial failed: %v", err)
			return
		}
		var tmp [1]byte
		c.Read(tmp[:])
		c.Close()

		go func() {
			runtime.Gosched()
			select {}
		}()

		// Unblock helper goroutines and wait for them to finish.
		wp.Write(tmp[:])
		wp.Write(tmp[:])
		close(done)
		wg.Wait()
	}()

	for i := 0; i < 3; i++ {
		buf := new(bytes.Buffer)
		if err := Start(buf); err != nil {
			t.Fatalf("failed to start tracing: %v", err)
		}
		time.Sleep(time.Millisecond)
		Stop()
		saveTrace(t, buf, "TestTraceStressStartStop")
		trace := buf.Bytes()
		parseTrace(t, buf)
		testBrokenTimestamps(t, trace)
	}
	<-outerDone
}

func TestTraceFutileWakeup(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
	c0 := make(chan int, 1)
	c1 := make(chan int, 1)
	c2 := make(chan int, 1)
	const procs = 2
	var done sync.WaitGroup
	done.Add(4 * procs)
	for p := 0; p < procs; p++ {
		const iters = 1e3
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				c0 <- 0
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				<-c0
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				select {
				case c1 <- 0:
				case c2 <- 0:
				}
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				select {
				case <-c1:
				case <-c2:
				}
			}
			done.Done()
		}()
	}
	done.Wait()

	Stop()
	saveTrace(t, buf, "TestTraceFutileWakeup")
	events, _ := parseTrace(t, buf)
	// Check that (1) the trace does not contain EvFutileWakeup events and
	// (2) there are no consecutive EvGoBlock/EvGoStart/EvGoBlock events
	// (we call runtime.Gosched between all operations, so these would be futile wakeups).
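	// gs holds a per-goroutine state machine: 1 means the goroutine
	// blocked, 2 means it was started again right after blocking.
	// Blocking again in state 2 is a futile wakeup; any event other
	// than block/start resets the goroutine's state.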
	gs := make(map[uint64]int)
	for _, ev := range events {
		switch ev.Type {
		case trace.EvFutileWakeup:
			t.Fatalf("found EvFutileWakeup event")
		case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect:
			if gs[ev.G] == 2 {
				t.Fatalf("goroutine %v blocked on %v at %v right after start",
					ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts)
			}
			if gs[ev.G] == 1 {
				t.Fatalf("goroutine %v blocked on %v at %v while blocked",
					ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts)
			}
			gs[ev.G] = 1
		case trace.EvGoStart:
			if gs[ev.G] == 1 {
				gs[ev.G] = 2
			}
		default:
			delete(gs, ev.G)
		}
	}
}

func TestTraceCPUProfile(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}

	cpuBuf := new(bytes.Buffer)
	if err := pprof.StartCPUProfile(cpuBuf); err != nil {
		t.Skipf("failed to start CPU profile: %v", err)
	}

	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	dur := 100 * time.Millisecond
	func() {
		// Create a region in the execution trace. Set and clear goroutine
		// labels fully within that region, so we know that any CPU profile
		// sample with the label must also be eligible for inclusion in the
		// execution trace.
		ctx := context.Background()
		defer StartRegion(ctx, "cpuHogger").End()
		pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
			cpuHogger(cpuHog1, &salt1, dur)
		})
		// Be sure the execution trace's view, when filtered to this goroutine
		// via the explicit goroutine ID in each event, gets many more samples
		// than the CPU profiler when filtered to this goroutine via labels.
		cpuHogger(cpuHog1, &salt1, dur)
	}()

	Stop()
	pprof.StopCPUProfile()
	saveTrace(t, buf, "TestTraceCPUProfile")

	prof, err := profile.Parse(cpuBuf)
	if err != nil {
		t.Fatalf("failed to parse CPU profile: %v", err)
	}
	// Examine the CPU profiler's view. Filter it to only include samples from
	// the single test goroutine. Use labels to execute that filter: they should
	// apply to all work done while that goroutine is getg().m.curg, and they
	// should apply to no other goroutines.
	pprofSamples := 0
	pprofStacks := make(map[string]int)
	for _, s := range prof.Sample {
		if s.Label["tracing"] != nil {
			var fns []string
			var leaf string
			for _, loc := range s.Location {
				for _, line := range loc.Line {
					fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line))
					leaf = line.Function.Name
				}
			}
			// runtime.sigprof synthesizes call stacks when "normal traceback is
			// impossible or has failed", using particular placeholder functions
			// to represent common failure cases. Look for those functions in
			// the leaf position as a sign that the call stack and its
			// symbolization are more complex than this test can handle.
			//
			// TODO: Make the symbolization done by the execution tracer and CPU
			// profiler match up even in these harder cases. See #53378.
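			// Skip samples whose leaf frame is one of those placeholders.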
			switch leaf {
			case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO":
				continue
			}
			stack := strings.Join(fns, " ")
			samples := int(s.Value[0])
			pprofSamples += samples
			pprofStacks[stack] += samples
		}
	}
	if pprofSamples == 0 {
		t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof)
	}

	// Examine the execution tracer's view of the CPU profile samples. Filter it
	// to only include samples from the single test goroutine. Use the goroutine
	// ID that was recorded in the events: that should reflect getg().m.curg,
	// same as the profiler's labels (even when the M is using its g0 stack).
	totalTraceSamples := 0
	traceSamples := 0
	traceStacks := make(map[string]int)
	events, _ := parseTrace(t, buf)
	var hogRegion *trace.Event
	for _, ev := range events {
		if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" {
			// mode "0" indicates region start
			hogRegion = ev
		}
	}
	if hogRegion == nil {
		t.Fatalf("execution trace did not identify cpuHogger goroutine")
	} else if hogRegion.Link == nil {
		t.Fatalf("execution trace did not close cpuHogger region")
	}
	for _, ev := range events {
		if ev.Type == trace.EvCPUSample {
			totalTraceSamples++
			if ev.G == hogRegion.G {
				traceSamples++
				var fns []string
				for _, frame := range ev.Stk {
					if frame.Fn != "runtime.goexit" {
						fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
					}
				}
				stack := strings.Join(fns, " ")
				traceStacks[stack]++
			}
		}
	}

	// The execution trace may drop CPU profile samples if the profiling buffer
	// overflows. Based on the size of profBufWordCount, that takes a bit over
	// 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
	// hit that case, then we definitely have at least one full buffer's worth
	// of CPU samples, so we'll call that success.
	overflowed := totalTraceSamples >= 1900
	if traceSamples < pprofSamples {
		t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
		if !overflowed {
			t.Fail()
		}
	}

	for stack, traceSamples := range traceStacks {
		pprofSamples := pprofStacks[stack]
		delete(pprofStacks, stack)
		if traceSamples < pprofSamples {
			t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
				stack, pprofSamples, traceSamples)
			if !overflowed {
				t.Fail()
			}
		}
	}
	for stack, pprofSamples := range pprofStacks {
		t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
		if !overflowed {
			t.Fail()
		}
	}

	if t.Failed() {
		t.Logf("execution trace CPU samples:")
		for stack, samples := range traceStacks {
			t.Logf("%d: %q", samples, stack)
		}
		t.Logf("CPU profile:\n%v", prof)
	}
}

func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
	// We only need to get one 100 Hz clock tick, so we've got
	// a large safety buffer.
	// But do at least 500 iterations (which should take about 100ms),
	// otherwise TestCPUProfileMultithreaded can fail if only one
	// thread is scheduled during the testing period.
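	// Loop until both the minimum iteration count and the requested
	// duration have been reached.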
	t0 := time.Now()
	accum := *y
	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
		accum = f(accum)
	}
	*y = accum
}

var (
	salt1 = 0
)

// The actual CPU hogging function.
// Must not call other functions nor access heap/globals in the loop,
// otherwise under the race detector the samples will be in the race runtime.
func cpuHog1(x int) int {
	return cpuHog0(x, 1e5)
}

func cpuHog0(x, n int) int {
	foo := x
	for i := 0; i < n; i++ {
		if i%1000 == 0 {
			// Spend time in mcall, stored as gp.m.curg, with g0 running.
			runtime.Gosched()
		}
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 1
		}
	}
	return foo
}

func saveTrace(t *testing.T, buf *bytes.Buffer, name string) {
	if !*saveTraces {
		return
	}
	if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil {
		t.Errorf("failed to write trace file: %s", err)
	}
}
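// Note: running these tests with the -savetraces flag writes each
// collected trace to a <TestName>.trace file in the current directory;
// those files can then be inspected with "go tool trace".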