github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/runtime/trace/trace_test.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace_test

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"internal/profile"
	"internal/race"
	"internal/trace"
	"io"
	"net"
	"os"
	"runtime"
	"runtime/pprof"
	. "runtime/trace"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"
)

var (
	saveTraces = flag.Bool("savetraces", false, "save traces collected by tests")
)

// TestEventBatch tests that Flush calls that happen during Start
// don't produce corrupted traces.
func TestEventBatch(t *testing.T) {
	if race.Enabled {
		t.Skip("skipping in race mode")
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	if testing.Short() {
		t.Skip("skipping in short mode")
	}
	// During Start, a bunch of records is written to reflect the current
	// snapshot of the program, including the state of each goroutine.
	// Some string constants are also written to the trace to aid trace
	// parsing. This test checks that a Flush of the buffer occurring
	// during this process doesn't corrupt the trace.
	// Precisely when a Flush happens during Start is hard to control,
	// so we test with a range of goroutine counts, hoping that one
	// of them triggers a Flush.
	// This range was chosen to fill up a ~64KB buffer with traceEvGoCreate
	// and traceEvGoWaiting events (12-13 bytes per goroutine).
	for g := 4950; g < 5050; g++ {
		n := g
		t.Run("G="+strconv.Itoa(n), func(t *testing.T) {
			var wg sync.WaitGroup
			wg.Add(n)

			in := make(chan bool, 1000)
			for i := 0; i < n; i++ {
				go func() {
					<-in
					wg.Done()
				}()
			}
			buf := new(bytes.Buffer)
			if err := Start(buf); err != nil {
				t.Fatalf("failed to start tracing: %v", err)
			}

			for i := 0; i < n; i++ {
				in <- true
			}
			wg.Wait()
			Stop()

			_, err := trace.Parse(buf, "")
			if err == trace.ErrTimeOrder {
				t.Skipf("skipping trace: %v", err)
			}

			if err != nil {
				t.Fatalf("failed to parse trace: %v", err)
			}
		})
	}
}
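// What follows is an illustrative sketch (a hypothetical helper, not used by
// TestEventBatch) of the sizing arithmetic behind the goroutine sweep above:
// at 12-13 bytes of traceEvGoCreate/traceEvGoWaiting data per goroutine, a
// ~64 KiB buffer fills after roughly 65536/13 ≈ 5041 to 65536/12 ≈ 5461
// goroutines, so sweeping g over 4950-5050 makes it likely that some run
// flushes a buffer exactly while Start is snapshotting goroutine state.
func approxGoroutinesPerTraceBuf(bufBytes, bytesPerGoroutine int) int {
	return bufBytes / bytesPerGoroutine
}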
func TestTraceStartStop(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	Stop()
	size := buf.Len()
	if size == 0 {
		t.Fatalf("trace is empty")
	}
	time.Sleep(100 * time.Millisecond)
	if size != buf.Len() {
		t.Fatalf("trace writes after stop: %v -> %v", size, buf.Len())
	}
	saveTrace(t, buf, "TestTraceStartStop")
}

func TestTraceDoubleStart(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	Stop()
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	if err := Start(buf); err == nil {
		t.Fatalf("succeeded in starting tracing a second time")
	}
	Stop()
	Stop()
}

func TestTrace(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}
	Stop()
	saveTrace(t, buf, "TestTrace")
	_, err := trace.Parse(buf, "")
	if err == trace.ErrTimeOrder {
		t.Skipf("skipping trace: %v", err)
	}
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
}

func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc) {
	res, err := trace.Parse(r, "")
	if err == trace.ErrTimeOrder {
		t.Skipf("skipping trace: %v", err)
	}
	if err != nil {
		t.Fatalf("failed to parse trace: %v", err)
	}
	gs := trace.GoroutineStats(res.Events)
	for goid := range gs {
		// We don't do any particular checks on the result at the moment.
		// But still check that RelatedGoroutines does not crash, hang, etc.
		_ = trace.RelatedGoroutines(res.Events, goid)
	}
	return res.Events, gs
}

func testBrokenTimestamps(t *testing.T, data []byte) {
	// On some processors cputicks (used to generate trace timestamps)
	// produces non-monotonic timestamps. It is important that the parser
	// distinguishes logically inconsistent traces (e.g. missing, excessive
	// or misordered events) from broken timestamps. The former is a bug
	// in the tracer; the latter is a machine issue.
	// So now that we have a consistent trace, test that (1) the parser does
	// not return a logical error in case of broken timestamps
	// and (2) broken timestamps are eventually detected and reported.
	trace.BreakTimestampsForTesting = true
	defer func() {
		trace.BreakTimestampsForTesting = false
	}()
	for i := 0; i < 1e4; i++ {
		_, err := trace.Parse(bytes.NewReader(data), "")
		if err == trace.ErrTimeOrder {
			return
		}
		if err != nil {
			t.Fatalf("failed to parse trace: %v", err)
		}
	}
}
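// Callers of testBrokenTimestamps follow the pattern below (this is the
// shape used by TestTraceStress and TestTraceStressStartStop in this file):
// grab the raw bytes before parsing advances the buffer's read offset,
// parse once to confirm the trace is logically consistent, then replay the
// same bytes through the broken-timestamps path.
//
//	data := buf.Bytes() // snapshot of the unread trace bytes
//	parseTrace(t, buf)  // consumes buf; skips the test on ErrTimeOrder
//	testBrokenTimestamps(t, data)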
func TestTraceStress(t *testing.T) {
	if runtime.GOOS == "js" {
		t.Skip("no os.Pipe on js")
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	if testing.Short() {
		t.Skip("skipping in -short mode")
	}

	var wg sync.WaitGroup
	done := make(chan bool)

	// Create a goroutine blocked before tracing.
	wg.Add(1)
	go func() {
		<-done
		wg.Done()
	}()

	// Create a goroutine blocked in syscall before tracing.
	rp, wp, err := os.Pipe()
	if err != nil {
		t.Fatalf("failed to create pipe: %v", err)
	}
	defer func() {
		rp.Close()
		wp.Close()
	}()
	wg.Add(1)
	go func() {
		var tmp [1]byte
		rp.Read(tmp[:])
		<-done
		wg.Done()
	}()
	time.Sleep(time.Millisecond) // give the goroutine above time to block

	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	procs := runtime.GOMAXPROCS(10)
	time.Sleep(50 * time.Millisecond) // test proc stop/start events

	go func() {
		runtime.LockOSThread()
		for {
			select {
			case <-done:
				return
			default:
				runtime.Gosched()
			}
		}
	}()

	runtime.GC()
	// Trigger GC from malloc.
	n := int(1e3)
	if isMemoryConstrained() {
		// Reduce allocation to avoid running out of
		// memory on the builder - see issue/12032.
		n = 512
	}
	for i := 0; i < n; i++ {
		_ = make([]byte, 1<<20)
	}

	// Create a bunch of busy goroutines to load all Ps.
	for p := 0; p < 10; p++ {
		wg.Add(1)
		go func() {
			// Do something useful.
			tmp := make([]byte, 1<<16)
			for i := range tmp {
				tmp[i]++
			}
			_ = tmp
			<-done
			wg.Done()
		}()
	}

	// Block in syscall.
	wg.Add(1)
	go func() {
		var tmp [1]byte
		rp.Read(tmp[:])
		<-done
		wg.Done()
	}()

	// Test timers.
	timerDone := make(chan bool)
	go func() {
		time.Sleep(time.Millisecond)
		timerDone <- true
	}()
	<-timerDone

	// A bit of network.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen failed: %v", err)
	}
	defer ln.Close()
	go func() {
		c, err := ln.Accept()
		if err != nil {
			return
		}
		time.Sleep(time.Millisecond)
		var buf [1]byte
		c.Write(buf[:])
		c.Close()
	}()
	c, err := net.Dial("tcp", ln.Addr().String())
	if err != nil {
		t.Fatalf("dial failed: %v", err)
	}
	var tmp [1]byte
	c.Read(tmp[:])
	c.Close()

	go func() {
		runtime.Gosched()
		select {}
	}()

	// Unblock helper goroutines and wait for them to finish.
	wp.Write(tmp[:])
	wp.Write(tmp[:])
	close(done)
	wg.Wait()

	runtime.GOMAXPROCS(procs)

	Stop()
	saveTrace(t, buf, "TestTraceStress")
	trace := buf.Bytes()
	parseTrace(t, buf)
	testBrokenTimestamps(t, trace)
}

// isMemoryConstrained reports whether the current machine is likely
// to be memory constrained.
// This was originally for the openbsd/arm builder (Issue 12032).
// TODO: move this to testenv? Make this look at memory? Look at GO_BUILDER_NAME?
func isMemoryConstrained() bool {
	if runtime.GOOS == "plan9" {
		return true
	}
	switch runtime.GOARCH {
	case "arm", "mips", "mipsle":
		return true
	}
	return false
}
// Do a bunch of various stuff (timers, GC, network, etc.) in a separate
// goroutine, and concurrently with all that, start/stop the trace 3 times.
func TestTraceStressStartStop(t *testing.T) {
	if runtime.GOOS == "js" {
		t.Skip("no os.Pipe on js")
	}
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
	outerDone := make(chan bool)

	go func() {
		defer func() {
			outerDone <- true
		}()

		var wg sync.WaitGroup
		done := make(chan bool)

		wg.Add(1)
		go func() {
			<-done
			wg.Done()
		}()

		rp, wp, err := os.Pipe()
		if err != nil {
			t.Errorf("failed to create pipe: %v", err)
			return
		}
		defer func() {
			rp.Close()
			wp.Close()
		}()
		wg.Add(1)
		go func() {
			var tmp [1]byte
			rp.Read(tmp[:])
			<-done
			wg.Done()
		}()
		time.Sleep(time.Millisecond)

		go func() {
			runtime.LockOSThread()
			for {
				select {
				case <-done:
					return
				default:
					runtime.Gosched()
				}
			}
		}()

		runtime.GC()
		// Trigger GC from malloc.
		n := int(1e3)
		if isMemoryConstrained() {
			// Reduce allocation to avoid running out of
			// memory on the builder.
			n = 512
		}
		for i := 0; i < n; i++ {
			_ = make([]byte, 1<<20)
		}

		// Create a bunch of busy goroutines to load all Ps.
		for p := 0; p < 10; p++ {
			wg.Add(1)
			go func() {
				// Do something useful.
				tmp := make([]byte, 1<<16)
				for i := range tmp {
					tmp[i]++
				}
				_ = tmp
				<-done
				wg.Done()
			}()
		}

		// Block in syscall.
		wg.Add(1)
		go func() {
			var tmp [1]byte
			rp.Read(tmp[:])
			<-done
			wg.Done()
		}()

		runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))

		// Test timers.
		timerDone := make(chan bool)
		go func() {
			time.Sleep(time.Millisecond)
			timerDone <- true
		}()
		<-timerDone

		// A bit of network.
		ln, err := net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			t.Errorf("listen failed: %v", err)
			return
		}
		defer ln.Close()
		go func() {
			c, err := ln.Accept()
			if err != nil {
				return
			}
			time.Sleep(time.Millisecond)
			var buf [1]byte
			c.Write(buf[:])
			c.Close()
		}()
		c, err := net.Dial("tcp", ln.Addr().String())
		if err != nil {
			t.Errorf("dial failed: %v", err)
			return
		}
		var tmp [1]byte
		c.Read(tmp[:])
		c.Close()

		go func() {
			runtime.Gosched()
			select {}
		}()

		// Unblock helper goroutines and wait for them to finish.
		wp.Write(tmp[:])
		wp.Write(tmp[:])
		close(done)
		wg.Wait()
	}()

	for i := 0; i < 3; i++ {
		buf := new(bytes.Buffer)
		if err := Start(buf); err != nil {
			t.Fatalf("failed to start tracing: %v", err)
		}
		time.Sleep(time.Millisecond)
		Stop()
		saveTrace(t, buf, "TestTraceStressStartStop")
		trace := buf.Bytes()
		parseTrace(t, buf)
		testBrokenTimestamps(t, trace)
	}
	<-outerDone
}
func TestTraceFutileWakeup(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}
	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(8))
	c0 := make(chan int, 1)
	c1 := make(chan int, 1)
	c2 := make(chan int, 1)
	const procs = 2
	var done sync.WaitGroup
	done.Add(4 * procs)
	for p := 0; p < procs; p++ {
		const iters = 1e3
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				c0 <- 0
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				<-c0
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				select {
				case c1 <- 0:
				case c2 <- 0:
				}
			}
			done.Done()
		}()
		go func() {
			for i := 0; i < iters; i++ {
				runtime.Gosched()
				select {
				case <-c1:
				case <-c2:
				}
			}
			done.Done()
		}()
	}
	done.Wait()

	Stop()
	saveTrace(t, buf, "TestTraceFutileWakeup")
	events, _ := parseTrace(t, buf)
	// Check that (1) trace does not contain EvFutileWakeup events and
	// (2) there are no consecutive EvGoBlock/EvGoStart/EvGoBlock events
	// (we call runtime.Gosched between all operations, so these would be futile wakeups).
	gs := make(map[uint64]int)
	for _, ev := range events {
		switch ev.Type {
		case trace.EvFutileWakeup:
			t.Fatalf("found EvFutileWakeup event")
		case trace.EvGoBlockSend, trace.EvGoBlockRecv, trace.EvGoBlockSelect:
			if gs[ev.G] == 2 {
				t.Fatalf("goroutine %v blocked on %v at %v right after start",
					ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts)
			}
			if gs[ev.G] == 1 {
				t.Fatalf("goroutine %v blocked on %v at %v while blocked",
					ev.G, trace.EventDescriptions[ev.Type].Name, ev.Ts)
			}
			gs[ev.G] = 1
		case trace.EvGoStart:
			if gs[ev.G] == 1 {
				gs[ev.G] = 2
			}
		default:
			delete(gs, ev.G)
		}
	}
}
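// The scan in TestTraceFutileWakeup above is a per-goroutine state machine.
// An equivalent reading with named states (the constants are illustrative,
// not part of the test):
//
//	const (
//		stateOther   = 0 // any other event resets the goroutine's entry
//		stateBlocked = 1 // just blocked on a send/recv/select
//		stateStarted = 2 // started immediately after blocking
//	)
//
// Blocking again from stateStarted means the goroutine was woken and then
// blocked without doing anything in between: a futile wakeup. Blocking from
// stateBlocked would mean two blocks with no start in between, which would
// indicate a logically inconsistent trace.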
func TestTraceCPUProfile(t *testing.T) {
	if IsEnabled() {
		t.Skip("skipping because -test.trace is set")
	}

	cpuBuf := new(bytes.Buffer)
	if err := pprof.StartCPUProfile(cpuBuf); err != nil {
		t.Skipf("failed to start CPU profile: %v", err)
	}

	buf := new(bytes.Buffer)
	if err := Start(buf); err != nil {
		t.Fatalf("failed to start tracing: %v", err)
	}

	dur := 100 * time.Millisecond
	func() {
		// Create a region in the execution trace. Set and clear goroutine
		// labels fully within that region, so we know that any CPU profile
		// sample with the label must also be eligible for inclusion in the
		// execution trace.
		ctx := context.Background()
		defer StartRegion(ctx, "cpuHogger").End()
		pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
			cpuHogger(cpuHog1, &salt1, dur)
		})
		// Be sure the execution trace's view, when filtered to this goroutine
		// via the explicit goroutine ID in each event, gets many more samples
		// than the CPU profiler when filtered to this goroutine via labels.
		cpuHogger(cpuHog1, &salt1, dur)
	}()

	Stop()
	pprof.StopCPUProfile()
	saveTrace(t, buf, "TestTraceCPUProfile")

	prof, err := profile.Parse(cpuBuf)
	if err != nil {
		t.Fatalf("failed to parse CPU profile: %v", err)
	}
	// Examine the CPU profiler's view. Filter it to only include samples from
	// the single test goroutine. Use labels to execute that filter: they should
	// apply to all work done while that goroutine is getg().m.curg, and they
	// should apply to no other goroutines.
	pprofSamples := 0
	pprofStacks := make(map[string]int)
	for _, s := range prof.Sample {
		if s.Label["tracing"] != nil {
			var fns []string
			var leaf string
			for _, loc := range s.Location {
				for _, line := range loc.Line {
					fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line))
					leaf = line.Function.Name
				}
			}
			// runtime.sigprof synthesizes call stacks when "normal traceback is
			// impossible or has failed", using particular placeholder functions
			// to represent common failure cases. Look for those functions in
			// the leaf position as a sign that the call stack and its
			// symbolization are more complex than this test can handle.
			//
			// TODO: Make the symbolization done by the execution tracer and CPU
			// profiler match up even in these harder cases. See #53378.
			switch leaf {
			case "runtime._System", "runtime._GC", "runtime._ExternalCode", "runtime._VDSO":
				continue
			}
			stack := strings.Join(fns, " ")
			samples := int(s.Value[0])
			pprofSamples += samples
			pprofStacks[stack] += samples
		}
	}
	if pprofSamples == 0 {
		t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof)
	}

	// Examine the execution tracer's view of the CPU profile samples. Filter it
	// to only include samples from the single test goroutine. Use the goroutine
	// ID that was recorded in the events: that should reflect getg().m.curg,
	// same as the profiler's labels (even when the M is using its g0 stack).
	totalTraceSamples := 0
	traceSamples := 0
	traceStacks := make(map[string]int)
	events, _ := parseTrace(t, buf)
	var hogRegion *trace.Event
	for _, ev := range events {
		if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" {
			// mode "0" indicates region start
			hogRegion = ev
		}
	}
	if hogRegion == nil {
		t.Fatalf("execution trace did not identify cpuHogger goroutine")
	} else if hogRegion.Link == nil {
		t.Fatalf("execution trace did not close cpuHogger region")
	}
	for _, ev := range events {
		if ev.Type == trace.EvCPUSample {
			totalTraceSamples++
			if ev.G == hogRegion.G {
				traceSamples++
				var fns []string
				for _, frame := range ev.Stk {
					if frame.Fn != "runtime.goexit" {
						fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
					}
				}
				stack := strings.Join(fns, " ")
				traceStacks[stack]++
			}
		}
	}

	// The execution trace may drop CPU profile samples if the profiling buffer
	// overflows. Based on the size of profBufWordCount, that takes a bit over
	// 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
	// hit that case, then we definitely have at least one full buffer's worth
	// of CPU samples, so we'll call that success.
	overflowed := totalTraceSamples >= 1900
	if traceSamples < pprofSamples {
		t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
		if !overflowed {
			t.Fail()
		}
	}

	for stack, traceSamples := range traceStacks {
		pprofSamples := pprofStacks[stack]
		delete(pprofStacks, stack)
		if traceSamples < pprofSamples {
			t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
				stack, pprofSamples, traceSamples)
			if !overflowed {
				t.Fail()
			}
		}
	}
	for stack, pprofSamples := range pprofStacks {
		t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
		if !overflowed {
			t.Fail()
		}
	}

	if t.Failed() {
		t.Logf("execution trace CPU samples:")
		for stack, samples := range traceStacks {
			t.Logf("%d: %q", samples, stack)
		}
		t.Logf("CPU profile:\n%v", prof)
	}
}
func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
	// We only need to get one 100 Hz clock tick, so we've got
	// a large safety buffer.
	// But do at least 500 iterations (which should take about 100ms),
	// otherwise TestCPUProfileMultithreaded can fail if only one
	// thread is scheduled during the testing period.
	t0 := time.Now()
	accum := *y
	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
		accum = f(accum)
	}
	*y = accum
}

var (
	salt1 = 0
)

// cpuHog1 is the actual CPU hogging function.
// It must not call other functions nor access heap/globals in the loop,
// otherwise under the race detector the samples will land in the race runtime.
func cpuHog1(x int) int {
	return cpuHog0(x, 1e5)
}

func cpuHog0(x, n int) int {
	foo := x
	for i := 0; i < n; i++ {
		if i%1000 == 0 {
			// Spend time in mcall, stored as gp.m.curg, with g0 running.
			runtime.Gosched()
		}
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 1
		}
	}
	return foo
}

func saveTrace(t *testing.T, buf *bytes.Buffer, name string) {
	if !*saveTraces {
		return
	}
	if err := os.WriteFile(name+".trace", buf.Bytes(), 0600); err != nil {
		t.Errorf("failed to write trace file: %s", err)
	}
}
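// Usage sketch for the -savetraces flag defined at the top of this file
// (the invocation is illustrative; -args makes sure the custom flag reaches
// the test binary rather than the go tool):
//
//	go test runtime/trace -run TestTrace -args -savetraces
//
// Each test that reaches saveTrace then writes its trace to a
// <TestName>.trace file in the current directory.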