// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT.

//go:build go1.21

// Package oldtrace implements a parser for Go execution traces from versions
// 1.11–1.21.
//
// The package started as a copy of Go 1.19's internal/trace, but has been
// optimized to be faster while using less memory and fewer allocations. It has
// been further modified for the specific purpose of converting traces to the
// new 1.22+ format.
package oldtrace

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"golang.org/x/exp/trace/internal/event"
	"golang.org/x/exp/trace/internal/version"
	"io"
	"math"
	"sort"
)

// Timestamp represents a count of nanoseconds since the beginning of the trace.
// Timestamps can only be meaningfully compared with other timestamps from the
// same trace.
type Timestamp int64

// Event describes one event in the trace.
type Event struct {
	// The Event type is carefully laid out to optimize its size and to avoid
	// pointers, the latter so that the garbage collector won't have to scan any
	// memory of our millions of events.

	Ts    Timestamp  // timestamp in nanoseconds
	G     uint64     // G on which the event happened
	Args  [4]uint64  // event-type-specific arguments
	StkID uint32     // unique stack ID
	P     int32      // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP)
	Type  event.Type // one of Ev*
}

// Frame is a frame in stack traces.
type Frame struct {
	PC uint64
	// string ID of the function name
	Fn uint64
	// string ID of the file name
	File uint64
	Line int
}

const (
	// Special P identifiers:
	FakeP    = 1000000 + iota
	TimerP   // contains timer unblocks
	NetpollP // contains network unblocks
	SyscallP // contains returns from syscalls
	GCP      // contains GC state
	ProfileP // contains recording of CPU profile samples
)
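
// Because FakeP is 1000000 and the remaining identifiers follow via iota,
// TimerP is 1000001, NetpollP 1000002, SyscallP 1000003, GCP 1000004 and
// ProfileP 1000005. Real Ps are expected to have far smaller IDs, so the two
// ranges should not collide in practice.
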
// Trace is the result of Parse.
type Trace struct {
	Version version.Version

	// Events is the sorted list of Events in the trace.
	Events Events
	// Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs
	// from the trace.
	Stacks        map[uint32][]uint64
	PCs           map[uint64]Frame
	Strings       map[uint64]string
	InlineStrings []string
}

// batchOffset records the byte offset of, and number of events in, a batch. A
// batch is a sequence of events emitted by a P. Events within a single batch
// are sorted by time.
type batchOffset struct {
	offset    int
	numEvents int
}

type parser struct {
	ver  version.Version
	data []byte
	off  int

	strings              map[uint64]string
	inlineStrings        []string
	inlineStringsMapping map[string]int
	// map from Ps to their batch offsets
	batchOffsets map[int32][]batchOffset
	stacks       map[uint32][]uint64
	stacksData   []uint64
	ticksPerSec  int64
	pcs          map[uint64]Frame
	cpuSamples   []Event
	timerGoids   map[uint64]bool

	// state for readRawEvent
	args []uint64

	// state for parseEvent
	lastTs Timestamp
	lastG  uint64
	// map from Ps to the last Gs that ran on them
	lastGs map[int32]uint64
	lastP  int32
}

func (p *parser) discard(n uint64) bool {
	if n > math.MaxInt {
		return false
	}
	if noff := p.off + int(n); noff < p.off || noff > len(p.data) {
		return false
	} else {
		p.off = noff
	}
	return true
}

func newParser(r io.Reader, ver version.Version) (*parser, error) {
	var buf []byte
	if seeker, ok := r.(io.Seeker); ok {
		// Determine the size of the reader so that we can allocate a buffer
		// without having to grow it later.
		cur, err := seeker.Seek(0, io.SeekCurrent)
		if err != nil {
			return nil, err
		}
		end, err := seeker.Seek(0, io.SeekEnd)
		if err != nil {
			return nil, err
		}
		_, err = seeker.Seek(cur, io.SeekStart)
		if err != nil {
			return nil, err
		}

		buf = make([]byte, end-cur)
		_, err = io.ReadFull(r, buf)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		buf, err = io.ReadAll(r)
		if err != nil {
			return nil, err
		}
	}
	return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil
}

// Parse parses Go execution traces from versions 1.11–1.21. The provided reader
// will be read to completion and the entire trace will be materialized in
// memory. That is, this function does not allow incremental parsing.
//
// The reader has to be positioned just after the trace header and vers needs to
// be the version of the trace. This can be achieved by using
// version.ReadHeader.
func Parse(r io.Reader, vers version.Version) (Trace, error) {
	// We accept the version as an argument because golang.org/x/exp/trace will
	// have already read the version to determine which parser to use.
	p, err := newParser(r, vers)
	if err != nil {
		return Trace{}, err
	}
	return p.parse()
}
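
// A minimal usage sketch, assuming the trace header has already been consumed
// with version.ReadHeader (which reports the trace's version and leaves the
// reader positioned at the first event):
//
//	f, err := os.Open("trace.out")
//	if err != nil { /* handle error */ }
//	defer f.Close()
//	ver, err := version.ReadHeader(f)
//	if err != nil { /* handle error */ }
//	tr, err := Parse(f, ver)
//	if err != nil { /* handle error */ }
//	for i := 0; i < tr.Events.Len(); i++ {
//		ev := tr.Events.Ptr(i)
//		_ = ev // inspect the event
//	}
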
// parse parses, post-processes and verifies the trace.
func (p *parser) parse() (Trace, error) {
	defer func() {
		p.data = nil
	}()

	// We parse a trace by running the following steps in order:
	//
	// 1. In the initial pass we collect information about batches (their
	//    locations and sizes). We also parse CPU profiling samples in this
	//    step, simply to reduce the number of full passes that we need.
	//
	// 2. In the second pass we parse batches and merge them into a globally
	//    ordered event stream. This uses the batch information from the first
	//    pass to quickly find batches.
	//
	// 3. After all events have been parsed we convert their timestamps from
	//    CPU ticks to wall time. Furthermore, we move timers and syscalls to
	//    dedicated, fake Ps.
	//
	// 4. Finally, we validate the trace.

	p.strings = make(map[uint64]string)
	p.batchOffsets = make(map[int32][]batchOffset)
	p.lastGs = make(map[int32]uint64)
	p.stacks = make(map[uint32][]uint64)
	p.pcs = make(map[uint64]Frame)
	p.inlineStringsMapping = make(map[string]int)

	if err := p.collectBatchesAndCPUSamples(); err != nil {
		return Trace{}, err
	}

	events, err := p.parseEventBatches()
	if err != nil {
		return Trace{}, err
	}

	if p.ticksPerSec == 0 {
		return Trace{}, errors.New("no EvFrequency event")
	}

	if events.Len() > 0 {
		// Translate CPU ticks to real time.
		minTs := events.Ptr(0).Ts
		// Use floating point to avoid integer overflows.
		freq := 1e9 / float64(p.ticksPerSec)
		for i := 0; i < events.Len(); i++ {
			ev := events.Ptr(i)
			ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq)
			// Move timers and syscalls to separate fake Ps.
			if p.timerGoids[ev.G] && ev.Type == EvGoUnblock {
				ev.P = TimerP
			}
			if ev.Type == EvGoSysExit {
				ev.P = SyscallP
			}
		}
	}

	if err := p.postProcessTrace(events); err != nil {
		return Trace{}, err
	}

	res := Trace{
		Version:       p.ver,
		Events:        events,
		Stacks:        p.stacks,
		Strings:       p.strings,
		InlineStrings: p.inlineStrings,
		PCs:           p.pcs,
	}
	return res, nil
}

// rawEvent is a helper type used during parsing.
type rawEvent struct {
	typ   event.Type
	args  []uint64
	sargs []string

	// if typ == EvBatch, these fields describe the batch.
	batchPid    int32
	batchOffset int
}

type proc struct {
	pid int32
	// the remaining events in the current batch
	events []Event
	// buffer for reading batches into, aliased by proc.events
	buf []Event

	// there are no more batches left
	done bool
}

const eventsBucketSize = 524288 // 32 MiB of events

type Events struct {
	// Events is a slice of slices that grows one slice of size eventsBucketSize
	// at a time. This avoids the O(n) cost of slice growth in append, and
	// additionally allows consumers to drop references to parts of the data,
	// freeing memory piecewise.
	n       int
	buckets []*[eventsBucketSize]Event
	off     int
}

// grow grows the slice by one and returns a pointer to the new element, without
// overwriting it.
func (l *Events) grow() *Event {
	a, b := l.index(l.n)
	if a >= len(l.buckets) {
		l.buckets = append(l.buckets, new([eventsBucketSize]Event))
	}
	ptr := &l.buckets[a][b]
	l.n++
	return ptr
}

// append appends v to the slice and returns a pointer to the new element.
func (l *Events) append(v Event) *Event {
	ptr := l.grow()
	*ptr = v
	return ptr
}

func (l *Events) Ptr(i int) *Event {
	a, b := l.index(i + l.off)
	return &l.buckets[a][b]
}

func (l *Events) index(i int) (int, int) {
	// Doing the division on uint instead of int compiles this function to a
	// shift and an AND (for power of 2 bucket sizes), versus a whole bunch of
	// instructions for int.
	return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize)
}
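
// As a worked example: with eventsBucketSize = 524288 (1<<19), index(524293)
// returns (1, 5), i.e. the sixth element of the second bucket, and the
// division and modulo above compile down to a shift and a mask.
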
func (l *Events) Len() int {
	return l.n - l.off
}

func (l *Events) Less(i, j int) bool {
	return l.Ptr(i).Ts < l.Ptr(j).Ts
}

func (l *Events) Swap(i, j int) {
	*l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i)
}

func (l *Events) Pop() (*Event, bool) {
	if l.off == l.n {
		return nil, false
	}
	a, b := l.index(l.off)
	ptr := &l.buckets[a][b]
	l.off++
	if b == eventsBucketSize-1 || l.off == l.n {
		// We've consumed the last event from the bucket, so drop the bucket and
		// allow GC to collect it.
		l.buckets[a] = nil
	}
	return ptr, true
}

func (l *Events) All() func(yield func(ev *Event) bool) {
	return func(yield func(ev *Event) bool) {
		for i := 0; i < l.Len(); i++ {
			a, b := l.index(i + l.off)
			ptr := &l.buckets[a][b]
			if !yield(ptr) {
				return
			}
		}
	}
}
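
// Sketch of consuming All, which follows the push-iterator convention
// (and is compatible with range-over-func on Go versions that support it):
//
//	evs.All()(func(ev *Event) bool {
//		// inspect ev ...
//		return true // return false to stop early
//	})
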
// parseEventBatches reads per-P event batches and merges them into a single,
// consistent stream. The high-level idea is as follows. Events within an
// individual batch are in the correct order, because they are emitted by a
// single P. So we need to produce a correct interleaving of the batches. To do
// this we take the first unmerged event from each batch (the frontier). Then we
// choose the subset of events that is "ready" to be merged, that is, events for
// which all dependencies have already been merged. Then we choose the event
// with the lowest timestamp from that subset, merge it, and repeat. This
// approach ensures that we form a consistent stream even if timestamps are
// incorrect (a condition observed on some machines).
func (p *parser) parseEventBatches() (Events, error) {
	// The ordering of CPU profile sample events in the data stream is based on
	// when each run of the signal handler was able to acquire the spinlock,
	// with original timestamps corresponding to when ReadTrace pulled the data
	// off of the profBuf queue. Re-sort them by the timestamp we captured
	// inside the signal handler.
	sort.Sort((*eventList)(&p.cpuSamples))

	allProcs := make([]proc, 0, len(p.batchOffsets))
	for pid := range p.batchOffsets {
		allProcs = append(allProcs, proc{pid: pid})
	}
	allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples})

	events := Events{}

	// Merge events as long as at least one P has more events
	gs := make(map[uint64]gState)
	// Note: technically we don't need a priority queue here. We're only ever
	// interested in the earliest eligible event, which means we just have to
	// track the smallest element. However, in practice, the priority queue
	// performs better, because for each event we only have to compute its state
	// transition once, not on each iteration. If it was eligible before, it'll
	// already be in the queue. Furthermore, on average, we only have one P to
	// look at in each iteration, because all other Ps are already in the queue.
	var frontier orderEventList

	availableProcs := make([]*proc, len(allProcs))
	for i := range allProcs {
		availableProcs[i] = &allProcs[i]
	}
	for {
	pidLoop:
		for i := 0; i < len(availableProcs); i++ {
			proc := availableProcs[i]

			for len(proc.events) == 0 {
				// Call loadBatch in a loop because sometimes batches are empty
				evs, err := p.loadBatch(proc.pid, proc.buf[:0])
				proc.buf = evs[:0]
				if err == io.EOF {
					// This P has no more events
					proc.done = true
					availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
					availableProcs = availableProcs[:len(availableProcs)-1]
					// We swapped the element at i with another proc, so look at
					// the index again
					i--
					continue pidLoop
				} else if err != nil {
					return Events{}, err
				} else {
					proc.events = evs
				}
			}

			ev := &proc.events[0]
			g, init, _ := stateTransition(ev)

			// TODO(dh): This implementation matches the behavior of the
			// upstream 'go tool trace', and works in practice, but has run into
			// the following inconsistency during fuzzing: what happens if
			// multiple Ps have events for the same G? While building the
			// frontier we will check all of the events against the current
			// state of the G. However, when we process the frontier, the state
			// of the G changes, and a transition that was valid while building
			// the frontier may no longer be valid when processing the frontier.
			// Is this something that can happen for real, valid traces, or is
			// this only possible with corrupt data?
			if !transitionReady(g, gs[g], init) {
				continue
			}
			proc.events = proc.events[1:]
			availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
			availableProcs = availableProcs[:len(availableProcs)-1]
			frontier.Push(orderEvent{*ev, proc})

			// We swapped the element at i with another proc, so look at the
			// index again
			i--
		}

		if len(frontier) == 0 {
			for i := range allProcs {
				if !allProcs[i].done {
					return Events{}, fmt.Errorf("no consistent ordering of events possible")
				}
			}
			break
		}
		f := frontier.Pop()

		// We're computing the state transition twice, once when computing the
		// frontier, and now to apply the transition. This is fine because
		// stateTransition is a pure function. Computing it again is cheaper
		// than storing large items in the frontier.
		g, init, next := stateTransition(&f.ev)

		// Get rid of "Local" events, they are intended merely for ordering.
		switch f.ev.Type {
		case EvGoStartLocal:
			f.ev.Type = EvGoStart
		case EvGoUnblockLocal:
			f.ev.Type = EvGoUnblock
		case EvGoSysExitLocal:
			f.ev.Type = EvGoSysExit
		}
		events.append(f.ev)

		if err := transition(gs, g, init, next); err != nil {
			return Events{}, err
		}
		availableProcs = append(availableProcs, f.proc)
	}

	// At this point we have a consistent stream of events. Make sure time
	// stamps respect the ordering. The tests will skip (not fail) the test case
	// if they see this error.
	if !sort.IsSorted(&events) {
		return Events{}, ErrTimeOrder
	}

	// The last part is giving correct timestamps to EvGoSysExit events. The
	// problem with EvGoSysExit is that the actual syscall exit timestamp
	// (ev.Args[2]) is potentially acquired long before the event is emitted.
	// So far we have used the timestamp of event emission (ev.Ts). We could not
	// set ev.Ts = ev.Args[2] earlier, because it would produce seemingly broken
	// timestamps (a misplaced event). We also can't simply update the timestamp
	// and re-sort the events, because if timestamps are broken we would misplace
	// the event and later report a logically broken trace (instead of reporting
	// broken timestamps).
	lastSysBlock := make(map[uint64]Timestamp)
	for i := 0; i < events.Len(); i++ {
		ev := events.Ptr(i)
		switch ev.Type {
		case EvGoSysBlock, EvGoInSyscall:
			lastSysBlock[ev.G] = ev.Ts
		case EvGoSysExit:
			ts := Timestamp(ev.Args[2])
			if ts == 0 {
				continue
			}
			block := lastSysBlock[ev.G]
			if block == 0 {
				return Events{}, fmt.Errorf("stray syscall exit")
			}
			if ts < block {
				return Events{}, ErrTimeOrder
			}
			ev.Ts = ts
		}
	}
	sort.Stable(&events)

	return events, nil
}

// collectBatchesAndCPUSamples records the offsets of batches and parses CPU
// samples.
func (p *parser) collectBatchesAndCPUSamples() error {
	// Read events.
	var raw rawEvent
	var curP int32
	for n := uint64(0); ; n++ {
		err := p.readRawEvent(skipArgs|skipStrings, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if raw.typ == EvNone {
			continue
		}

		if raw.typ == EvBatch {
			bo := batchOffset{offset: raw.batchOffset}
			p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo)
			curP = raw.batchPid
		}

		batches := p.batchOffsets[curP]
		if len(batches) == 0 {
			return fmt.Errorf("read event %d with current P of %d, but P has no batches yet",
				raw.typ, curP)
		}
		batches[len(batches)-1].numEvents++

		if raw.typ == EvCPUSample {
			e := Event{Type: raw.typ}

			argOffset := 1
			narg := raw.argNum()
			if len(raw.args) != narg {
				return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args))
			}
			for i := argOffset; i < narg; i++ {
				if i == narg-1 {
					e.StkID = uint32(raw.args[i])
				} else {
					e.Args[i-argOffset] = raw.args[i]
				}
			}

			e.Ts = Timestamp(e.Args[0])
			e.P = int32(e.Args[1])
			e.G = e.Args[2]
			e.Args[0] = 0

			// Most events are written out by the active P at the exact moment
			// they describe. CPU profile samples are different because they're
			// written to the tracing log after some delay, by a separate worker
			// goroutine, into a separate buffer.
			//
			// We keep these in their own batch until all of the batches are
			// merged in timestamp order. We also (right before the merge)
			// re-sort these events by the timestamp captured in the profiling
			// signal handler.
			//
			// Note that we're not concerned about the memory usage of storing
			// all CPU samples during the indexing phase. There are orders of
			// magnitude fewer CPU samples than runtime events.
			p.cpuSamples = append(p.cpuSamples, e)
		}
	}

	return nil
}
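
// To illustrate the argument shuffling in collectBatchesAndCPUSamples: a raw
// EvCPUSample whose varint arguments decode to [emitTs, sigTs, p, g, stkID]
// becomes Event{Type: EvCPUSample, Ts: Timestamp(sigTs), P: int32(p), G: g,
// StkID: uint32(stkID)}; the emission timestamp is discarded in favor of the
// timestamp captured in the profiling signal handler.
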
const (
	skipArgs = 1 << iota
	skipStrings
)

func (p *parser) readByte() (byte, bool) {
	if p.off < len(p.data) && p.off >= 0 {
		b := p.data[p.off]
		p.off++
		return b, true
	} else {
		return 0, false
	}
}

func (p *parser) readFull(n int) ([]byte, error) {
	if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) {
		// p.off < 0 is impossible but makes BCE happy.
		//
		// We do fail outright if there's not enough data; we don't care about
		// partial results.
		return nil, io.ErrUnexpectedEOF
	}
	buf := p.data[p.off : p.off+n]
	p.off += n
	return buf, nil
}

// readRawEvent reads a raw event into ev. The slices in ev are only valid until
// the next call to readRawEvent, even when storing to a different location.
func (p *parser) readRawEvent(flags uint, ev *rawEvent) error {
	// The number of arguments is encoded using two bits and can thus only
	// represent the values 0–3. The value 3 (on the wire) indicates that
	// arguments are prefixed by their byte length, to encode >=3 arguments.
	const inlineArgs = 3

	// Read event type and number of arguments (1 byte).
	b, ok := p.readByte()
	if !ok {
		return io.EOF
	}
	typ := event.Type(b << 2 >> 2)
	// Most events have a timestamp before the actual arguments, so we add 1 and
	// parse it like it's the first argument. EvString has a special format and
	// the number of arguments doesn't matter. EvBatch writes '1' as the number
	// of arguments, but actually has two: a pid and a timestamp, but here the
	// timestamp is the second argument, not the first; adding 1 happens to come
	// up with the correct number, but it doesn't matter, because EvBatch has
	// custom logic for parsing.
	//
	// Note that because we're adding 1, inlineArgs == 3 describes the largest
	// number of logical arguments that isn't length-prefixed, even though the
	// value 3 on the wire indicates length-prefixing. For us, that becomes narg
	// == 4.
	narg := b>>6 + 1
	if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver {
		return fmt.Errorf("unknown event type %d", typ)
	}

	switch typ {
	case EvString:
		if flags&skipStrings != 0 {
			// String dictionary entry [ID, length, string].
			if _, err := p.readVal(); err != nil {
				return errMalformedVarint
			}
			ln, err := p.readVal()
			if err != nil {
				return err
			}
			if !p.discard(ln) {
				return fmt.Errorf("failed to read trace: %w", io.EOF)
			}
		} else {
			// String dictionary entry [ID, length, string].
			id, err := p.readVal()
			if err != nil {
				return err
			}
			if id == 0 {
				return errors.New("string has invalid id 0")
			}
			if p.strings[id] != "" {
				return fmt.Errorf("string has duplicate id %d", id)
			}
			var ln uint64
			ln, err = p.readVal()
			if err != nil {
				return err
			}
			if ln == 0 {
				return errors.New("string has invalid length 0")
			}
			if ln > 1e6 {
				return fmt.Errorf("string has too large length %d", ln)
			}
			buf, err := p.readFull(int(ln))
			if err != nil {
				return fmt.Errorf("failed to read trace: %w", err)
			}
			p.strings[id] = string(buf)
		}

		ev.typ = EvNone
		return nil
	case EvBatch:
		if want := byte(2); narg != want {
			return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want)
		}

		// -1 because we've already read the first byte of the batch
		off := p.off - 1

		pid, err := p.readVal()
		if err != nil {
			return err
		}
		if pid != math.MaxUint64 && pid > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxInt32))
		}

		var pid32 int32
		if pid == math.MaxUint64 {
			pid32 = -1
		} else {
			pid32 = int32(pid)
		}

		v, err := p.readVal()
		if err != nil {
			return err
		}

		*ev = rawEvent{
			typ:         EvBatch,
			args:        p.args[:0],
			batchPid:    pid32,
			batchOffset: off,
		}
		ev.args = append(ev.args, pid, v)
		return nil
	default:
		*ev = rawEvent{typ: typ, args: p.args[:0]}
		if narg <= inlineArgs {
			if flags&skipArgs == 0 {
				for i := 0; i < int(narg); i++ {
					v, err := p.readVal()
					if err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, err)
					}
					ev.args = append(ev.args, v)
				}
			} else {
				for i := 0; i < int(narg); i++ {
					if _, err := p.readVal(); err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint)
					}
				}
			}
		} else {
			// More than inlineArgs args; the first value is the length of the
			// event in bytes.
			v, err := p.readVal()
			if err != nil {
				return fmt.Errorf("failed to read event %d argument: %w", typ, err)
			}

			if limit := uint64(2048); v > limit {
				// At the time of Go 1.19, v seems to be at most 128. Set 2048
				// as a generous upper limit and guard against malformed traces.
				return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit)
			}

			if flags&skipArgs == 0 || typ == EvCPUSample {
				buf, err := p.readFull(int(v))
				if err != nil {
					return fmt.Errorf("failed to read trace: %w", err)
				}
				for len(buf) > 0 {
					var v uint64
					v, buf, err = readValFrom(buf)
					if err != nil {
						return err
					}
					ev.args = append(ev.args, v)
				}
			} else {
				// Skip over arguments
				if !p.discard(v) {
					return fmt.Errorf("failed to read trace: %w", io.EOF)
				}
			}
			if typ == EvUserLog {
				// EvUserLog records are followed by a value string
				if flags&skipArgs == 0 {
					// Read string
					s, err := p.readStr()
					if err != nil {
						return err
					}
					ev.sargs = append(ev.sargs, s)
				} else {
					// Skip string
					v, err := p.readVal()
					if err != nil {
						return err
					}
					if !p.discard(v) {
						return io.EOF
					}
				}
			}
		}

		p.args = ev.args[:0]
		return nil
	}
}
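
// As a worked example of the header byte decoded in readRawEvent: 0x57 is
// 0b01_010111. The low six bits (0b010111 = 23) select EvGoBlockRecv, and the
// top two bits (0b01) plus one give narg == 2, matching EvGoBlockRecv's
// logical arguments: a timestamp delta and a stack ID.
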
// loadBatch loads the next batch for pid and appends its contents to events.
func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) {
	offsets := p.batchOffsets[pid]
	if len(offsets) == 0 {
		return nil, io.EOF
	}
	n := offsets[0].numEvents
	offset := offsets[0].offset
	offsets = offsets[1:]
	p.batchOffsets[pid] = offsets

	p.off = offset

	if cap(events) < n {
		events = make([]Event, 0, n)
	}

	gotHeader := false
	var raw rawEvent
	var ev Event
	for {
		err := p.readRawEvent(0, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if raw.typ == EvNone || raw.typ == EvCPUSample {
			continue
		}
		if raw.typ == EvBatch {
			if gotHeader {
				break
			} else {
				gotHeader = true
			}
		}

		err = p.parseEvent(&raw, &ev)
		if err != nil {
			return nil, err
		}
		if ev.Type != EvNone {
			events = append(events, ev)
		}
	}

	return events, nil
}

func (p *parser) readStr() (s string, err error) {
	sz, err := p.readVal()
	if err != nil {
		return "", err
	}
	if sz == 0 {
		return "", nil
	}
	if sz > 1e6 {
		return "", fmt.Errorf("string is too large (len=%d)", sz)
	}
	buf, err := p.readFull(int(sz))
	if err != nil {
		return "", fmt.Errorf("failed to read trace: %w", err)
	}
	return string(buf), nil
}

// parseEvent transforms raw events into events. It also analyzes and verifies
// per-event-type arguments.
func (p *parser) parseEvent(raw *rawEvent, ev *Event) error {
	desc := &EventDescriptions[raw.typ]
	if desc.Name == "" {
		return fmt.Errorf("missing description for event type %d", raw.typ)
	}
	narg := raw.argNum()
	if len(raw.args) != narg {
		return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args))
	}
	switch raw.typ {
	case EvBatch:
		p.lastGs[p.lastP] = p.lastG
		if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32))
		}
		if raw.args[0] == math.MaxUint64 {
			p.lastP = -1
		} else {
			p.lastP = int32(raw.args[0])
		}
		p.lastG = p.lastGs[p.lastP]
		p.lastTs = Timestamp(raw.args[1])
	case EvFrequency:
		p.ticksPerSec = int64(raw.args[0])
		if p.ticksPerSec <= 0 {
			// The most likely cause for this is tick skew on different CPUs.
			// For example, solaris/amd64 seems to have wildly different
			// ticks on different CPUs.
			return ErrTimeOrder
		}
	case EvTimerGoroutine:
		p.timerGoids[raw.args[0]] = true
	case EvStack:
		if len(raw.args) < 2 {
			return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args))
		}
		size := raw.args[1]
		if size > 1000 {
			return fmt.Errorf("EvStack has bad number of frames: %d", size)
		}
		want := 2 + 4*size
		if uint64(len(raw.args)) != want {
			return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args))
		}
		id := uint32(raw.args[0])
		if id != 0 && size > 0 {
			stk := p.allocateStack(size)
			for i := 0; i < int(size); i++ {
				pc := raw.args[2+i*4+0]
				fn := raw.args[2+i*4+1]
				file := raw.args[2+i*4+2]
				line := raw.args[2+i*4+3]
				stk[i] = pc

				if _, ok := p.pcs[pc]; !ok {
					p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)}
				}
			}
			p.stacks[id] = stk
		}
	case EvCPUSample:
		// These events get parsed during the indexing step and don't strictly
		// belong to the batch.
	default:
		*ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG}
		var argOffset int
		ev.Ts = p.lastTs + Timestamp(raw.args[0])
		argOffset = 1
		p.lastTs = ev.Ts
		for i := argOffset; i < narg; i++ {
			if i == narg-1 && desc.Stack {
				ev.StkID = uint32(raw.args[i])
			} else {
				ev.Args[i-argOffset] = raw.args[i]
			}
		}
		switch raw.typ {
		case EvGoStart, EvGoStartLocal, EvGoStartLabel:
			p.lastG = ev.Args[0]
			ev.G = p.lastG
		case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
			EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
			EvGoSysBlock, EvGoBlockGC:
			p.lastG = 0
		case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
			ev.G = ev.Args[0]
		case EvUserTaskCreate:
			// e.Args 0: taskID, 1: parentID, 2: nameID
		case EvUserRegion:
			// e.Args 0: taskID, 1: mode, 2: nameID
		case EvUserLog:
			// e.Args 0: taskID, 1: keyID, 2: stackID, 3: messageID
			// raw.sargs 0: message

			if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok {
				ev.Args[3] = uint64(id)
			} else {
				id := len(p.inlineStrings)
				p.inlineStringsMapping[raw.sargs[0]] = id
				p.inlineStrings = append(p.inlineStrings, raw.sargs[0])
				ev.Args[3] = uint64(id)
			}
		}

		return nil
	}

	ev.Type = EvNone
	return nil
}

// ErrTimeOrder is returned by Parse when the trace contains
// time stamps that do not respect actual event ordering.
var ErrTimeOrder = errors.New("time stamps out of order")
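
// To illustrate the delta encoding handled in parseEvent's default case: if an
// EvBatch header carried the tick count 1000 (so p.lastTs == 1000) and the
// next event's first argument is the delta 5, that event's Ts becomes 1005
// ticks, and p.lastTs advances to 1005 for the event after it.
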
// postProcessTrace does inter-event verification and information restoration.
// The resulting trace is guaranteed to be consistent
// (for example, a P does not run two Gs at the same time, or a G is indeed
// blocked before an unblock event).
func (p *parser) postProcessTrace(events Events) error {
	const (
		gDead = iota
		gRunnable
		gRunning
		gWaiting
	)
	type gdesc struct {
		state        int
		ev           *Event
		evStart      *Event
		evCreate     *Event
		evMarkAssist *Event
	}
	type pdesc struct {
		running bool
		g       uint64
		evSweep *Event
	}

	gs := make(map[uint64]gdesc)
	ps := make(map[int32]pdesc)
	tasks := make(map[uint64]*Event)           // task id to task creation events
	activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
	gs[0] = gdesc{state: gRunning}
	var evGC, evSTW *Event

	checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
		name := EventDescriptions[ev.Type].Name
		if g.state != gRunning {
			return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts)
		}
		if p.g != ev.G {
			return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts)
		}
		if !allowG0 && ev.G == 0 {
			return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts)
		}
		return nil
	}

	for evIdx := 0; evIdx < events.Len(); evIdx++ {
		ev := events.Ptr(evIdx)

		switch ev.Type {
		case EvProcStart:
			p := ps[ev.P]
			if p.running {
				return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts)
			}
			p.running = true

			ps[ev.P] = p
		case EvProcStop:
			p := ps[ev.P]
			if !p.running {
				return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts)
			}
			if p.g != 0 {
				return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts)
			}
			p.running = false

			ps[ev.P] = p
		case EvGCStart:
			if evGC != nil {
				return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts)
			}
			evGC = ev
			// Attribute this to the global GC state.
			ev.P = GCP
		case EvGCDone:
			if evGC == nil {
				return fmt.Errorf("bogus GC end (time %d)", ev.Ts)
			}
			evGC = nil
		case EvSTWStart:
			if evSTW != nil {
				return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts)
			}
			evSTW = ev
		case EvSTWDone:
			if evSTW == nil {
				return fmt.Errorf("bogus STW end (time %d)", ev.Ts)
			}
			evSTW = nil
		case EvGCSweepStart:
			p := ps[ev.P]
			if p.evSweep != nil {
				return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts)
			}
			p.evSweep = ev

			ps[ev.P] = p
		case EvGCMarkAssistStart:
			g := gs[ev.G]
			if g.evMarkAssist != nil {
				return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts)
			}
			g.evMarkAssist = ev

			gs[ev.G] = g
		case EvGCMarkAssistDone:
			// Unlike most events, mark assists can be in progress when a
			// goroutine starts tracing, so we can't report an error here.
			g := gs[ev.G]
			g.evMarkAssist = nil

			gs[ev.G] = g
		case EvGCSweepDone:
			p := ps[ev.P]
			if p.evSweep == nil {
				return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts)
			}
			p.evSweep = nil

			ps[ev.P] = p
		case EvGoWaiting:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoInSyscall:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoCreate:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, true); err != nil {
				return err
			}
			if _, ok := gs[ev.Args[0]]; ok {
				return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts)
			}
			gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}

		case EvGoStart, EvGoStartLabel:
			g := gs[ev.G]
			p := ps[ev.P]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts)
			}
			if p.g != 0 {
				return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts)
			}
			g.state = gRunning
			g.evStart = ev
			p.g = ev.G
			if g.evCreate != nil {
				ev.StkID = uint32(g.evCreate.Args[1])
				g.evCreate = nil
			}

			g.ev = nil

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoEnd, EvGoStop:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.evStart = nil
			g.state = gDead
			p.g = 0

			if ev.Type == EvGoEnd { // flush all active regions
				delete(activeRegions, ev.G)
			}

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoSched, EvGoPreempt:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gRunnable
			g.evStart = nil
			p.g = 0
			g.ev = ev

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoUnblock:
			g := gs[ev.G]
			p := ps[ev.P]
			if g.state != gRunning {
				return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts)
			}
			if ev.P != TimerP && p.g != ev.G {
				return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts)
			}
			g1 := gs[ev.Args[0]]
			if g1.state != gWaiting {
				return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts)
			}
			if g1.ev != nil && g1.ev.Type == EvGoBlockNet {
				ev.P = NetpollP
			}
			g1.state = gRunnable
			g1.ev = ev
			gs[ev.Args[0]] = g1

		case EvGoSysCall:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.ev = ev

			gs[ev.G] = g
		case EvGoSysBlock:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.evStart = nil
			p.g = 0

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoSysExit:
			g := gs[ev.G]
			if g.state != gWaiting {
				return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts)
			}
			g.state = gRunnable
			g.ev = ev

			gs[ev.G] = g
		case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.ev = ev
			g.evStart = nil
			p.g = 0

			gs[ev.G] = g
			ps[ev.P] = p
		case EvUserTaskCreate:
			taskid := ev.Args[0]
			if prevEv, ok := tasks[taskid]; ok {
				return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
			}
			tasks[ev.Args[0]] = ev

		case EvUserTaskEnd:
			taskid := ev.Args[0]
			delete(tasks, taskid)

		case EvUserRegion:
			mode := ev.Args[1]
			regions := activeRegions[ev.G]
			if mode == 0 { // region start
				activeRegions[ev.G] = append(regions, ev) // push
			} else if mode == 1 { // region end
				n := len(regions)
				if n > 0 { // matching region start event is in the trace.
					s := regions[n-1]
					if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch
						return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
					}

					if n > 1 {
						activeRegions[ev.G] = regions[:n-1]
					} else {
						delete(activeRegions, ev.G)
					}
				}
			} else {
				return fmt.Errorf("invalid user region mode: %q", ev)
			}
		}

		if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 {
			// Make sure events don't refer to stacks that don't exist or to
			// stacks with zero frames. Neither of these should be possible, but
			// better safe than sorry.
			ev.StkID = 0
		}
	}

	// TODO(mknyszek): restore stacks for EvGoStart events.
	return nil
}

var errMalformedVarint = errors.New("malformed base-128 varint")

// readVal reads an unsigned base-128 value from the parser's data.
func (p *parser) readVal() (uint64, error) {
	v, n := binary.Uvarint(p.data[p.off:])
	if n <= 0 {
		return 0, errMalformedVarint
	}
	p.off += n
	return v, nil
}

func readValFrom(buf []byte) (v uint64, rem []byte, err error) {
	v, n := binary.Uvarint(buf)
	if n <= 0 {
		return 0, nil, errMalformedVarint
	}
	return v, buf[n:], nil
}

func (ev *Event) String() string {
	desc := &EventDescriptions[ev.Type]
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID)
	for i, a := range desc.Args {
		fmt.Fprintf(w, " %s=%d", a, ev.Args[i])
	}
	return w.String()
}

// argNum returns the total number of args for the event, accounting for
// timestamps, sequence numbers and differences between trace format versions.
func (raw *rawEvent) argNum() int {
	desc := &EventDescriptions[raw.typ]
	if raw.typ == EvStack {
		return len(raw.args)
	}
	narg := len(desc.Args)
	if desc.Stack {
		narg++
	}
	switch raw.typ {
	case EvBatch, EvFrequency, EvTimerGoroutine:
		return narg
	}
	narg++ // timestamp
	return narg
}
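
// For example, for EvGoUnblock the description lists the logical arguments
// "g" and "seq" and marks the event as carrying a stack, so argNum returns
// 2 + 1 (stack) + 1 (timestamp) = 4, which is exactly the number of varints
// readRawEvent decodes for it.
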
// Event types in the trace.
// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
const (
	EvNone              event.Type = 0  // unused
	EvBatch             event.Type = 1  // start of per-P batch of events [pid, timestamp]
	EvFrequency         event.Type = 2  // contains tracer timer frequency [frequency (ticks per second)]
	EvStack             event.Type = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	EvGomaxprocs        event.Type = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	EvProcStart         event.Type = 5  // start of P [timestamp, thread id]
	EvProcStop          event.Type = 6  // stop of P [timestamp]
	EvGCStart           event.Type = 7  // GC start [timestamp, seq, stack id]
	EvGCDone            event.Type = 8  // GC done [timestamp]
	EvSTWStart          event.Type = 9  // GC mark termination start [timestamp, kind]
	EvSTWDone           event.Type = 10 // GC mark termination done [timestamp]
	EvGCSweepStart      event.Type = 11 // GC sweep start [timestamp, stack id]
	EvGCSweepDone       event.Type = 12 // GC sweep done [timestamp, swept, reclaimed]
	EvGoCreate          event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	EvGoStart           event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq]
	EvGoEnd             event.Type = 15 // goroutine ends [timestamp]
	EvGoStop            event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack]
	EvGoSched           event.Type = 17 // goroutine calls Gosched [timestamp, stack]
	EvGoPreempt         event.Type = 18 // goroutine is preempted [timestamp, stack]
	EvGoSleep           event.Type = 19 // goroutine calls Sleep [timestamp, stack]
	EvGoBlock           event.Type = 20 // goroutine blocks [timestamp, stack]
	EvGoUnblock         event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	EvGoBlockSend       event.Type = 22 // goroutine blocks on chan send [timestamp, stack]
	EvGoBlockRecv       event.Type = 23 // goroutine blocks on chan recv [timestamp, stack]
	EvGoBlockSelect     event.Type = 24 // goroutine blocks on select [timestamp, stack]
	EvGoBlockSync       event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	EvGoBlockCond       event.Type = 26 // goroutine blocks on Cond [timestamp, stack]
	EvGoBlockNet        event.Type = 27 // goroutine blocks on network [timestamp, stack]
	EvGoSysCall         event.Type = 28 // syscall enter [timestamp, stack]
	EvGoSysExit         event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	EvGoSysBlock        event.Type = 30 // syscall blocks [timestamp]
	EvGoWaiting         event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	EvGoInSyscall       event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	EvHeapAlloc         event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes]
	EvHeapGoal          event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
	EvTimerGoroutine    event.Type = 35 // denotes timer goroutine [timer goroutine id]
	EvFutileWakeup      event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
	EvString            event.Type = 37 // string dictionary entry [ID, length, string]
	EvGoStartLocal      event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	EvGoUnblockLocal    event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	EvGoSysExitLocal    event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	EvGoStartLabel      event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	EvGoBlockGC         event.Type = 42 // goroutine blocks on GC assist [timestamp, stack]
	EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack]
	EvGCMarkAssistDone  event.Type = 44 // GC mark assist done [timestamp]
	EvUserTaskCreate    event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string]
	EvUserTaskEnd       event.Type = 46 // end of task [timestamp, internal task id, stack]
	EvUserRegion        event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string]
	EvUserLog           event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
	EvCPUSample         event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
	EvCount             event.Type = 50
)

var EventDescriptions = [256]struct {
	Name       string
	minVersion version.Version
	Stack      bool
	Args       []string
	SArgs      []string // string arguments
}{
	EvNone:              {"None", 5, false, []string{}, nil},
	EvBatch:             {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
	EvFrequency:         {"Frequency", 5, false, []string{"freq"}, nil},  // in 1.5 format it was {"freq", "unused"}
	EvStack:             {"Stack", 5, false, []string{"id", "siz"}, nil},
	EvGomaxprocs:        {"Gomaxprocs", 5, true, []string{"procs"}, nil},
	EvProcStart:         {"ProcStart", 5, false, []string{"thread"}, nil},
	EvProcStop:          {"ProcStop", 5, false, []string{}, nil},
	EvGCStart:           {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {}
	EvGCDone:            {"GCDone", 5, false, []string{}, nil},
	EvSTWStart:          {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
	EvSTWDone:           {"GCSTWDone", 5, false, []string{}, nil},
	EvGCSweepStart:      {"GCSweepStart", 5, true, []string{}, nil},
	EvGCSweepDone:       {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
	EvGoCreate:          {"GoCreate", 5, true, []string{"g", "stack"}, nil},
	EvGoStart:           {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoEnd:             {"GoEnd", 5, false, []string{}, nil},
	EvGoStop:            {"GoStop", 5, true, []string{}, nil},
	EvGoSched:           {"GoSched", 5, true, []string{}, nil},
	EvGoPreempt:         {"GoPreempt", 5, true, []string{}, nil},
	EvGoSleep:           {"GoSleep", 5, true, []string{}, nil},
	EvGoBlock:           {"GoBlock", 5, true, []string{}, nil},
	EvGoUnblock:         {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoBlockSend:       {"GoBlockSend", 5, true, []string{}, nil},
	EvGoBlockRecv:       {"GoBlockRecv", 5, true, []string{}, nil},
	EvGoBlockSelect:     {"GoBlockSelect", 5, true, []string{}, nil},
	EvGoBlockSync:       {"GoBlockSync", 5, true, []string{}, nil},
	EvGoBlockCond:       {"GoBlockCond", 5, true, []string{}, nil},
	EvGoBlockNet:        {"GoBlockNet", 5, true, []string{}, nil},
	EvGoSysCall:         {"GoSysCall", 5, true, []string{}, nil},
	EvGoSysExit:         {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil},
	EvGoSysBlock:        {"GoSysBlock", 5, false, []string{}, nil},
	EvGoWaiting:         {"GoWaiting", 5, false, []string{"g"}, nil},
	EvGoInSyscall:       {"GoInSyscall", 5, false, []string{"g"}, nil},
	EvHeapAlloc:         {"HeapAlloc", 5, false, []string{"mem"}, nil},
	EvHeapGoal:          {"HeapGoal", 5, false, []string{"mem"}, nil},
	EvTimerGoroutine:    {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
	EvFutileWakeup:      {"FutileWakeup", 5, false, []string{}, nil},
	EvString:            {"String", 7, false, []string{}, nil},
	EvGoStartLocal:      {"GoStartLocal", 7, false, []string{"g"}, nil},
	EvGoUnblockLocal:    {"GoUnblockLocal", 7, true, []string{"g"}, nil},
	EvGoSysExitLocal:    {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil},
	EvGoStartLabel:      {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}},
	EvGoBlockGC:         {"GoBlockGC", 8, true, []string{}, nil},
	EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil},
	EvGCMarkAssistDone:  {"GCMarkAssistDone", 9, false, []string{}, nil},
	EvUserTaskCreate:    {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
	EvUserTaskEnd:       {"UserTaskEnd", 11, true, []string{"taskid"}, nil},
	EvUserRegion:        {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
	EvUserLog:           {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}},
	EvCPUSample:         {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil},
}

//gcassert:inline
func (p *parser) allocateStack(size uint64) []uint64 {
	if size == 0 {
		return nil
	}

	// Stacks are plentiful but small. For our "Staticcheck on std" trace with
	// 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To
	// avoid making 500,000 small allocations we allocate backing arrays 1 MiB
	// at a time.
	out := p.stacksData
	if uint64(len(out)) < size {
		out = make([]uint64, 1024*128)
	}
	p.stacksData = out[size:]
	return out[:size:size]
}

func (tr *Trace) STWReason(kindID uint64) STWReason {
	if tr.Version < 21 {
		if kindID == 0 || kindID == 1 {
			return STWReason(kindID + 1)
		} else {
			return STWUnknown
		}
	} else if tr.Version == 21 {
		if kindID < NumSTWReasons {
			return STWReason(kindID)
		} else {
			return STWUnknown
		}
	} else {
		return STWUnknown
	}
}

type STWReason int

const (
	STWUnknown                 STWReason = 0
	STWGCMarkTermination       STWReason = 1
	STWGCSweepTermination      STWReason = 2
	STWWriteHeapDump           STWReason = 3
	STWGoroutineProfile        STWReason = 4
	STWGoroutineProfileCleanup STWReason = 5
	STWAllGoroutinesStackTrace STWReason = 6
	STWReadMemStats            STWReason = 7
	STWAllThreadsSyscall       STWReason = 8
	STWGOMAXPROCS              STWReason = 9
	STWStartTrace              STWReason = 10
	STWStopTrace               STWReason = 11
	STWCountPagesInUse         STWReason = 12
	STWReadMetricsSlow         STWReason = 13
	STWReadMemStatsSlow        STWReason = 14
	STWPageCachePagesLeaked    STWReason = 15
	STWResetDebugLog           STWReason = 16

	NumSTWReasons = 17
)
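
// Mapping note for STWReason: traces from before Go 1.21 only distinguish
// kindID 0 (mark termination) and 1 (sweep termination), which STWReason
// shifts by one to STWGCMarkTermination and STWGCSweepTermination. Go 1.21
// traces already use this enumeration directly, so the kindID is returned
// as-is when it is in range.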