github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/internal/trace/v2/order.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  
    11  	"internal/trace/v2/event"
    12  	"internal/trace/v2/event/go122"
    13  	"internal/trace/v2/version"
    14  )
    15  
// ordering emulates Go scheduler state for both validation and
// for putting events in the right order.
//
// The interface to ordering consists of two methods: Advance
// and Next. Advance is called to try and advance an event and
// add completed events to the ordering. Next is used to pick
// off events in the ordering.
//
// Fields:
//   - gStates, pStates, mStates hold the emulated per-goroutine, per-proc
//     and per-thread state, keyed by their respective IDs.
//   - initialGen is the generation passed to the first Advance call that
//     supplied a non-zero generation; handlers compare against it to tell
//     statuses emitted in the first generation from later ones.
//   - queue receives the events that handlers have successfully advanced.
//   - activeTasks, gcSeq and gcState are presumably maintained by the task
//     and GC handlers. NOTE(review): confirm against those handlers.
type ordering struct {
	gStates     map[GoID]*gState
	pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
	mStates     map[ThreadID]*mState
	activeTasks map[TaskID]taskState
	gcSeq       uint64
	gcState     gcState
	initialGen  uint64
	queue       queue[Event]
}
    33  
    34  // Advance checks if it's valid to proceed with ev which came from thread m.
    35  //
    36  // It assumes the gen value passed to it is monotonically increasing across calls.
    37  //
    38  // If any error is returned, then the trace is broken and trace parsing must cease.
    39  // If it's not valid to advance with ev, but no error was encountered, the caller
    40  // should attempt to advance with other candidate events from other threads. If the
    41  // caller runs out of candidates, the trace is invalid.
    42  //
    43  // If this returns true, Next is guaranteed to return a complete event. However,
    44  // multiple events may be added to the ordering, so the caller should (but is not
    45  // required to) continue to call Next until it is exhausted.
    46  func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
    47  	if o.initialGen == 0 {
    48  		// Set the initial gen if necessary.
    49  		o.initialGen = gen
    50  	}
    51  
    52  	var curCtx, newCtx schedCtx
    53  	curCtx.M = m
    54  	newCtx.M = m
    55  
    56  	var ms *mState
    57  	if m == NoThread {
    58  		curCtx.P = NoProc
    59  		curCtx.G = NoGoroutine
    60  		newCtx = curCtx
    61  	} else {
    62  		// Pull out or create the mState for this event.
    63  		var ok bool
    64  		ms, ok = o.mStates[m]
    65  		if !ok {
    66  			ms = &mState{
    67  				g: NoGoroutine,
    68  				p: NoProc,
    69  			}
    70  			o.mStates[m] = ms
    71  		}
    72  		curCtx.P = ms.p
    73  		curCtx.G = ms.g
    74  		newCtx = curCtx
    75  	}
    76  
    77  	f := orderingDispatch[ev.typ]
    78  	if f == nil {
    79  		return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
    80  	}
    81  	newCtx, ok, err := f(o, ev, evt, m, gen, curCtx)
    82  	if err == nil && ok && ms != nil {
    83  		// Update the mState for this event.
    84  		ms.p = newCtx.P
    85  		ms.g = newCtx.G
    86  	}
    87  	return ok, err
    88  }
    89  
// orderingHandleFunc is the signature shared by all per-event-type handlers
// that Advance dispatches to. It receives the event, the event table, the
// thread and generation the event came from, and the scheduling context
// Advance built for that thread. It returns the context after the event,
// whether the event was advanced, and an error. Advance writes the returned
// context's P and G back to the thread's state only when the handler
// advanced without error.
type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error)
    91  
// orderingDispatch maps an event type to the handler that Advance invokes
// for it. Advance indexes this table with the event's type; an entry that is
// left nil causes Advance to report a bad event type.
var orderingDispatch = [256]orderingHandleFunc{
	// Procs.
	go122.EvProcsChange: (*ordering).advanceAnnotation,
	go122.EvProcStart:   (*ordering).advanceProcStart,
	go122.EvProcStop:    (*ordering).advanceProcStop,
	go122.EvProcSteal:   (*ordering).advanceProcSteal,
	go122.EvProcStatus:  (*ordering).advanceProcStatus,

	// Goroutines.
	go122.EvGoCreate:            (*ordering).advanceGoCreate,
	go122.EvGoCreateSyscall:     (*ordering).advanceGoCreateSyscall,
	go122.EvGoStart:             (*ordering).advanceGoStart,
	go122.EvGoDestroy:           (*ordering).advanceGoStopExec,
	go122.EvGoDestroySyscall:    (*ordering).advanceGoDestroySyscall,
	go122.EvGoStop:              (*ordering).advanceGoStopExec,
	go122.EvGoBlock:             (*ordering).advanceGoStopExec,
	go122.EvGoUnblock:           (*ordering).advanceGoUnblock,
	go122.EvGoSyscallBegin:      (*ordering).advanceGoSyscallBegin,
	go122.EvGoSyscallEnd:        (*ordering).advanceGoSyscallEnd,
	go122.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked,
	go122.EvGoStatus:            (*ordering).advanceGoStatus,

	// STW.
	go122.EvSTWBegin: (*ordering).advanceGoRangeBegin,
	go122.EvSTWEnd:   (*ordering).advanceGoRangeEnd,

	// GC events.
	go122.EvGCActive:           (*ordering).advanceGCActive,
	go122.EvGCBegin:            (*ordering).advanceGCBegin,
	go122.EvGCEnd:              (*ordering).advanceGCEnd,
	go122.EvGCSweepActive:      (*ordering).advanceGCSweepActive,
	go122.EvGCSweepBegin:       (*ordering).advanceGCSweepBegin,
	go122.EvGCSweepEnd:         (*ordering).advanceGCSweepEnd,
	go122.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive,
	go122.EvGCMarkAssistBegin:  (*ordering).advanceGoRangeBegin,
	go122.EvGCMarkAssistEnd:    (*ordering).advanceGoRangeEnd,
	go122.EvHeapAlloc:          (*ordering).advanceHeapMetric,
	go122.EvHeapGoal:           (*ordering).advanceHeapMetric,

	// Annotations.
	go122.EvGoLabel:         (*ordering).advanceAnnotation,
	go122.EvUserTaskBegin:   (*ordering).advanceUserTaskBegin,
	go122.EvUserTaskEnd:     (*ordering).advanceUserTaskEnd,
	go122.EvUserRegionBegin: (*ordering).advanceUserRegionBegin,
	go122.EvUserRegionEnd:   (*ordering).advanceUserRegionEnd,
	go122.EvUserLog:         (*ordering).advanceAnnotation,

	// Coroutines. Added in Go 1.23.
	go122.EvGoSwitch:        (*ordering).advanceGoSwitch,
	go122.EvGoSwitchDestroy: (*ordering).advanceGoSwitch,
	go122.EvGoCreateBlocked: (*ordering).advanceGoCreate,
}
   144  
   145  func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   146  	pid := ProcID(ev.args[0])
   147  	status := go122.ProcStatus(ev.args[1])
   148  	if int(status) >= len(go122ProcStatus2ProcState) {
   149  		return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
   150  	}
   151  	oldState := go122ProcStatus2ProcState[status]
   152  	if s, ok := o.pStates[pid]; ok {
   153  		if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall {
   154  			// ProcSyscallAbandoned is a special case of ProcSyscall. It indicates a
   155  			// potential loss of information, but if we're already in ProcSyscall,
   156  			// we haven't lost the relevant information. Promote the status and advance.
   157  			oldState = ProcRunning
   158  			ev.args[1] = uint64(go122.ProcSyscall)
   159  		} else if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscallAbandoned {
   160  			// If we're passing through ProcSyscallAbandoned, then there's no promotion
   161  			// to do. We've lost the M that this P is associated with. However it got there,
   162  			// it's going to appear as idle in the API, so pass through as idle.
   163  			oldState = ProcIdle
   164  			ev.args[1] = uint64(go122.ProcSyscallAbandoned)
   165  		} else if s.status != status {
   166  			return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
   167  		}
   168  		s.seq = makeSeq(gen, 0) // Reset seq.
   169  	} else {
   170  		o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)}
   171  		if gen == o.initialGen {
   172  			oldState = ProcUndetermined
   173  		} else {
   174  			oldState = ProcNotExist
   175  		}
   176  	}
   177  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
   178  
   179  	// Bind the proc to the new context, if it's running.
   180  	newCtx := curCtx
   181  	if status == go122.ProcRunning || status == go122.ProcSyscall {
   182  		newCtx.P = pid
   183  	}
   184  	// If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've
   185  	// promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this
   186  	// P is about to get stolen and its status very likely isn't being emitted by the same
   187  	// thread it was bound to. Since this status is Running -> Running and Running is binding,
   188  	// we need to make sure we emit it in the right context: the context to which it is bound.
   189  	// Find it, and set our current context to it.
   190  	if status == go122.ProcSyscallAbandoned && oldState == ProcRunning {
   191  		// N.B. This is slow but it should be fairly rare.
   192  		found := false
   193  		for mid, ms := range o.mStates {
   194  			if ms.p == pid {
   195  				curCtx.M = mid
   196  				curCtx.P = pid
   197  				curCtx.G = ms.g
   198  				found = true
   199  			}
   200  		}
   201  		if !found {
   202  			return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
   203  		}
   204  	}
   205  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   206  	return newCtx, true, nil
   207  }
   208  
   209  func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   210  	pid := ProcID(ev.args[0])
   211  	seq := makeSeq(gen, ev.args[1])
   212  
   213  	// Try to advance. We might fail here due to sequencing, because the P hasn't
   214  	// had a status emitted, or because we already have a P and we're in a syscall,
   215  	// and we haven't observed that it was stolen from us yet.
   216  	state, ok := o.pStates[pid]
   217  	if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc {
   218  		// We can't make an inference as to whether this is bad. We could just be seeing
   219  		// a ProcStart on a different M before the proc's state was emitted, or before we
   220  		// got to the right point in the trace.
   221  		//
   222  		// Note that we also don't advance here if we have a P and we're in a syscall.
   223  		return curCtx, false, nil
   224  	}
   225  	// We can advance this P. Check some invariants.
   226  	//
   227  	// We might have a goroutine if a goroutine is exiting a syscall.
   228  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave}
   229  	if err := validateCtx(curCtx, reqs); err != nil {
   230  		return curCtx, false, err
   231  	}
   232  	state.status = go122.ProcRunning
   233  	state.seq = seq
   234  	newCtx := curCtx
   235  	newCtx.P = pid
   236  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   237  	return newCtx, true, nil
   238  }
   239  
   240  func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   241  	// We must be able to advance this P.
   242  	//
   243  	// There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P
   244  	// is stopped by the same M that started it, while ProcSteal is used when another M
   245  	// steals the P by stopping it from a distance.
   246  	//
   247  	// Since a P is bound to an M, and we're stopping on the same M we started, it must
   248  	// always be possible to advance the current M's P from a ProcStop. This is also why
   249  	// ProcStop doesn't need a sequence number.
   250  	state, ok := o.pStates[curCtx.P]
   251  	if !ok {
   252  		return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.P)
   253  	}
   254  	if state.status != go122.ProcRunning && state.status != go122.ProcSyscall {
   255  		return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(ev.typ), go122.ProcRunning, go122.ProcSyscall)
   256  	}
   257  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
   258  	if err := validateCtx(curCtx, reqs); err != nil {
   259  		return curCtx, false, err
   260  	}
   261  	state.status = go122.ProcIdle
   262  	newCtx := curCtx
   263  	newCtx.P = NoProc
   264  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   265  	return newCtx, true, nil
   266  }
   267  
   268  func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   269  	pid := ProcID(ev.args[0])
   270  	seq := makeSeq(gen, ev.args[1])
   271  	state, ok := o.pStates[pid]
   272  	if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) {
   273  		// We can't make an inference as to whether this is bad. We could just be seeing
   274  		// a ProcStart on a different M before the proc's state was emitted, or before we
   275  		// got to the right point in the trace.
   276  		return curCtx, false, nil
   277  	}
   278  	// We can advance this P. Check some invariants.
   279  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}
   280  	if err := validateCtx(curCtx, reqs); err != nil {
   281  		return curCtx, false, err
   282  	}
   283  	// Smuggle in the P state that let us advance so we can surface information to the event.
   284  	// Specifically, we need to make sure that the event is interpreted not as a transition of
   285  	// ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead.
   286  	//
   287  	// ProcRunning is binding, but we may be running with a P on the current M and we can't
   288  	// bind another P. This P is about to go ProcIdle anyway.
   289  	oldStatus := state.status
   290  	ev.extra(version.Go122)[0] = uint64(oldStatus)
   291  
   292  	// Update the P's status and sequence number.
   293  	state.status = go122.ProcIdle
   294  	state.seq = seq
   295  
   296  	// If we've lost information then don't try to do anything with the M.
   297  	// It may have moved on and we can't be sure.
   298  	if oldStatus == go122.ProcSyscallAbandoned {
   299  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   300  		return curCtx, true, nil
   301  	}
   302  
   303  	// Validate that the M we're stealing from is what we expect.
   304  	mid := ThreadID(ev.args[2]) // The M we're stealing from.
   305  
   306  	newCtx := curCtx
   307  	if mid == curCtx.M {
   308  		// We're stealing from ourselves. This behaves like a ProcStop.
   309  		if curCtx.P != pid {
   310  			return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
   311  		}
   312  		newCtx.P = NoProc
   313  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   314  		return newCtx, true, nil
   315  	}
   316  
   317  	// We're stealing from some other M.
   318  	mState, ok := o.mStates[mid]
   319  	if !ok {
   320  		return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
   321  	}
   322  
   323  	// Make sure we're actually stealing the right P.
   324  	if mState.p != pid {
   325  		return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
   326  	}
   327  
   328  	// Tell the M it has no P so it can proceed.
   329  	//
   330  	// This is safe because we know the P was in a syscall and
   331  	// the other M must be trying to get out of the syscall.
   332  	// GoSyscallEndBlocked cannot advance until the corresponding
   333  	// M loses its P.
   334  	mState.p = NoProc
   335  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   336  	return newCtx, true, nil
   337  }
   338  
   339  func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   340  	gid := GoID(ev.args[0])
   341  	mid := ThreadID(ev.args[1])
   342  	status := go122.GoStatus(ev.args[2])
   343  
   344  	if int(status) >= len(go122GoStatus2GoState) {
   345  		return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
   346  	}
   347  	oldState := go122GoStatus2GoState[status]
   348  	if s, ok := o.gStates[gid]; ok {
   349  		if s.status != status {
   350  			return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
   351  		}
   352  		s.seq = makeSeq(gen, 0) // Reset seq.
   353  	} else if gen == o.initialGen {
   354  		// Set the state.
   355  		o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
   356  		oldState = GoUndetermined
   357  	} else {
   358  		return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
   359  	}
   360  	ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
   361  
   362  	newCtx := curCtx
   363  	switch status {
   364  	case go122.GoRunning:
   365  		// Bind the goroutine to the new context, since it's running.
   366  		newCtx.G = gid
   367  	case go122.GoSyscall:
   368  		if mid == NoThread {
   369  			return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
   370  		}
   371  		// Is the syscall on this thread? If so, bind it to the context.
   372  		// Otherwise, we're talking about a G sitting in a syscall on an M.
   373  		// Validate the named M.
   374  		if mid == curCtx.M {
   375  			if gen != o.initialGen && curCtx.G != gid {
   376  				// If this isn't the first generation, we *must* have seen this
   377  				// binding occur already. Even if the G was blocked in a syscall
   378  				// for multiple generations since trace start, we would have seen
   379  				// a previous GoStatus event that bound the goroutine to an M.
   380  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
   381  			}
   382  			newCtx.G = gid
   383  			break
   384  		}
   385  		// Now we're talking about a thread and goroutine that have been
   386  		// blocked on a syscall for the entire generation. This case must
   387  		// not have a P; the runtime makes sure that all Ps are traced at
   388  		// the beginning of a generation, which involves taking a P back
   389  		// from every thread.
   390  		ms, ok := o.mStates[mid]
   391  		if ok {
   392  			// This M has been seen. That means we must have seen this
   393  			// goroutine go into a syscall on this thread at some point.
   394  			if ms.g != gid {
   395  				// But the G on the M doesn't match. Something's wrong.
   396  				return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
   397  			}
   398  			// This case is just a Syscall->Syscall event, which needs to
   399  			// appear as having the G currently bound to this M.
   400  			curCtx.G = ms.g
   401  		} else if !ok {
   402  			// The M hasn't been seen yet. That means this goroutine
   403  			// has just been sitting in a syscall on this M. Create
   404  			// a state for it.
   405  			o.mStates[mid] = &mState{g: gid, p: NoProc}
   406  			// Don't set curCtx.G in this case because this event is the
   407  			// binding event (and curCtx represents the "before" state).
   408  		}
   409  		// Update the current context to the M we're talking about.
   410  		curCtx.M = mid
   411  	}
   412  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   413  	return newCtx, true, nil
   414  }
   415  
   416  func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   417  	// Goroutines must be created on a running P, but may or may not be created
   418  	// by a running goroutine.
   419  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
   420  	if err := validateCtx(curCtx, reqs); err != nil {
   421  		return curCtx, false, err
   422  	}
   423  	// If we have a goroutine, it must be running.
   424  	if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning {
   425  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   426  	}
   427  	// This goroutine created another. Add a state for it.
   428  	newgid := GoID(ev.args[0])
   429  	if _, ok := o.gStates[newgid]; ok {
   430  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
   431  	}
   432  	status := go122.GoRunnable
   433  	if ev.typ == go122.EvGoCreateBlocked {
   434  		status = go122.GoWaiting
   435  	}
   436  	o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)}
   437  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   438  	return curCtx, true, nil
   439  }
   440  
   441  func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   442  	// These are goroutine events that all require an active running
   443  	// goroutine on some thread. They must *always* be advance-able,
   444  	// since running goroutines are bound to their M.
   445  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   446  		return curCtx, false, err
   447  	}
   448  	state, ok := o.gStates[curCtx.G]
   449  	if !ok {
   450  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   451  	}
   452  	if state.status != go122.GoRunning {
   453  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   454  	}
   455  	// Handle each case slightly differently; we just group them together
   456  	// because they have shared preconditions.
   457  	newCtx := curCtx
   458  	switch ev.typ {
   459  	case go122.EvGoDestroy:
   460  		// This goroutine is exiting itself.
   461  		delete(o.gStates, curCtx.G)
   462  		newCtx.G = NoGoroutine
   463  	case go122.EvGoStop:
   464  		// Goroutine stopped (yielded). It's runnable but not running on this M.
   465  		state.status = go122.GoRunnable
   466  		newCtx.G = NoGoroutine
   467  	case go122.EvGoBlock:
   468  		// Goroutine blocked. It's waiting now and not running on this M.
   469  		state.status = go122.GoWaiting
   470  		newCtx.G = NoGoroutine
   471  	}
   472  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   473  	return newCtx, true, nil
   474  }
   475  
   476  func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   477  	gid := GoID(ev.args[0])
   478  	seq := makeSeq(gen, ev.args[1])
   479  	state, ok := o.gStates[gid]
   480  	if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) {
   481  		// We can't make an inference as to whether this is bad. We could just be seeing
   482  		// a GoStart on a different M before the goroutine was created, before it had its
   483  		// state emitted, or before we got to the right point in the trace yet.
   484  		return curCtx, false, nil
   485  	}
   486  	// We can advance this goroutine. Check some invariants.
   487  	reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave}
   488  	if err := validateCtx(curCtx, reqs); err != nil {
   489  		return curCtx, false, err
   490  	}
   491  	state.status = go122.GoRunning
   492  	state.seq = seq
   493  	newCtx := curCtx
   494  	newCtx.G = gid
   495  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   496  	return newCtx, true, nil
   497  }
   498  
   499  func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   500  	// N.B. These both reference the goroutine to unblock, not the current goroutine.
   501  	gid := GoID(ev.args[0])
   502  	seq := makeSeq(gen, ev.args[1])
   503  	state, ok := o.gStates[gid]
   504  	if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) {
   505  		// We can't make an inference as to whether this is bad. We could just be seeing
   506  		// a GoUnblock on a different M before the goroutine was created and blocked itself,
   507  		// before it had its state emitted, or before we got to the right point in the trace yet.
   508  		return curCtx, false, nil
   509  	}
   510  	state.status = go122.GoRunnable
   511  	state.seq = seq
   512  	// N.B. No context to validate. Basically anything can unblock
   513  	// a goroutine (e.g. sysmon).
   514  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   515  	return curCtx, true, nil
   516  }
   517  
   518  func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   519  	// GoSwitch and GoSwitchDestroy represent a trio of events:
   520  	// - Unblock of the goroutine to switch to.
   521  	// - Block or destroy of the current goroutine.
   522  	// - Start executing the next goroutine.
   523  	//
   524  	// Because it acts like a GoStart for the next goroutine, we can
   525  	// only advance it if the sequence numbers line up.
   526  	//
   527  	// The current goroutine on the thread must be actively running.
   528  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   529  		return curCtx, false, err
   530  	}
   531  	curGState, ok := o.gStates[curCtx.G]
   532  	if !ok {
   533  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   534  	}
   535  	if curGState.status != go122.GoRunning {
   536  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   537  	}
   538  	nextg := GoID(ev.args[0])
   539  	seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G.
   540  	nextGState, ok := o.gStates[nextg]
   541  	if !ok || nextGState.status != go122.GoWaiting || !seq.succeeds(nextGState.seq) {
   542  		// We can't make an inference as to whether this is bad. We could just be seeing
   543  		// a GoSwitch on a different M before the goroutine was created, before it had its
   544  		// state emitted, or before we got to the right point in the trace yet.
   545  		return curCtx, false, nil
   546  	}
   547  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   548  
   549  	// Update the state of the executing goroutine and emit an event for it
   550  	// (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events
   551  	// for nextg).
   552  	switch ev.typ {
   553  	case go122.EvGoSwitch:
   554  		// Goroutine blocked. It's waiting now and not running on this M.
   555  		curGState.status = go122.GoWaiting
   556  
   557  		// Emit a GoBlock event.
   558  		// TODO(mknyszek): Emit a reason.
   559  		o.queue.push(makeEvent(evt, curCtx, go122.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */))
   560  	case go122.EvGoSwitchDestroy:
   561  		// This goroutine is exiting itself.
   562  		delete(o.gStates, curCtx.G)
   563  
   564  		// Emit a GoDestroy event.
   565  		o.queue.push(makeEvent(evt, curCtx, go122.EvGoDestroy, ev.time))
   566  	}
   567  	// Update the state of the next goroutine.
   568  	nextGState.status = go122.GoRunning
   569  	nextGState.seq = seq
   570  	newCtx := curCtx
   571  	newCtx.G = nextg
   572  
   573  	// Queue an event for the next goroutine starting to run.
   574  	startCtx := curCtx
   575  	startCtx.G = NoGoroutine
   576  	o.queue.push(makeEvent(evt, startCtx, go122.EvGoStart, ev.time, uint64(nextg), ev.args[1]))
   577  	return newCtx, true, nil
   578  }
   579  
   580  func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   581  	// Entering a syscall requires an active running goroutine with a
   582  	// proc on some thread. It is always advancable.
   583  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   584  		return curCtx, false, err
   585  	}
   586  	state, ok := o.gStates[curCtx.G]
   587  	if !ok {
   588  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   589  	}
   590  	if state.status != go122.GoRunning {
   591  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   592  	}
   593  	// Goroutine entered a syscall. It's still running on this P and M.
   594  	state.status = go122.GoSyscall
   595  	pState, ok := o.pStates[curCtx.P]
   596  	if !ok {
   597  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   598  	}
   599  	pState.status = go122.ProcSyscall
   600  	// Validate the P sequence number on the event and advance it.
   601  	//
   602  	// We have a P sequence number for what is supposed to be a goroutine event
   603  	// so that we can correctly model P stealing. Without this sequence number here,
   604  	// the syscall from which a ProcSteal event is stealing can be ambiguous in the
   605  	// face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for
   606  	// more details.
   607  	//
   608  	// Note that because this sequence number only exists as a tool for disambiguation,
   609  	// we can enforce that we have the right sequence number at this point; we don't need
   610  	// to back off and see if any other events will advance. This is a running P.
   611  	pSeq := makeSeq(gen, ev.args[0])
   612  	if !pSeq.succeeds(pState.seq) {
   613  		return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(ev.typ), pState.seq, pSeq)
   614  	}
   615  	pState.seq = pSeq
   616  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   617  	return curCtx, true, nil
   618  }
   619  
   620  func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   621  	// This event is always advance-able because it happens on the same
   622  	// thread that EvGoSyscallStart happened, and the goroutine can't leave
   623  	// that thread until its done.
   624  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   625  		return curCtx, false, err
   626  	}
   627  	state, ok := o.gStates[curCtx.G]
   628  	if !ok {
   629  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   630  	}
   631  	if state.status != go122.GoSyscall {
   632  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   633  	}
   634  	state.status = go122.GoRunning
   635  
   636  	// Transfer the P back to running from syscall.
   637  	pState, ok := o.pStates[curCtx.P]
   638  	if !ok {
   639  		return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   640  	}
   641  	if pState.status != go122.ProcSyscall {
   642  		return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
   643  	}
   644  	pState.status = go122.ProcRunning
   645  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   646  	return curCtx, true, nil
   647  }
   648  
   649  func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   650  	// This event becomes advanceable when its P is not in a syscall state
   651  	// (lack of a P altogether is also acceptable for advancing).
   652  	// The transfer out of ProcSyscall can happen either voluntarily via
   653  	// ProcStop or involuntarily via ProcSteal. We may also acquire a new P
   654  	// before we get here (after the transfer out) but that's OK: that new
   655  	// P won't be in the ProcSyscall state anymore.
   656  	//
   657  	// Basically: while we have a preemptible P, don't advance, because we
   658  	// *know* from the event that we're going to lose it at some point during
   659  	// the syscall. We shouldn't advance until that happens.
   660  	if curCtx.P != NoProc {
   661  		pState, ok := o.pStates[curCtx.P]
   662  		if !ok {
   663  			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
   664  		}
   665  		if pState.status == go122.ProcSyscall {
   666  			return curCtx, false, nil
   667  		}
   668  	}
   669  	// As mentioned above, we may have a P here if we ProcStart
   670  	// before this event.
   671  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
   672  		return curCtx, false, err
   673  	}
   674  	state, ok := o.gStates[curCtx.G]
   675  	if !ok {
   676  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   677  	}
   678  	if state.status != go122.GoSyscall {
   679  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
   680  	}
   681  	newCtx := curCtx
   682  	newCtx.G = NoGoroutine
   683  	state.status = go122.GoRunnable
   684  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   685  	return newCtx, true, nil
   686  }
   687  
   688  func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   689  	// This event indicates that a goroutine is effectively
   690  	// being created out of a cgo callback. Such a goroutine
   691  	// is 'created' in the syscall state.
   692  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustNotHave}); err != nil {
   693  		return curCtx, false, err
   694  	}
   695  	// This goroutine is effectively being created. Add a state for it.
   696  	newgid := GoID(ev.args[0])
   697  	if _, ok := o.gStates[newgid]; ok {
   698  		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
   699  	}
   700  	o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
   701  	// Goroutine is executing. Bind it to the context.
   702  	newCtx := curCtx
   703  	newCtx.G = newgid
   704  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   705  	return newCtx, true, nil
   706  }
   707  
   708  func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   709  	// This event indicates that a goroutine created for a
   710  	// cgo callback is disappearing, either because the callback
   711  	// ending or the C thread that called it is being destroyed.
   712  	//
   713  	// Also, treat this as if we lost our P too.
   714  	// The thread ID may be reused by the platform and we'll get
   715  	// really confused if we try to steal the P is this is running
   716  	// with later. The new M with the same ID could even try to
   717  	// steal back this P from itself!
   718  	//
   719  	// The runtime is careful to make sure that any GoCreateSyscall
   720  	// event will enter the runtime emitting events for reacquiring a P.
   721  	//
   722  	// Note: we might have a P here. The P might not be released
   723  	// eagerly by the runtime, and it might get stolen back later
   724  	// (or never again, if the program is going to exit).
   725  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
   726  		return curCtx, false, err
   727  	}
   728  	// Check to make sure the goroutine exists in the right state.
   729  	state, ok := o.gStates[curCtx.G]
   730  	if !ok {
   731  		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
   732  	}
   733  	if state.status != go122.GoSyscall {
   734  		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(ev.typ), GoSyscall)
   735  	}
   736  	// This goroutine is exiting itself.
   737  	delete(o.gStates, curCtx.G)
   738  	newCtx := curCtx
   739  	newCtx.G = NoGoroutine
   740  
   741  	// If we have a proc, then we're dissociating from it now. See the comment at the top of the case.
   742  	if curCtx.P != NoProc {
   743  		pState, ok := o.pStates[curCtx.P]
   744  		if !ok {
   745  			return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(ev.typ))
   746  		}
   747  		if pState.status != go122.ProcSyscall {
   748  			return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(ev.typ))
   749  		}
   750  		// See the go122-create-syscall-reuse-thread-id test case for more details.
   751  		pState.status = go122.ProcSyscallAbandoned
   752  		newCtx.P = NoProc
   753  
   754  		// Queue an extra self-ProcSteal event.
   755  		extra := makeEvent(evt, curCtx, go122.EvProcSteal, ev.time, uint64(curCtx.P))
   756  		extra.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall)
   757  		o.queue.push(extra)
   758  	}
   759  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   760  	return newCtx, true, nil
   761  }
   762  
   763  func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   764  	// Handle tasks. Tasks are interesting because:
   765  	// - There's no Begin event required to reference a task.
   766  	// - End for a particular task ID can appear multiple times.
   767  	// As a result, there's very little to validate. The only
   768  	// thing we have to be sure of is that a task didn't begin
   769  	// after it had already begun. Task IDs are allowed to be
   770  	// reused, so we don't care about a Begin after an End.
   771  	id := TaskID(ev.args[0])
   772  	if _, ok := o.activeTasks[id]; ok {
   773  		return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
   774  	}
   775  	// Get the parent ID, but don't validate it. There's no guarantee
   776  	// we actually have information on whether it's active.
   777  	parentID := TaskID(ev.args[1])
   778  	if parentID == BackgroundTask {
   779  		// Note: a value of 0 here actually means no parent, *not* the
   780  		// background task. Automatic background task attachment only
   781  		// applies to regions.
   782  		parentID = NoTask
   783  		ev.args[1] = uint64(NoTask)
   784  	}
   785  
   786  	// Validate the name and record it. We'll need to pass it through to
   787  	// EvUserTaskEnd.
   788  	nameID := stringID(ev.args[2])
   789  	name, ok := evt.strings.get(nameID)
   790  	if !ok {
   791  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   792  	}
   793  	o.activeTasks[id] = taskState{name: name, parentID: parentID}
   794  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   795  		return curCtx, false, err
   796  	}
   797  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   798  	return curCtx, true, nil
   799  }
   800  
   801  func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   802  	id := TaskID(ev.args[0])
   803  	if ts, ok := o.activeTasks[id]; ok {
   804  		// Smuggle the task info. This may happen in a different generation,
   805  		// which may not have the name in its string table. Add it to the extra
   806  		// strings table so we can look it up later.
   807  		ev.extra(version.Go122)[0] = uint64(ts.parentID)
   808  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
   809  		delete(o.activeTasks, id)
   810  	} else {
   811  		// Explicitly clear the task info.
   812  		ev.extra(version.Go122)[0] = uint64(NoTask)
   813  		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
   814  	}
   815  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   816  		return curCtx, false, err
   817  	}
   818  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   819  	return curCtx, true, nil
   820  }
   821  
   822  func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   823  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   824  		return curCtx, false, err
   825  	}
   826  	tid := TaskID(ev.args[0])
   827  	nameID := stringID(ev.args[1])
   828  	name, ok := evt.strings.get(nameID)
   829  	if !ok {
   830  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   831  	}
   832  	gState, ok := o.gStates[curCtx.G]
   833  	if !ok {
   834  		return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
   835  	}
   836  	if err := gState.beginRegion(userRegion{tid, name}); err != nil {
   837  		return curCtx, false, err
   838  	}
   839  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   840  	return curCtx, true, nil
   841  }
   842  
   843  func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   844  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   845  		return curCtx, false, err
   846  	}
   847  	tid := TaskID(ev.args[0])
   848  	nameID := stringID(ev.args[1])
   849  	name, ok := evt.strings.get(nameID)
   850  	if !ok {
   851  		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
   852  	}
   853  	gState, ok := o.gStates[curCtx.G]
   854  	if !ok {
   855  		return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
   856  	}
   857  	if err := gState.endRegion(userRegion{tid, name}); err != nil {
   858  		return curCtx, false, err
   859  	}
   860  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   861  	return curCtx, true, nil
   862  }
   863  
   864  // Handle the GC mark phase.
   865  //
   866  // We have sequence numbers for both start and end because they
   867  // can happen on completely different threads. We want an explicit
   868  // partial order edge between start and end here, otherwise we're
   869  // relying entirely on timestamps to make sure we don't advance a
   870  // GCEnd for a _different_ GC cycle if timestamps are wildly broken.
   871  func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   872  	seq := ev.args[0]
   873  	if gen == o.initialGen {
   874  		if o.gcState != gcUndetermined {
   875  			return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
   876  		}
   877  		o.gcSeq = seq
   878  		o.gcState = gcRunning
   879  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   880  		return curCtx, true, nil
   881  	}
   882  	if seq != o.gcSeq+1 {
   883  		// This is not the right GC cycle.
   884  		return curCtx, false, nil
   885  	}
   886  	if o.gcState != gcRunning {
   887  		return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
   888  	}
   889  	o.gcSeq = seq
   890  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   891  		return curCtx, false, err
   892  	}
   893  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   894  	return curCtx, true, nil
   895  }
   896  
   897  func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   898  	seq := ev.args[0]
   899  	if o.gcState == gcUndetermined {
   900  		o.gcSeq = seq
   901  		o.gcState = gcRunning
   902  		o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   903  		return curCtx, true, nil
   904  	}
   905  	if seq != o.gcSeq+1 {
   906  		// This is not the right GC cycle.
   907  		return curCtx, false, nil
   908  	}
   909  	if o.gcState == gcRunning {
   910  		return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
   911  	}
   912  	o.gcSeq = seq
   913  	o.gcState = gcRunning
   914  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   915  		return curCtx, false, err
   916  	}
   917  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   918  	return curCtx, true, nil
   919  }
   920  
   921  func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   922  	seq := ev.args[0]
   923  	if seq != o.gcSeq+1 {
   924  		// This is not the right GC cycle.
   925  		return curCtx, false, nil
   926  	}
   927  	if o.gcState == gcNotRunning {
   928  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
   929  	}
   930  	if o.gcState == gcUndetermined {
   931  		return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
   932  	}
   933  	o.gcSeq = seq
   934  	o.gcState = gcNotRunning
   935  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   936  		return curCtx, false, err
   937  	}
   938  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   939  	return curCtx, true, nil
   940  }
   941  
   942  func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   943  	// Handle simple instantaneous events that require a G.
   944  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
   945  		return curCtx, false, err
   946  	}
   947  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   948  	return curCtx, true, nil
   949  }
   950  
   951  func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   952  	// Handle allocation metrics, which don't require a G.
   953  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
   954  		return curCtx, false, err
   955  	}
   956  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   957  	return curCtx, true, nil
   958  }
   959  
   960  func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   961  	// Handle sweep, which is bound to a P and doesn't require a G.
   962  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
   963  		return curCtx, false, err
   964  	}
   965  	if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil {
   966  		return curCtx, false, err
   967  	}
   968  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   969  	return curCtx, true, nil
   970  }
   971  
   972  func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   973  	pid := ProcID(ev.args[0])
   974  	// N.B. In practice Ps can't block while they're sweeping, so this can only
   975  	// ever reference curCtx.P. However, be lenient about this like we are with
   976  	// GCMarkAssistActive; there's no reason the runtime couldn't change to block
   977  	// in the middle of a sweep.
   978  	pState, ok := o.pStates[pid]
   979  	if !ok {
   980  		return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
   981  	}
   982  	if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
   983  		return curCtx, false, err
   984  	}
   985  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   986  	return curCtx, true, nil
   987  }
   988  
   989  func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
   990  	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
   991  		return curCtx, false, err
   992  	}
   993  	_, err := o.pStates[curCtx.P].endRange(ev.typ)
   994  	if err != nil {
   995  		return curCtx, false, err
   996  	}
   997  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
   998  	return curCtx, true, nil
   999  }
  1000  
  1001  func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1002  	// Handle special goroutine-bound event ranges.
  1003  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
  1004  		return curCtx, false, err
  1005  	}
  1006  	desc := stringID(0)
  1007  	if ev.typ == go122.EvSTWBegin {
  1008  		desc = stringID(ev.args[0])
  1009  	}
  1010  	gState, ok := o.gStates[curCtx.G]
  1011  	if !ok {
  1012  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1013  	}
  1014  	if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil {
  1015  		return curCtx, false, err
  1016  	}
  1017  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1018  	return curCtx, true, nil
  1019  }
  1020  
  1021  func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1022  	gid := GoID(ev.args[0])
  1023  	// N.B. Like GoStatus, this can happen at any time, because it can
  1024  	// reference a non-running goroutine. Don't check anything about the
  1025  	// current scheduler context.
  1026  	gState, ok := o.gStates[gid]
  1027  	if !ok {
  1028  		return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(ev.typ))
  1029  	}
  1030  	if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil {
  1031  		return curCtx, false, err
  1032  	}
  1033  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1034  	return curCtx, true, nil
  1035  }
  1036  
  1037  func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
  1038  	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
  1039  		return curCtx, false, err
  1040  	}
  1041  	gState, ok := o.gStates[curCtx.G]
  1042  	if !ok {
  1043  		return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G)
  1044  	}
  1045  	desc, err := gState.endRange(ev.typ)
  1046  	if err != nil {
  1047  		return curCtx, false, err
  1048  	}
  1049  	if ev.typ == go122.EvSTWEnd {
  1050  		// Smuggle the kind into the event.
  1051  		// Don't use ev.extra here so we have symmetry with STWBegin.
  1052  		ev.args[0] = uint64(desc)
  1053  	}
  1054  	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
  1055  	return curCtx, true, nil
  1056  }
  1057  
  1058  // Next returns the next event in the ordering.
  1059  func (o *ordering) Next() (Event, bool) {
  1060  	return o.queue.pop()
  1061  }
  1062  
// schedCtx represents the scheduling resources associated with an event.
//
// validateCtx treats a field equal to its sentinel (NoGoroutine, NoProc or
// NoThread) as "no resource of that kind".
type schedCtx struct {
	// G is the goroutine, or NoGoroutine.
	G GoID
	// P is the proc, or NoProc.
	P ProcID
	// M is the thread, or NoThread.
	M ThreadID
}
  1069  
  1070  // validateCtx ensures that ctx conforms to some reqs, returning an error if
  1071  // it doesn't.
  1072  func validateCtx(ctx schedCtx, reqs event.SchedReqs) error {
  1073  	// Check thread requirements.
  1074  	if reqs.Thread == event.MustHave && ctx.M == NoThread {
  1075  		return fmt.Errorf("expected a thread but didn't have one")
  1076  	} else if reqs.Thread == event.MustNotHave && ctx.M != NoThread {
  1077  		return fmt.Errorf("expected no thread but had one")
  1078  	}
  1079  
  1080  	// Check proc requirements.
  1081  	if reqs.Proc == event.MustHave && ctx.P == NoProc {
  1082  		return fmt.Errorf("expected a proc but didn't have one")
  1083  	} else if reqs.Proc == event.MustNotHave && ctx.P != NoProc {
  1084  		return fmt.Errorf("expected no proc but had one")
  1085  	}
  1086  
  1087  	// Check goroutine requirements.
  1088  	if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine {
  1089  		return fmt.Errorf("expected a goroutine but didn't have one")
  1090  	} else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine {
  1091  		return fmt.Errorf("expected no goroutine but had one")
  1092  	}
  1093  	return nil
  1094  }
  1095  
  1096  // gcState is a trinary variable for the current state of the GC.
  1097  //
  1098  // The third state besides "enabled" and "disabled" is "undetermined."
  1099  type gcState uint8
  1100  
  1101  const (
  1102  	gcUndetermined gcState = iota
  1103  	gcNotRunning
  1104  	gcRunning
  1105  )
  1106  
  1107  // String returns a human-readable string for the GC state.
  1108  func (s gcState) String() string {
  1109  	switch s {
  1110  	case gcUndetermined:
  1111  		return "Undetermined"
  1112  	case gcNotRunning:
  1113  		return "NotRunning"
  1114  	case gcRunning:
  1115  		return "Running"
  1116  	}
  1117  	return "Bad"
  1118  }
  1119  
// userRegion represents a unique user region when attached to some gState.
//
// Values are compared with == when a region ends, so both fields take part in
// matching a region end against its begin.
type userRegion struct {
	// taskID is the task ID carried by the region event.
	//
	// name must be a resolved string because the string ID for the same
	// string may change across generations, but we care about checking
	// the value itself.
	taskID TaskID
	name   string
}
  1128  
// rangeType is a way to classify special ranges of time.
//
// These typically correspond 1:1 with "Begin" events, but
// they may have an optional subtype that describes the range
// in more detail.
//
// rangeType values are compared with ==, so two ranges match only when both
// the event type and the subtype are equal.
type rangeType struct {
	typ  event.Type // "Begin" event.
	desc stringID   // Optional subtype.
}
  1138  
  1139  // makeRangeType constructs a new rangeType.
  1140  func makeRangeType(typ event.Type, desc stringID) rangeType {
  1141  	if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone {
  1142  		typ = styp
  1143  	}
  1144  	return rangeType{typ, desc}
  1145  }
  1146  
// gState is the state of a goroutine at a point in the trace.
type gState struct {
	// id is the goroutine's ID.
	id GoID
	// status is the goroutine's current scheduling status.
	status go122.GoStatus
	// seq is the goroutine's sequence counter.
	seq seqCounter

	// regions are the active user regions for this goroutine, innermost
	// last (beginRegion appends, endRegion removes from the end).
	regions []userRegion

	// rangeState is the state of special time ranges bound to this goroutine.
	rangeState
}
  1159  
  1160  // beginRegion starts a user region on the goroutine.
  1161  func (s *gState) beginRegion(r userRegion) error {
  1162  	s.regions = append(s.regions, r)
  1163  	return nil
  1164  }
  1165  
  1166  // endRegion ends a user region on the goroutine.
  1167  func (s *gState) endRegion(r userRegion) error {
  1168  	if len(s.regions) == 0 {
  1169  		// We do not know about regions that began before tracing started.
  1170  		return nil
  1171  	}
  1172  	if next := s.regions[len(s.regions)-1]; next != r {
  1173  		return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next)
  1174  	}
  1175  	s.regions = s.regions[:len(s.regions)-1]
  1176  	return nil
  1177  }
  1178  
// pState is the state of a proc at a point in the trace.
type pState struct {
	// id is the proc's ID.
	id ProcID
	// status is the proc's current scheduling status.
	status go122.ProcStatus
	// seq is the proc's sequence counter.
	seq seqCounter

	// rangeState is the state of special time ranges bound to this proc.
	rangeState
}
  1188  
// mState is the state of a thread at a point in the trace.
//
// It records which goroutine and proc are bound to the thread.
type mState struct {
	g GoID   // Goroutine bound to this M. (The goroutine's state is Executing.)
	p ProcID // Proc bound to this M. (The proc's state is Executing.)
}
  1194  
// rangeState represents the state of special time ranges.
//
// It is embedded in both gState and pState, so a range may be bound to either
// a goroutine or a proc.
type rangeState struct {
	// inFlight contains the rangeTypes of any ranges bound to a resource.
	// Order is not preserved: endRange removes entries by swapping with the
	// last element.
	inFlight []rangeType
}
  1200  
  1201  // beginRange begins a special range in time on the goroutine.
  1202  //
  1203  // Returns an error if the range is already in progress.
  1204  func (s *rangeState) beginRange(typ rangeType) error {
  1205  	if s.hasRange(typ) {
  1206  		return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name)
  1207  	}
  1208  	s.inFlight = append(s.inFlight, typ)
  1209  	return nil
  1210  }
  1211  
  1212  // activeRange marks special range in time on the goroutine as active in the
  1213  // initial generation, or confirms that it is indeed active in later generations.
  1214  func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error {
  1215  	if isInitialGen {
  1216  		if s.hasRange(typ) {
  1217  			return fmt.Errorf("found named active range already in first gen: %v", typ)
  1218  		}
  1219  		s.inFlight = append(s.inFlight, typ)
  1220  	} else if !s.hasRange(typ) {
  1221  		return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight)
  1222  	}
  1223  	return nil
  1224  }
  1225  
  1226  // hasRange returns true if a special time range on the goroutine as in progress.
  1227  func (s *rangeState) hasRange(typ rangeType) bool {
  1228  	for _, ftyp := range s.inFlight {
  1229  		if ftyp == typ {
  1230  			return true
  1231  		}
  1232  	}
  1233  	return false
  1234  }
  1235  
  1236  // endsRange ends a special range in time on the goroutine.
  1237  //
  1238  // This must line up with the start event type  of the range the goroutine is currently in.
  1239  func (s *rangeState) endRange(typ event.Type) (stringID, error) {
  1240  	st := go122.Specs()[typ].StartEv
  1241  	idx := -1
  1242  	for i, r := range s.inFlight {
  1243  		if r.typ == st {
  1244  			idx = i
  1245  			break
  1246  		}
  1247  	}
  1248  	if idx < 0 {
  1249  		return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name)
  1250  	}
  1251  	// Swap remove.
  1252  	desc := s.inFlight[idx].desc
  1253  	s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx]
  1254  	s.inFlight = s.inFlight[:len(s.inFlight)-1]
  1255  	return desc, nil
  1256  }
  1257  
  1258  // seqCounter represents a global sequence counter for a resource.
  1259  type seqCounter struct {
  1260  	gen uint64 // The generation for the local sequence counter seq.
  1261  	seq uint64 // The sequence number local to the generation.
  1262  }
  1263  
  1264  // makeSeq creates a new seqCounter.
  1265  func makeSeq(gen, seq uint64) seqCounter {
  1266  	return seqCounter{gen: gen, seq: seq}
  1267  }
  1268  
  1269  // succeeds returns true if a is the immediate successor of b.
  1270  func (a seqCounter) succeeds(b seqCounter) bool {
  1271  	return a.gen == b.gen && a.seq == b.seq+1
  1272  }
  1273  
  1274  // String returns a debug string representation of the seqCounter.
  1275  func (c seqCounter) String() string {
  1276  	return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen)
  1277  }
  1278  
  1279  func dumpOrdering(order *ordering) string {
  1280  	var sb strings.Builder
  1281  	for id, state := range order.gStates {
  1282  		fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1283  	}
  1284  	fmt.Fprintln(&sb)
  1285  	for id, state := range order.pStates {
  1286  		fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq)
  1287  	}
  1288  	fmt.Fprintln(&sb)
  1289  	for id, state := range order.mStates {
  1290  		fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p)
  1291  	}
  1292  	fmt.Fprintln(&sb)
  1293  	fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState)
  1294  	return sb.String()
  1295  }
  1296  
// taskState represents an active task.
//
// It is recorded when a task begins so that the task's name and parent can be
// attached to the matching task-end event.
type taskState struct {
	// name is the type of the active task.
	name string

	// parentID is the parent ID of the active task.
	parentID TaskID
}
  1305  
  1306  // queue implements a growable ring buffer with a queue API.
  1307  type queue[T any] struct {
  1308  	start, end int
  1309  	buf        []T
  1310  }
  1311  
  1312  // push adds a new event to the back of the queue.
  1313  func (q *queue[T]) push(value T) {
  1314  	if q.end-q.start == len(q.buf) {
  1315  		q.grow()
  1316  	}
  1317  	q.buf[q.end%len(q.buf)] = value
  1318  	q.end++
  1319  }
  1320  
  1321  // grow increases the size of the queue.
  1322  func (q *queue[T]) grow() {
  1323  	if len(q.buf) == 0 {
  1324  		q.buf = make([]T, 2)
  1325  		return
  1326  	}
  1327  
  1328  	// Create new buf and copy data over.
  1329  	newBuf := make([]T, len(q.buf)*2)
  1330  	pivot := q.start % len(q.buf)
  1331  	first, last := q.buf[pivot:], q.buf[:pivot]
  1332  	copy(newBuf[:len(first)], first)
  1333  	copy(newBuf[len(first):], last)
  1334  
  1335  	// Update the queue state.
  1336  	q.start = 0
  1337  	q.end = len(q.buf)
  1338  	q.buf = newBuf
  1339  }
  1340  
  1341  // pop removes an event from the front of the queue. If the
  1342  // queue is empty, it returns an EventBad event.
  1343  func (q *queue[T]) pop() (T, bool) {
  1344  	if q.end-q.start == 0 {
  1345  		return *new(T), false
  1346  	}
  1347  	elem := &q.buf[q.start%len(q.buf)]
  1348  	value := *elem
  1349  	*elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables.
  1350  	q.start++
  1351  	return value, true
  1352  }
  1353  
  1354  // makeEvent creates an Event from the provided information.
  1355  //
  1356  // It's just a convenience function; it's always OK to construct
  1357  // an Event manually if this isn't quite the right way to express
  1358  // the contents of the event.
  1359  func makeEvent(table *evTable, ctx schedCtx, typ event.Type, time Time, args ...uint64) Event {
  1360  	ev := Event{
  1361  		table: table,
  1362  		ctx:   ctx,
  1363  		base: baseEvent{
  1364  			typ:  typ,
  1365  			time: time,
  1366  		},
  1367  	}
  1368  	copy(ev.base.args[:], args)
  1369  	return ev
  1370  }