github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/internal/trace/v2/order.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package trace 6 7 import ( 8 "fmt" 9 "strings" 10 11 "internal/trace/v2/event" 12 "internal/trace/v2/event/go122" 13 "internal/trace/v2/version" 14 ) 15 16 // ordering emulates Go scheduler state for both validation and 17 // for putting events in the right order. 18 // 19 // The interface to ordering consists of two methods: Advance 20 // and Next. Advance is called to try and advance an event and 21 // add completed events to the ordering. Next is used to pick 22 // off events in the ordering. 23 type ordering struct { 24 gStates map[GoID]*gState 25 pStates map[ProcID]*pState // TODO: The keys are dense, so this can be a slice. 26 mStates map[ThreadID]*mState 27 activeTasks map[TaskID]taskState 28 gcSeq uint64 29 gcState gcState 30 initialGen uint64 31 queue queue[Event] 32 } 33 34 // Advance checks if it's valid to proceed with ev which came from thread m. 35 // 36 // It assumes the gen value passed to it is monotonically increasing across calls. 37 // 38 // If any error is returned, then the trace is broken and trace parsing must cease. 39 // If it's not valid to advance with ev, but no error was encountered, the caller 40 // should attempt to advance with other candidate events from other threads. If the 41 // caller runs out of candidates, the trace is invalid. 42 // 43 // If this returns true, Next is guaranteed to return a complete event. However, 44 // multiple events may be added to the ordering, so the caller should (but is not 45 // required to) continue to call Next until it is exhausted. 46 func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) { 47 if o.initialGen == 0 { 48 // Set the initial gen if necessary. 
49 o.initialGen = gen 50 } 51 52 var curCtx, newCtx schedCtx 53 curCtx.M = m 54 newCtx.M = m 55 56 var ms *mState 57 if m == NoThread { 58 curCtx.P = NoProc 59 curCtx.G = NoGoroutine 60 newCtx = curCtx 61 } else { 62 // Pull out or create the mState for this event. 63 var ok bool 64 ms, ok = o.mStates[m] 65 if !ok { 66 ms = &mState{ 67 g: NoGoroutine, 68 p: NoProc, 69 } 70 o.mStates[m] = ms 71 } 72 curCtx.P = ms.p 73 curCtx.G = ms.g 74 newCtx = curCtx 75 } 76 77 f := orderingDispatch[ev.typ] 78 if f == nil { 79 return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ) 80 } 81 newCtx, ok, err := f(o, ev, evt, m, gen, curCtx) 82 if err == nil && ok && ms != nil { 83 // Update the mState for this event. 84 ms.p = newCtx.P 85 ms.g = newCtx.G 86 } 87 return ok, err 88 } 89 90 type orderingHandleFunc func(o *ordering, ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) 91 92 var orderingDispatch = [256]orderingHandleFunc{ 93 // Procs. 94 go122.EvProcsChange: (*ordering).advanceAnnotation, 95 go122.EvProcStart: (*ordering).advanceProcStart, 96 go122.EvProcStop: (*ordering).advanceProcStop, 97 go122.EvProcSteal: (*ordering).advanceProcSteal, 98 go122.EvProcStatus: (*ordering).advanceProcStatus, 99 100 // Goroutines. 101 go122.EvGoCreate: (*ordering).advanceGoCreate, 102 go122.EvGoCreateSyscall: (*ordering).advanceGoCreateSyscall, 103 go122.EvGoStart: (*ordering).advanceGoStart, 104 go122.EvGoDestroy: (*ordering).advanceGoStopExec, 105 go122.EvGoDestroySyscall: (*ordering).advanceGoDestroySyscall, 106 go122.EvGoStop: (*ordering).advanceGoStopExec, 107 go122.EvGoBlock: (*ordering).advanceGoStopExec, 108 go122.EvGoUnblock: (*ordering).advanceGoUnblock, 109 go122.EvGoSyscallBegin: (*ordering).advanceGoSyscallBegin, 110 go122.EvGoSyscallEnd: (*ordering).advanceGoSyscallEnd, 111 go122.EvGoSyscallEndBlocked: (*ordering).advanceGoSyscallEndBlocked, 112 go122.EvGoStatus: (*ordering).advanceGoStatus, 113 114 // STW. 
115 go122.EvSTWBegin: (*ordering).advanceGoRangeBegin, 116 go122.EvSTWEnd: (*ordering).advanceGoRangeEnd, 117 118 // GC events. 119 go122.EvGCActive: (*ordering).advanceGCActive, 120 go122.EvGCBegin: (*ordering).advanceGCBegin, 121 go122.EvGCEnd: (*ordering).advanceGCEnd, 122 go122.EvGCSweepActive: (*ordering).advanceGCSweepActive, 123 go122.EvGCSweepBegin: (*ordering).advanceGCSweepBegin, 124 go122.EvGCSweepEnd: (*ordering).advanceGCSweepEnd, 125 go122.EvGCMarkAssistActive: (*ordering).advanceGoRangeActive, 126 go122.EvGCMarkAssistBegin: (*ordering).advanceGoRangeBegin, 127 go122.EvGCMarkAssistEnd: (*ordering).advanceGoRangeEnd, 128 go122.EvHeapAlloc: (*ordering).advanceHeapMetric, 129 go122.EvHeapGoal: (*ordering).advanceHeapMetric, 130 131 // Annotations. 132 go122.EvGoLabel: (*ordering).advanceAnnotation, 133 go122.EvUserTaskBegin: (*ordering).advanceUserTaskBegin, 134 go122.EvUserTaskEnd: (*ordering).advanceUserTaskEnd, 135 go122.EvUserRegionBegin: (*ordering).advanceUserRegionBegin, 136 go122.EvUserRegionEnd: (*ordering).advanceUserRegionEnd, 137 go122.EvUserLog: (*ordering).advanceAnnotation, 138 139 // Coroutines. Added in Go 1.23. 140 go122.EvGoSwitch: (*ordering).advanceGoSwitch, 141 go122.EvGoSwitchDestroy: (*ordering).advanceGoSwitch, 142 go122.EvGoCreateBlocked: (*ordering).advanceGoCreate, 143 } 144 145 func (o *ordering) advanceProcStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 146 pid := ProcID(ev.args[0]) 147 status := go122.ProcStatus(ev.args[1]) 148 if int(status) >= len(go122ProcStatus2ProcState) { 149 return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status) 150 } 151 oldState := go122ProcStatus2ProcState[status] 152 if s, ok := o.pStates[pid]; ok { 153 if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscall { 154 // ProcSyscallAbandoned is a special case of ProcSyscall. 
It indicates a 155 // potential loss of information, but if we're already in ProcSyscall, 156 // we haven't lost the relevant information. Promote the status and advance. 157 oldState = ProcRunning 158 ev.args[1] = uint64(go122.ProcSyscall) 159 } else if status == go122.ProcSyscallAbandoned && s.status == go122.ProcSyscallAbandoned { 160 // If we're passing through ProcSyscallAbandoned, then there's no promotion 161 // to do. We've lost the M that this P is associated with. However it got there, 162 // it's going to appear as idle in the API, so pass through as idle. 163 oldState = ProcIdle 164 ev.args[1] = uint64(go122.ProcSyscallAbandoned) 165 } else if s.status != status { 166 return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status) 167 } 168 s.seq = makeSeq(gen, 0) // Reset seq. 169 } else { 170 o.pStates[pid] = &pState{id: pid, status: status, seq: makeSeq(gen, 0)} 171 if gen == o.initialGen { 172 oldState = ProcUndetermined 173 } else { 174 oldState = ProcNotExist 175 } 176 } 177 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition. 178 179 // Bind the proc to the new context, if it's running. 180 newCtx := curCtx 181 if status == go122.ProcRunning || status == go122.ProcSyscall { 182 newCtx.P = pid 183 } 184 // If we're advancing through ProcSyscallAbandoned *but* oldState is running then we've 185 // promoted it to ProcSyscall. However, because it's ProcSyscallAbandoned, we know this 186 // P is about to get stolen and its status very likely isn't being emitted by the same 187 // thread it was bound to. Since this status is Running -> Running and Running is binding, 188 // we need to make sure we emit it in the right context: the context to which it is bound. 189 // Find it, and set our current context to it. 190 if status == go122.ProcSyscallAbandoned && oldState == ProcRunning { 191 // N.B. This is slow but it should be fairly rare. 
192 found := false 193 for mid, ms := range o.mStates { 194 if ms.p == pid { 195 curCtx.M = mid 196 curCtx.P = pid 197 curCtx.G = ms.g 198 found = true 199 } 200 } 201 if !found { 202 return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid) 203 } 204 } 205 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 206 return newCtx, true, nil 207 } 208 209 func (o *ordering) advanceProcStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 210 pid := ProcID(ev.args[0]) 211 seq := makeSeq(gen, ev.args[1]) 212 213 // Try to advance. We might fail here due to sequencing, because the P hasn't 214 // had a status emitted, or because we already have a P and we're in a syscall, 215 // and we haven't observed that it was stolen from us yet. 216 state, ok := o.pStates[pid] 217 if !ok || state.status != go122.ProcIdle || !seq.succeeds(state.seq) || curCtx.P != NoProc { 218 // We can't make an inference as to whether this is bad. We could just be seeing 219 // a ProcStart on a different M before the proc's state was emitted, or before we 220 // got to the right point in the trace. 221 // 222 // Note that we also don't advance here if we have a P and we're in a syscall. 223 return curCtx, false, nil 224 } 225 // We can advance this P. Check some invariants. 226 // 227 // We might have a goroutine if a goroutine is exiting a syscall. 
228 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave} 229 if err := validateCtx(curCtx, reqs); err != nil { 230 return curCtx, false, err 231 } 232 state.status = go122.ProcRunning 233 state.seq = seq 234 newCtx := curCtx 235 newCtx.P = pid 236 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 237 return newCtx, true, nil 238 } 239 240 func (o *ordering) advanceProcStop(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 241 // We must be able to advance this P. 242 // 243 // There are 2 ways a P can stop: ProcStop and ProcSteal. ProcStop is used when the P 244 // is stopped by the same M that started it, while ProcSteal is used when another M 245 // steals the P by stopping it from a distance. 246 // 247 // Since a P is bound to an M, and we're stopping on the same M we started, it must 248 // always be possible to advance the current M's P from a ProcStop. This is also why 249 // ProcStop doesn't need a sequence number. 
250 state, ok := o.pStates[curCtx.P] 251 if !ok { 252 return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.P) 253 } 254 if state.status != go122.ProcRunning && state.status != go122.ProcSyscall { 255 return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(ev.typ), go122.ProcRunning, go122.ProcSyscall) 256 } 257 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave} 258 if err := validateCtx(curCtx, reqs); err != nil { 259 return curCtx, false, err 260 } 261 state.status = go122.ProcIdle 262 newCtx := curCtx 263 newCtx.P = NoProc 264 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 265 return newCtx, true, nil 266 } 267 268 func (o *ordering) advanceProcSteal(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 269 pid := ProcID(ev.args[0]) 270 seq := makeSeq(gen, ev.args[1]) 271 state, ok := o.pStates[pid] 272 if !ok || (state.status != go122.ProcSyscall && state.status != go122.ProcSyscallAbandoned) || !seq.succeeds(state.seq) { 273 // We can't make an inference as to whether this is bad. We could just be seeing 274 // a ProcStart on a different M before the proc's state was emitted, or before we 275 // got to the right point in the trace. 276 return curCtx, false, nil 277 } 278 // We can advance this P. Check some invariants. 279 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave} 280 if err := validateCtx(curCtx, reqs); err != nil { 281 return curCtx, false, err 282 } 283 // Smuggle in the P state that let us advance so we can surface information to the event. 284 // Specifically, we need to make sure that the event is interpreted not as a transition of 285 // ProcRunning -> ProcIdle but ProcIdle -> ProcIdle instead. 286 // 287 // ProcRunning is binding, but we may be running with a P on the current M and we can't 288 // bind another P. 
This P is about to go ProcIdle anyway. 289 oldStatus := state.status 290 ev.extra(version.Go122)[0] = uint64(oldStatus) 291 292 // Update the P's status and sequence number. 293 state.status = go122.ProcIdle 294 state.seq = seq 295 296 // If we've lost information then don't try to do anything with the M. 297 // It may have moved on and we can't be sure. 298 if oldStatus == go122.ProcSyscallAbandoned { 299 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 300 return curCtx, true, nil 301 } 302 303 // Validate that the M we're stealing from is what we expect. 304 mid := ThreadID(ev.args[2]) // The M we're stealing from. 305 306 newCtx := curCtx 307 if mid == curCtx.M { 308 // We're stealing from ourselves. This behaves like a ProcStop. 309 if curCtx.P != pid { 310 return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P) 311 } 312 newCtx.P = NoProc 313 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 314 return newCtx, true, nil 315 } 316 317 // We're stealing from some other M. 318 mState, ok := o.mStates[mid] 319 if !ok { 320 return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid) 321 } 322 323 // Make sure we're actually stealing the right P. 324 if mState.p != pid { 325 return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p) 326 } 327 328 // Tell the M it has no P so it can proceed. 329 // 330 // This is safe because we know the P was in a syscall and 331 // the other M must be trying to get out of the syscall. 332 // GoSyscallEndBlocked cannot advance until the corresponding 333 // M loses its P. 
334 mState.p = NoProc 335 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 336 return newCtx, true, nil 337 } 338 339 func (o *ordering) advanceGoStatus(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 340 gid := GoID(ev.args[0]) 341 mid := ThreadID(ev.args[1]) 342 status := go122.GoStatus(ev.args[2]) 343 344 if int(status) >= len(go122GoStatus2GoState) { 345 return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status) 346 } 347 oldState := go122GoStatus2GoState[status] 348 if s, ok := o.gStates[gid]; ok { 349 if s.status != status { 350 return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status) 351 } 352 s.seq = makeSeq(gen, 0) // Reset seq. 353 } else if gen == o.initialGen { 354 // Set the state. 355 o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)} 356 oldState = GoUndetermined 357 } else { 358 return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status) 359 } 360 ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition. 361 362 newCtx := curCtx 363 switch status { 364 case go122.GoRunning: 365 // Bind the goroutine to the new context, since it's running. 366 newCtx.G = gid 367 case go122.GoSyscall: 368 if mid == NoThread { 369 return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid) 370 } 371 // Is the syscall on this thread? If so, bind it to the context. 372 // Otherwise, we're talking about a G sitting in a syscall on an M. 373 // Validate the named M. 374 if mid == curCtx.M { 375 if gen != o.initialGen && curCtx.G != gid { 376 // If this isn't the first generation, we *must* have seen this 377 // binding occur already. 
Even if the G was blocked in a syscall 378 // for multiple generations since trace start, we would have seen 379 // a previous GoStatus event that bound the goroutine to an M. 380 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G) 381 } 382 newCtx.G = gid 383 break 384 } 385 // Now we're talking about a thread and goroutine that have been 386 // blocked on a syscall for the entire generation. This case must 387 // not have a P; the runtime makes sure that all Ps are traced at 388 // the beginning of a generation, which involves taking a P back 389 // from every thread. 390 ms, ok := o.mStates[mid] 391 if ok { 392 // This M has been seen. That means we must have seen this 393 // goroutine go into a syscall on this thread at some point. 394 if ms.g != gid { 395 // But the G on the M doesn't match. Something's wrong. 396 return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g) 397 } 398 // This case is just a Syscall->Syscall event, which needs to 399 // appear as having the G currently bound to this M. 400 curCtx.G = ms.g 401 } else if !ok { 402 // The M hasn't been seen yet. That means this goroutine 403 // has just been sitting in a syscall on this M. Create 404 // a state for it. 405 o.mStates[mid] = &mState{g: gid, p: NoProc} 406 // Don't set curCtx.G in this case because this event is the 407 // binding event (and curCtx represents the "before" state). 408 } 409 // Update the current context to the M we're talking about. 410 curCtx.M = mid 411 } 412 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 413 return newCtx, true, nil 414 } 415 416 func (o *ordering) advanceGoCreate(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 417 // Goroutines must be created on a running P, but may or may not be created 418 // by a running goroutine. 
419 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave} 420 if err := validateCtx(curCtx, reqs); err != nil { 421 return curCtx, false, err 422 } 423 // If we have a goroutine, it must be running. 424 if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning { 425 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 426 } 427 // This goroutine created another. Add a state for it. 428 newgid := GoID(ev.args[0]) 429 if _, ok := o.gStates[newgid]; ok { 430 return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid) 431 } 432 status := go122.GoRunnable 433 if ev.typ == go122.EvGoCreateBlocked { 434 status = go122.GoWaiting 435 } 436 o.gStates[newgid] = &gState{id: newgid, status: status, seq: makeSeq(gen, 0)} 437 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 438 return curCtx, true, nil 439 } 440 441 func (o *ordering) advanceGoStopExec(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 442 // These are goroutine events that all require an active running 443 // goroutine on some thread. They must *always* be advance-able, 444 // since running goroutines are bound to their M. 445 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 446 return curCtx, false, err 447 } 448 state, ok := o.gStates[curCtx.G] 449 if !ok { 450 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 451 } 452 if state.status != go122.GoRunning { 453 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 454 } 455 // Handle each case slightly differently; we just group them together 456 // because they have shared preconditions. 457 newCtx := curCtx 458 switch ev.typ { 459 case go122.EvGoDestroy: 460 // This goroutine is exiting itself. 
461 delete(o.gStates, curCtx.G) 462 newCtx.G = NoGoroutine 463 case go122.EvGoStop: 464 // Goroutine stopped (yielded). It's runnable but not running on this M. 465 state.status = go122.GoRunnable 466 newCtx.G = NoGoroutine 467 case go122.EvGoBlock: 468 // Goroutine blocked. It's waiting now and not running on this M. 469 state.status = go122.GoWaiting 470 newCtx.G = NoGoroutine 471 } 472 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 473 return newCtx, true, nil 474 } 475 476 func (o *ordering) advanceGoStart(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 477 gid := GoID(ev.args[0]) 478 seq := makeSeq(gen, ev.args[1]) 479 state, ok := o.gStates[gid] 480 if !ok || state.status != go122.GoRunnable || !seq.succeeds(state.seq) { 481 // We can't make an inference as to whether this is bad. We could just be seeing 482 // a GoStart on a different M before the goroutine was created, before it had its 483 // state emitted, or before we got to the right point in the trace yet. 484 return curCtx, false, nil 485 } 486 // We can advance this goroutine. Check some invariants. 487 reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave} 488 if err := validateCtx(curCtx, reqs); err != nil { 489 return curCtx, false, err 490 } 491 state.status = go122.GoRunning 492 state.seq = seq 493 newCtx := curCtx 494 newCtx.G = gid 495 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 496 return newCtx, true, nil 497 } 498 499 func (o *ordering) advanceGoUnblock(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 500 // N.B. These both reference the goroutine to unblock, not the current goroutine. 501 gid := GoID(ev.args[0]) 502 seq := makeSeq(gen, ev.args[1]) 503 state, ok := o.gStates[gid] 504 if !ok || state.status != go122.GoWaiting || !seq.succeeds(state.seq) { 505 // We can't make an inference as to whether this is bad. 
We could just be seeing 506 // a GoUnblock on a different M before the goroutine was created and blocked itself, 507 // before it had its state emitted, or before we got to the right point in the trace yet. 508 return curCtx, false, nil 509 } 510 state.status = go122.GoRunnable 511 state.seq = seq 512 // N.B. No context to validate. Basically anything can unblock 513 // a goroutine (e.g. sysmon). 514 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 515 return curCtx, true, nil 516 } 517 518 func (o *ordering) advanceGoSwitch(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 519 // GoSwitch and GoSwitchDestroy represent a trio of events: 520 // - Unblock of the goroutine to switch to. 521 // - Block or destroy of the current goroutine. 522 // - Start executing the next goroutine. 523 // 524 // Because it acts like a GoStart for the next goroutine, we can 525 // only advance it if the sequence numbers line up. 526 // 527 // The current goroutine on the thread must be actively running. 528 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 529 return curCtx, false, err 530 } 531 curGState, ok := o.gStates[curCtx.G] 532 if !ok { 533 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 534 } 535 if curGState.status != go122.GoRunning { 536 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 537 } 538 nextg := GoID(ev.args[0]) 539 seq := makeSeq(gen, ev.args[1]) // seq is for nextg, not curCtx.G. 540 nextGState, ok := o.gStates[nextg] 541 if !ok || nextGState.status != go122.GoWaiting || !seq.succeeds(nextGState.seq) { 542 // We can't make an inference as to whether this is bad. We could just be seeing 543 // a GoSwitch on a different M before the goroutine was created, before it had its 544 // state emitted, or before we got to the right point in the trace yet. 
545 return curCtx, false, nil 546 } 547 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 548 549 // Update the state of the executing goroutine and emit an event for it 550 // (GoSwitch and GoSwitchDestroy will be interpreted as GoUnblock events 551 // for nextg). 552 switch ev.typ { 553 case go122.EvGoSwitch: 554 // Goroutine blocked. It's waiting now and not running on this M. 555 curGState.status = go122.GoWaiting 556 557 // Emit a GoBlock event. 558 // TODO(mknyszek): Emit a reason. 559 o.queue.push(makeEvent(evt, curCtx, go122.EvGoBlock, ev.time, 0 /* no reason */, 0 /* no stack */)) 560 case go122.EvGoSwitchDestroy: 561 // This goroutine is exiting itself. 562 delete(o.gStates, curCtx.G) 563 564 // Emit a GoDestroy event. 565 o.queue.push(makeEvent(evt, curCtx, go122.EvGoDestroy, ev.time)) 566 } 567 // Update the state of the next goroutine. 568 nextGState.status = go122.GoRunning 569 nextGState.seq = seq 570 newCtx := curCtx 571 newCtx.G = nextg 572 573 // Queue an event for the next goroutine starting to run. 574 startCtx := curCtx 575 startCtx.G = NoGoroutine 576 o.queue.push(makeEvent(evt, startCtx, go122.EvGoStart, ev.time, uint64(nextg), ev.args[1])) 577 return newCtx, true, nil 578 } 579 580 func (o *ordering) advanceGoSyscallBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 581 // Entering a syscall requires an active running goroutine with a 582 // proc on some thread. It is always advancable. 583 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 584 return curCtx, false, err 585 } 586 state, ok := o.gStates[curCtx.G] 587 if !ok { 588 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 589 } 590 if state.status != go122.GoRunning { 591 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 592 } 593 // Goroutine entered a syscall. 
It's still running on this P and M. 594 state.status = go122.GoSyscall 595 pState, ok := o.pStates[curCtx.P] 596 if !ok { 597 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ)) 598 } 599 pState.status = go122.ProcSyscall 600 // Validate the P sequence number on the event and advance it. 601 // 602 // We have a P sequence number for what is supposed to be a goroutine event 603 // so that we can correctly model P stealing. Without this sequence number here, 604 // the syscall from which a ProcSteal event is stealing can be ambiguous in the 605 // face of broken timestamps. See the go122-syscall-steal-proc-ambiguous test for 606 // more details. 607 // 608 // Note that because this sequence number only exists as a tool for disambiguation, 609 // we can enforce that we have the right sequence number at this point; we don't need 610 // to back off and see if any other events will advance. This is a running P. 611 pSeq := makeSeq(gen, ev.args[0]) 612 if !pSeq.succeeds(pState.seq) { 613 return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(ev.typ), pState.seq, pSeq) 614 } 615 pState.seq = pSeq 616 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 617 return curCtx, true, nil 618 } 619 620 func (o *ordering) advanceGoSyscallEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 621 // This event is always advance-able because it happens on the same 622 // thread that EvGoSyscallStart happened, and the goroutine can't leave 623 // that thread until its done. 
624 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 625 return curCtx, false, err 626 } 627 state, ok := o.gStates[curCtx.G] 628 if !ok { 629 return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G) 630 } 631 if state.status != go122.GoSyscall { 632 return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning) 633 } 634 state.status = go122.GoRunning 635 636 // Transfer the P back to running from syscall. 637 pState, ok := o.pStates[curCtx.P] 638 if !ok { 639 return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ)) 640 } 641 if pState.status != go122.ProcSyscall { 642 return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status) 643 } 644 pState.status = go122.ProcRunning 645 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 646 return curCtx, true, nil 647 } 648 649 func (o *ordering) advanceGoSyscallEndBlocked(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 650 // This event becomes advanceable when its P is not in a syscall state 651 // (lack of a P altogether is also acceptable for advancing). 652 // The transfer out of ProcSyscall can happen either voluntarily via 653 // ProcStop or involuntarily via ProcSteal. We may also acquire a new P 654 // before we get here (after the transfer out) but that's OK: that new 655 // P won't be in the ProcSyscall state anymore. 656 // 657 // Basically: while we have a preemptible P, don't advance, because we 658 // *know* from the event that we're going to lose it at some point during 659 // the syscall. We shouldn't advance until that happens. 
if curCtx.P != NoProc {
		pState, ok := o.pStates[curCtx.P]
		if !ok {
			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(ev.typ))
		}
		if pState.status == go122.ProcSyscall {
			return curCtx, false, nil
		}
	}
	// As mentioned above, we may have a P here if we ProcStart
	// before this event.
	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
		return curCtx, false, err
	}
	state, ok := o.gStates[curCtx.G]
	if !ok {
		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
	}
	if state.status != go122.GoSyscall {
		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(ev.typ), GoRunning)
	}
	newCtx := curCtx
	newCtx.G = NoGoroutine
	state.status = go122.GoRunnable
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return newCtx, true, nil
}

// advanceGoCreateSyscall handles an event that creates a goroutine
// directly in the syscall state. The new goroutine's ID is ev.args[0].
//
// The context must have a thread and must not have a goroutine; a proc
// is optional. It is an error if a goroutine with the new ID is already
// tracked. On success the goroutine is registered in GoSyscall status
// with a sequence counter of makeSeq(gen, 0), the event is queued with
// the context it arrived in, and the returned context has the new
// goroutine bound to it.
func (o *ordering) advanceGoCreateSyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	// This event indicates that a goroutine is effectively
	// being created out of a cgo callback. Such a goroutine
	// is 'created' in the syscall state.
	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustNotHave}); err != nil {
		return curCtx, false, err
	}
	// This goroutine is effectively being created. Add a state for it.
	newgid := GoID(ev.args[0])
	if _, ok := o.gStates[newgid]; ok {
		return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
	}
	o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
	// Goroutine is executing. Bind it to the context.
	newCtx := curCtx
	newCtx.G = newgid
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return newCtx, true, nil
}

// advanceGoDestroySyscall handles an event that destroys the goroutine
// currently bound to the context. That goroutine must be tracked and in
// GoSyscall status.
//
// On success the goroutine's state is deleted and the returned context
// has no goroutine. If the context also has a proc, the proc must be in
// ProcSyscall status; it is marked ProcSyscallAbandoned, removed from
// the returned context, and a synthetic ProcSteal event is queued ahead
// of the event itself.
func (o *ordering) advanceGoDestroySyscall(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	// This event indicates that a goroutine created for a
	// cgo callback is disappearing, either because the callback
	// is ending or the C thread that called it is being destroyed.
	//
	// Also, treat this as if we lost our P too.
	// The thread ID may be reused by the platform and we'll get
	// really confused if we try to steal the P this is running
	// with later. The new M with the same ID could even try to
	// steal back this P from itself!
	//
	// The runtime is careful to make sure that any GoCreateSyscall
	// event will enter the runtime emitting events for reacquiring a P.
	//
	// Note: we might have a P here. The P might not be released
	// eagerly by the runtime, and it might get stolen back later
	// (or never again, if the program is going to exit).
	if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
		return curCtx, false, err
	}
	// Check to make sure the goroutine exists in the right state.
	state, ok := o.gStates[curCtx.G]
	if !ok {
		return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(ev.typ), curCtx.G)
	}
	if state.status != go122.GoSyscall {
		return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(ev.typ), GoSyscall)
	}
	// This goroutine is exiting itself.
	delete(o.gStates, curCtx.G)
	newCtx := curCtx
	newCtx.G = NoGoroutine

	// If we have a proc, then we're dissociating from it now. See the
	// comment at the top of this function.
	if curCtx.P != NoProc {
		pState, ok := o.pStates[curCtx.P]
		if !ok {
			return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(ev.typ))
		}
		if pState.status != go122.ProcSyscall {
			return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(ev.typ))
		}
		// See the go122-create-syscall-reuse-thread-id test case for more details.
		pState.status = go122.ProcSyscallAbandoned
		newCtx.P = NoProc

		// Queue an extra self-ProcSteal event. It is pushed before the
		// destroy event below, so it comes out of the queue first.
		extra := makeEvent(evt, curCtx, go122.EvProcSteal, ev.time, uint64(curCtx.P))
		extra.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall)
		o.queue.push(extra)
	}
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return newCtx, true, nil
}

// advanceUserTaskBegin handles the start of a user task. The event
// carries the task ID in ev.args[0], the parent task ID in ev.args[1]
// and the string ID of the task name in ev.args[2].
//
// It is an error if the task ID is already active or if the name's
// string ID cannot be resolved in evt. A parent of BackgroundTask is
// rewritten to NoTask, both in the recorded state and in ev.args[1].
// The task is recorded in o.activeTasks before the context is validated
// against event.UserGoReqs. The scheduling context is returned
// unchanged.
func (o *ordering) advanceUserTaskBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	// Handle tasks. Tasks are interesting because:
	// - There's no Begin event required to reference a task.
	// - End for a particular task ID can appear multiple times.
	// As a result, there's very little to validate. The only
	// thing we have to be sure of is that a task didn't begin
	// after it had already begun. Task IDs are allowed to be
	// reused, so we don't care about a Begin after an End.
	id := TaskID(ev.args[0])
	if _, ok := o.activeTasks[id]; ok {
		return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
	}
	// Get the parent ID, but don't validate it. There's no guarantee
	// we actually have information on whether it's active.
	parentID := TaskID(ev.args[1])
	if parentID == BackgroundTask {
		// Note: a value of 0 here actually means no parent, *not* the
		// background task. Automatic background task attachment only
		// applies to regions.
		parentID = NoTask
		ev.args[1] = uint64(NoTask)
	}

	// Validate the name and record it. We'll need to pass it through to
	// EvUserTaskEnd.
	nameID := stringID(ev.args[2])
	name, ok := evt.strings.get(nameID)
	if !ok {
		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
	}
	o.activeTasks[id] = taskState{name: name, parentID: parentID}
	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
		return curCtx, false, err
	}
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return curCtx, true, nil
}

// advanceUserTaskEnd handles the end of a user task whose ID is in
// ev.args[0].
//
// If the task is currently active, its parent ID and name are copied
// into the event's extra arguments and the task is removed from
// o.activeTasks. Otherwise the extra arguments are set to NoTask and an
// empty name. The extra arguments are written before the context is
// validated against event.UserGoReqs. The scheduling context is
// returned unchanged.
func (o *ordering) advanceUserTaskEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	id := TaskID(ev.args[0])
	if ts, ok := o.activeTasks[id]; ok {
		// Smuggle the task info. This may happen in a different generation,
		// which may not have the name in its string table. Add it to the extra
		// strings table so we can look it up later.
		ev.extra(version.Go122)[0] = uint64(ts.parentID)
		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(ts.name))
		delete(o.activeTasks, id)
	} else {
		// Explicitly clear the task info.
		ev.extra(version.Go122)[0] = uint64(NoTask)
		ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
	}
	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
		return curCtx, false, err
	}
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return curCtx, true, nil
}

// advanceUserRegionBegin handles the start of a user region on the
// current goroutine. The event carries the task ID in ev.args[0] and
// the string ID of the region name in ev.args[1].
//
// The context must satisfy event.UserGoReqs, the name must resolve in
// evt, and the current goroutine must have a tracked state. The region
// is then pushed onto that goroutine's list of active regions. The
// scheduling context is returned unchanged.
func (o *ordering) advanceUserRegionBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
		return curCtx, false, err
	}
	tid := TaskID(ev.args[0])
	nameID := stringID(ev.args[1])
	name, ok := evt.strings.get(nameID)
	if !ok {
		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
	}
	gState, ok := o.gStates[curCtx.G]
	if !ok {
		return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
	}
	if err := gState.beginRegion(userRegion{tid, name}); err != nil {
		return curCtx, false, err
	}
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return curCtx, true, nil
}

// advanceUserRegionEnd handles the end of a user region on the current
// goroutine. The event carries the task ID in ev.args[0] and the string
// ID of the region name in ev.args[1].
//
// The context must satisfy event.UserGoReqs, the name must resolve in
// evt, and the current goroutine must have a tracked state. The region
// is then checked against, and removed from, that goroutine's active
// regions via gState.endRegion. The scheduling context is returned
// unchanged.
func (o *ordering) advanceUserRegionEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) {
	if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
		return curCtx, false, err
	}
	tid := TaskID(ev.args[0])
	nameID := stringID(ev.args[1])
	name, ok := evt.strings.get(nameID)
	if !ok {
		return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, ev.typ)
	}
	gState, ok := o.gStates[curCtx.G]
	if !ok {
		return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
	}
	if err := gState.endRegion(userRegion{tid, name}); err != nil {
		return curCtx, false, err
	}
	o.queue.push(Event{table: evt, ctx: curCtx, base: *ev})
	return curCtx, true, nil
}
863 864 // Handle the GC mark phase. 865 // 866 // We have sequence numbers for both start and end because they 867 // can happen on completely different threads. We want an explicit 868 // partial order edge between start and end here, otherwise we're 869 // relying entirely on timestamps to make sure we don't advance a 870 // GCEnd for a _different_ GC cycle if timestamps are wildly broken. 871 func (o *ordering) advanceGCActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 872 seq := ev.args[0] 873 if gen == o.initialGen { 874 if o.gcState != gcUndetermined { 875 return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event") 876 } 877 o.gcSeq = seq 878 o.gcState = gcRunning 879 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 880 return curCtx, true, nil 881 } 882 if seq != o.gcSeq+1 { 883 // This is not the right GC cycle. 884 return curCtx, false, nil 885 } 886 if o.gcState != gcRunning { 887 return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress") 888 } 889 o.gcSeq = seq 890 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 891 return curCtx, false, err 892 } 893 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 894 return curCtx, true, nil 895 } 896 897 func (o *ordering) advanceGCBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 898 seq := ev.args[0] 899 if o.gcState == gcUndetermined { 900 o.gcSeq = seq 901 o.gcState = gcRunning 902 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 903 return curCtx, true, nil 904 } 905 if seq != o.gcSeq+1 { 906 // This is not the right GC cycle. 
907 return curCtx, false, nil 908 } 909 if o.gcState == gcRunning { 910 return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress") 911 } 912 o.gcSeq = seq 913 o.gcState = gcRunning 914 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 915 return curCtx, false, err 916 } 917 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 918 return curCtx, true, nil 919 } 920 921 func (o *ordering) advanceGCEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 922 seq := ev.args[0] 923 if seq != o.gcSeq+1 { 924 // This is not the right GC cycle. 925 return curCtx, false, nil 926 } 927 if o.gcState == gcNotRunning { 928 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress") 929 } 930 if o.gcState == gcUndetermined { 931 return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state") 932 } 933 o.gcSeq = seq 934 o.gcState = gcNotRunning 935 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 936 return curCtx, false, err 937 } 938 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 939 return curCtx, true, nil 940 } 941 942 func (o *ordering) advanceAnnotation(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 943 // Handle simple instantaneous events that require a G. 944 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 945 return curCtx, false, err 946 } 947 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 948 return curCtx, true, nil 949 } 950 951 func (o *ordering) advanceHeapMetric(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 952 // Handle allocation metrics, which don't require a G. 
953 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 954 return curCtx, false, err 955 } 956 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 957 return curCtx, true, nil 958 } 959 960 func (o *ordering) advanceGCSweepBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 961 // Handle sweep, which is bound to a P and doesn't require a G. 962 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 963 return curCtx, false, err 964 } 965 if err := o.pStates[curCtx.P].beginRange(makeRangeType(ev.typ, 0)); err != nil { 966 return curCtx, false, err 967 } 968 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 969 return curCtx, true, nil 970 } 971 972 func (o *ordering) advanceGCSweepActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 973 pid := ProcID(ev.args[0]) 974 // N.B. In practice Ps can't block while they're sweeping, so this can only 975 // ever reference curCtx.P. However, be lenient about this like we are with 976 // GCMarkAssistActive; there's no reason the runtime couldn't change to block 977 // in the middle of a sweep. 
978 pState, ok := o.pStates[pid] 979 if !ok { 980 return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid) 981 } 982 if err := pState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil { 983 return curCtx, false, err 984 } 985 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 986 return curCtx, true, nil 987 } 988 989 func (o *ordering) advanceGCSweepEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 990 if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil { 991 return curCtx, false, err 992 } 993 _, err := o.pStates[curCtx.P].endRange(ev.typ) 994 if err != nil { 995 return curCtx, false, err 996 } 997 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 998 return curCtx, true, nil 999 } 1000 1001 func (o *ordering) advanceGoRangeBegin(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1002 // Handle special goroutine-bound event ranges. 1003 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 1004 return curCtx, false, err 1005 } 1006 desc := stringID(0) 1007 if ev.typ == go122.EvSTWBegin { 1008 desc = stringID(ev.args[0]) 1009 } 1010 gState, ok := o.gStates[curCtx.G] 1011 if !ok { 1012 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G) 1013 } 1014 if err := gState.beginRange(makeRangeType(ev.typ, desc)); err != nil { 1015 return curCtx, false, err 1016 } 1017 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1018 return curCtx, true, nil 1019 } 1020 1021 func (o *ordering) advanceGoRangeActive(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1022 gid := GoID(ev.args[0]) 1023 // N.B. Like GoStatus, this can happen at any time, because it can 1024 // reference a non-running goroutine. 
Don't check anything about the 1025 // current scheduler context. 1026 gState, ok := o.gStates[gid] 1027 if !ok { 1028 return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(ev.typ)) 1029 } 1030 if err := gState.activeRange(makeRangeType(ev.typ, 0), gen == o.initialGen); err != nil { 1031 return curCtx, false, err 1032 } 1033 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1034 return curCtx, true, nil 1035 } 1036 1037 func (o *ordering) advanceGoRangeEnd(ev *baseEvent, evt *evTable, m ThreadID, gen uint64, curCtx schedCtx) (schedCtx, bool, error) { 1038 if err := validateCtx(curCtx, event.UserGoReqs); err != nil { 1039 return curCtx, false, err 1040 } 1041 gState, ok := o.gStates[curCtx.G] 1042 if !ok { 1043 return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", ev.typ, curCtx.G) 1044 } 1045 desc, err := gState.endRange(ev.typ) 1046 if err != nil { 1047 return curCtx, false, err 1048 } 1049 if ev.typ == go122.EvSTWEnd { 1050 // Smuggle the kind into the event. 1051 // Don't use ev.extra here so we have symmetry with STWBegin. 1052 ev.args[0] = uint64(desc) 1053 } 1054 o.queue.push(Event{table: evt, ctx: curCtx, base: *ev}) 1055 return curCtx, true, nil 1056 } 1057 1058 // Next returns the next event in the ordering. 1059 func (o *ordering) Next() (Event, bool) { 1060 return o.queue.pop() 1061 } 1062 1063 // schedCtx represents the scheduling resources associated with an event. 1064 type schedCtx struct { 1065 G GoID 1066 P ProcID 1067 M ThreadID 1068 } 1069 1070 // validateCtx ensures that ctx conforms to some reqs, returning an error if 1071 // it doesn't. 1072 func validateCtx(ctx schedCtx, reqs event.SchedReqs) error { 1073 // Check thread requirements. 
1074 if reqs.Thread == event.MustHave && ctx.M == NoThread { 1075 return fmt.Errorf("expected a thread but didn't have one") 1076 } else if reqs.Thread == event.MustNotHave && ctx.M != NoThread { 1077 return fmt.Errorf("expected no thread but had one") 1078 } 1079 1080 // Check proc requirements. 1081 if reqs.Proc == event.MustHave && ctx.P == NoProc { 1082 return fmt.Errorf("expected a proc but didn't have one") 1083 } else if reqs.Proc == event.MustNotHave && ctx.P != NoProc { 1084 return fmt.Errorf("expected no proc but had one") 1085 } 1086 1087 // Check goroutine requirements. 1088 if reqs.Goroutine == event.MustHave && ctx.G == NoGoroutine { 1089 return fmt.Errorf("expected a goroutine but didn't have one") 1090 } else if reqs.Goroutine == event.MustNotHave && ctx.G != NoGoroutine { 1091 return fmt.Errorf("expected no goroutine but had one") 1092 } 1093 return nil 1094 } 1095 1096 // gcState is a trinary variable for the current state of the GC. 1097 // 1098 // The third state besides "enabled" and "disabled" is "undetermined." 1099 type gcState uint8 1100 1101 const ( 1102 gcUndetermined gcState = iota 1103 gcNotRunning 1104 gcRunning 1105 ) 1106 1107 // String returns a human-readable string for the GC state. 1108 func (s gcState) String() string { 1109 switch s { 1110 case gcUndetermined: 1111 return "Undetermined" 1112 case gcNotRunning: 1113 return "NotRunning" 1114 case gcRunning: 1115 return "Running" 1116 } 1117 return "Bad" 1118 } 1119 1120 // userRegion represents a unique user region when attached to some gState. 1121 type userRegion struct { 1122 // name must be a resolved string because the string ID for the same 1123 // string may change across generations, but we care about checking 1124 // the value itself. 1125 taskID TaskID 1126 name string 1127 } 1128 1129 // rangeType is a way to classify special ranges of time. 
1130 // 1131 // These typically correspond 1:1 with "Begin" events, but 1132 // they may have an optional subtype that describes the range 1133 // in more detail. 1134 type rangeType struct { 1135 typ event.Type // "Begin" event. 1136 desc stringID // Optional subtype. 1137 } 1138 1139 // makeRangeType constructs a new rangeType. 1140 func makeRangeType(typ event.Type, desc stringID) rangeType { 1141 if styp := go122.Specs()[typ].StartEv; styp != go122.EvNone { 1142 typ = styp 1143 } 1144 return rangeType{typ, desc} 1145 } 1146 1147 // gState is the state of a goroutine at a point in the trace. 1148 type gState struct { 1149 id GoID 1150 status go122.GoStatus 1151 seq seqCounter 1152 1153 // regions are the active user regions for this goroutine. 1154 regions []userRegion 1155 1156 // rangeState is the state of special time ranges bound to this goroutine. 1157 rangeState 1158 } 1159 1160 // beginRegion starts a user region on the goroutine. 1161 func (s *gState) beginRegion(r userRegion) error { 1162 s.regions = append(s.regions, r) 1163 return nil 1164 } 1165 1166 // endRegion ends a user region on the goroutine. 1167 func (s *gState) endRegion(r userRegion) error { 1168 if len(s.regions) == 0 { 1169 // We do not know about regions that began before tracing started. 1170 return nil 1171 } 1172 if next := s.regions[len(s.regions)-1]; next != r { 1173 return fmt.Errorf("misuse of region in goroutine %v: region end %v when the inner-most active region start event is %v", s.id, r, next) 1174 } 1175 s.regions = s.regions[:len(s.regions)-1] 1176 return nil 1177 } 1178 1179 // pState is the state of a proc at a point in the trace. 1180 type pState struct { 1181 id ProcID 1182 status go122.ProcStatus 1183 seq seqCounter 1184 1185 // rangeState is the state of special time ranges bound to this proc. 1186 rangeState 1187 } 1188 1189 // mState is the state of a thread at a point in the trace. 1190 type mState struct { 1191 g GoID // Goroutine bound to this M. 
(The goroutine's state is Executing.) 1192 p ProcID // Proc bound to this M. (The proc's state is Executing.) 1193 } 1194 1195 // rangeState represents the state of special time ranges. 1196 type rangeState struct { 1197 // inFlight contains the rangeTypes of any ranges bound to a resource. 1198 inFlight []rangeType 1199 } 1200 1201 // beginRange begins a special range in time on the goroutine. 1202 // 1203 // Returns an error if the range is already in progress. 1204 func (s *rangeState) beginRange(typ rangeType) error { 1205 if s.hasRange(typ) { 1206 return fmt.Errorf("discovered event already in-flight for when starting event %v", go122.Specs()[typ.typ].Name) 1207 } 1208 s.inFlight = append(s.inFlight, typ) 1209 return nil 1210 } 1211 1212 // activeRange marks special range in time on the goroutine as active in the 1213 // initial generation, or confirms that it is indeed active in later generations. 1214 func (s *rangeState) activeRange(typ rangeType, isInitialGen bool) error { 1215 if isInitialGen { 1216 if s.hasRange(typ) { 1217 return fmt.Errorf("found named active range already in first gen: %v", typ) 1218 } 1219 s.inFlight = append(s.inFlight, typ) 1220 } else if !s.hasRange(typ) { 1221 return fmt.Errorf("resource is missing active range: %v %v", go122.Specs()[typ.typ].Name, s.inFlight) 1222 } 1223 return nil 1224 } 1225 1226 // hasRange returns true if a special time range on the goroutine as in progress. 1227 func (s *rangeState) hasRange(typ rangeType) bool { 1228 for _, ftyp := range s.inFlight { 1229 if ftyp == typ { 1230 return true 1231 } 1232 } 1233 return false 1234 } 1235 1236 // endsRange ends a special range in time on the goroutine. 1237 // 1238 // This must line up with the start event type of the range the goroutine is currently in. 
1239 func (s *rangeState) endRange(typ event.Type) (stringID, error) { 1240 st := go122.Specs()[typ].StartEv 1241 idx := -1 1242 for i, r := range s.inFlight { 1243 if r.typ == st { 1244 idx = i 1245 break 1246 } 1247 } 1248 if idx < 0 { 1249 return 0, fmt.Errorf("tried to end event %v, but not in-flight", go122.Specs()[st].Name) 1250 } 1251 // Swap remove. 1252 desc := s.inFlight[idx].desc 1253 s.inFlight[idx], s.inFlight[len(s.inFlight)-1] = s.inFlight[len(s.inFlight)-1], s.inFlight[idx] 1254 s.inFlight = s.inFlight[:len(s.inFlight)-1] 1255 return desc, nil 1256 } 1257 1258 // seqCounter represents a global sequence counter for a resource. 1259 type seqCounter struct { 1260 gen uint64 // The generation for the local sequence counter seq. 1261 seq uint64 // The sequence number local to the generation. 1262 } 1263 1264 // makeSeq creates a new seqCounter. 1265 func makeSeq(gen, seq uint64) seqCounter { 1266 return seqCounter{gen: gen, seq: seq} 1267 } 1268 1269 // succeeds returns true if a is the immediate successor of b. 1270 func (a seqCounter) succeeds(b seqCounter) bool { 1271 return a.gen == b.gen && a.seq == b.seq+1 1272 } 1273 1274 // String returns a debug string representation of the seqCounter. 
1275 func (c seqCounter) String() string { 1276 return fmt.Sprintf("%d (gen=%d)", c.seq, c.gen) 1277 } 1278 1279 func dumpOrdering(order *ordering) string { 1280 var sb strings.Builder 1281 for id, state := range order.gStates { 1282 fmt.Fprintf(&sb, "G %d [status=%s seq=%s]\n", id, state.status, state.seq) 1283 } 1284 fmt.Fprintln(&sb) 1285 for id, state := range order.pStates { 1286 fmt.Fprintf(&sb, "P %d [status=%s seq=%s]\n", id, state.status, state.seq) 1287 } 1288 fmt.Fprintln(&sb) 1289 for id, state := range order.mStates { 1290 fmt.Fprintf(&sb, "M %d [g=%d p=%d]\n", id, state.g, state.p) 1291 } 1292 fmt.Fprintln(&sb) 1293 fmt.Fprintf(&sb, "GC %d %s\n", order.gcSeq, order.gcState) 1294 return sb.String() 1295 } 1296 1297 // taskState represents an active task. 1298 type taskState struct { 1299 // name is the type of the active task. 1300 name string 1301 1302 // parentID is the parent ID of the active task. 1303 parentID TaskID 1304 } 1305 1306 // queue implements a growable ring buffer with a queue API. 1307 type queue[T any] struct { 1308 start, end int 1309 buf []T 1310 } 1311 1312 // push adds a new event to the back of the queue. 1313 func (q *queue[T]) push(value T) { 1314 if q.end-q.start == len(q.buf) { 1315 q.grow() 1316 } 1317 q.buf[q.end%len(q.buf)] = value 1318 q.end++ 1319 } 1320 1321 // grow increases the size of the queue. 1322 func (q *queue[T]) grow() { 1323 if len(q.buf) == 0 { 1324 q.buf = make([]T, 2) 1325 return 1326 } 1327 1328 // Create new buf and copy data over. 1329 newBuf := make([]T, len(q.buf)*2) 1330 pivot := q.start % len(q.buf) 1331 first, last := q.buf[pivot:], q.buf[:pivot] 1332 copy(newBuf[:len(first)], first) 1333 copy(newBuf[len(first):], last) 1334 1335 // Update the queue state. 1336 q.start = 0 1337 q.end = len(q.buf) 1338 q.buf = newBuf 1339 } 1340 1341 // pop removes an event from the front of the queue. If the 1342 // queue is empty, it returns an EventBad event. 
1343 func (q *queue[T]) pop() (T, bool) { 1344 if q.end-q.start == 0 { 1345 return *new(T), false 1346 } 1347 elem := &q.buf[q.start%len(q.buf)] 1348 value := *elem 1349 *elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables. 1350 q.start++ 1351 return value, true 1352 } 1353 1354 // makeEvent creates an Event from the provided information. 1355 // 1356 // It's just a convenience function; it's always OK to construct 1357 // an Event manually if this isn't quite the right way to express 1358 // the contents of the event. 1359 func makeEvent(table *evTable, ctx schedCtx, typ event.Type, time Time, args ...uint64) Event { 1360 ev := Event{ 1361 table: table, 1362 ctx: ctx, 1363 base: baseEvent{ 1364 typ: typ, 1365 time: time, 1366 }, 1367 } 1368 copy(ev.base.args[:], args) 1369 return ev 1370 }