github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"container/heap"
	"sort"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/types"
)

const (
	ScorePhi       = iota // towards top of block
	ScoreArg              // must occur at the top of the entry block
	ScoreInitMem          // after the args - used as a mark by debug info generation
	ScoreReadTuple        // must occur immediately after tuple-generating insn (or call)
	ScoreNilCheck
	ScoreMemory
	ScoreReadFlags
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

type ValHeap struct {
	a           []*Value
	score       []int8
	inBlockUses []bool
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c < 0 // lower scores come earlier.
	}
	// Note: only scores are required for correct scheduling.
	// Everything else is just heuristics.

	ix := h.inBlockUses[x.ID]
	iy := h.inBlockUses[y.ID]
	if ix != iy {
		return ix // values with in-block uses come earlier
	}

	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.Before(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c > 0 // smaller args come later
		}
	}
	if c := x.Uses - y.Uses; c != 0 {
		return c > 0 // smaller uses come later
	}
	// These comparisons are fairly arbitrary.
	// The goal here is stability in the face
	// of unrelated changes elsewhere in the compiler.
	if c := x.AuxInt - y.AuxInt; c != 0 {
		return c < 0
	}
	if cmp := x.Type.Compare(y.Type); cmp != types.CMPeq {
		return cmp == types.CMPlt
	}
	return x.ID < y.ID
}

func (op Op) isLoweredGetClosurePtr() bool {
	switch op {
	case OpAMD64LoweredGetClosurePtr, OpPPC64LoweredGetClosurePtr, OpARMLoweredGetClosurePtr, OpARM64LoweredGetClosurePtr,
		Op386LoweredGetClosurePtr, OpMIPS64LoweredGetClosurePtr, OpLOONG64LoweredGetClosurePtr, OpS390XLoweredGetClosurePtr, OpMIPSLoweredGetClosurePtr,
		OpRISCV64LoweredGetClosurePtr, OpWasmLoweredGetClosurePtr:
		return true
	}
	return false
}

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := f.Cache.allocInt8Slice(f.NumValues())
	defer f.Cache.freeInt8Slice(score)

	// maps mem values to the next live memory value
	nextMem := f.Cache.allocValueSlice(f.NumValues())
	defer f.Cache.freeValueSlice(nextMem)

	// inBlockUses records whether a value is used in the block
	// in which it lives. (block control values don't count as uses.)
	inBlockUses := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(inBlockUses)
	if f.Config.optimize {
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for _, a := range v.Args {
					if a.Block == b {
						inBlockUses[a.ID] = true
					}
				}
			}
		}
	}
	priq.inBlockUses = inBlockUses

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op.isLoweredGetClosurePtr():
				// We also score LoweredGetClosurePtr as early as possible to ensure that the
				// context register is not stomped. LoweredGetClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case opcodeTable[v.Op].nilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpArgIntReg || v.Op == OpArgFloatReg:
				// In-register args must be scheduled as early as possible to ensure that they
				// are not stomped (similar to the closure pointer above).
				// In particular, they need to come before regular OpArg operations because
				// of how regalloc places spill code (see regalloc.go:placeSpills:mustBeFirst).
				if b != f.Entry {
					f.Fatalf("%s appeared outside of entry block, b=%s", v.Op, b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpArg || v.Op == OpSP || v.Op == OpSB:
				// We want all the args as early as possible, for better debugging.
				score[v.ID] = ScoreArg
			case v.Op == OpInitMem:
				// Early, but after args. See debug.go:buildLocationLists
				score[v.ID] = ScoreInitMem
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
				// Tuple selectors need to appear immediately after the instruction
				// that generates the tuple.
				score[v.ID] = ScoreReadTuple
			case v.hasFlagInput():
				// Schedule flag-reading ops earlier, to minimize the lifetime
				// of flag values.
				score[v.ID] = ScoreReadFlags
			case v.isFlagOp():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				// Note that this case is after the case above, so values
				// which both read and generate flags are given ScoreReadFlags.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
				// If we're reading flags, schedule earlier to keep flag lifetime short.
				for _, a := range v.Args {
					if a.isFlagOp() {
						score[v.ID] = ScoreReadFlags
					}
				}
			}
		}
		for _, c := range b.ControlValues() {
			// Force the control values to be scheduled at the end,
			// unless they have other special priority.
			if c.Block != b || score[c.ID] < ScoreReadTuple {
				continue
			}
			if score[c.ID] == ScoreReadTuple {
				score[c.Args[0].ID] = ScoreControl
				continue
			}
			score[c.ID] = ScoreControl
		}
	}
	priq.score = score

	// An edge represents a scheduling constraint that x must appear before y in the schedule.
	type edge struct {
		x, y *Value
	}
	edges := make([]edge, 0, 64)

	// inEdges is the number of scheduling edges incoming from values that haven't been scheduled yet.
	// i.e. inEdges[y.ID] = |e in edges where e.y == y and e.x is not in the schedule yet|.
	inEdges := f.Cache.allocInt32Slice(f.NumValues())
	defer f.Cache.freeInt32Slice(inEdges)

	for _, b := range f.Blocks {
		edges = edges[:0]
		// Standard edges: from the argument of a value to that value.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, a := range v.Args {
				if a.Block == b {
					edges = append(edges, edge{a, v})
				}
			}
		}

		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Op != OpInitMem && v.Type.IsMemory() {
				nextMem[v.MemoryArg().ID] = v
			}
		}

		// Add edges to enforce that any load must come before the following store.
		for _, v := range b.Values {
			if v.Op == OpPhi || v.Type.IsMemory() {
				continue
			}
			w := v.MemoryArg()
			if w == nil {
				continue
			}
			if s := nextMem[w.ID]; s != nil && s.Block == b {
				edges = append(edges, edge{v, s})
			}
		}

		// Sort all the edges by source Value ID.
		sort.Slice(edges, func(i, j int) bool {
			return edges[i].x.ID < edges[j].x.ID
		})
		// Compute inEdges for values in this block.
		for _, e := range edges {
			inEdges[e.y.ID]++
		}

		// Initialize priority queue with schedulable values.
		priq.a = priq.a[:0]
		for _, v := range b.Values {
			if inEdges[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Produce the schedule. Pick the highest priority schedulable value,
		// add it to the schedule, add any of its uses that are now schedulable
		// to the queue, and repeat.
		nv := len(b.Values)
		b.Values = b.Values[:0]
		for priq.Len() > 0 {
			// Schedule the next schedulable value in priority order.
			v := heap.Pop(priq).(*Value)
			b.Values = append(b.Values, v)

			// Find all the scheduling edges out from this value.
			i := sort.Search(len(edges), func(i int) bool {
				return edges[i].x.ID >= v.ID
			})
			j := sort.Search(len(edges), func(i int) bool {
				return edges[i].x.ID > v.ID
			})
			// Decrement inEdges for each target of edges from v.
			for _, e := range edges[i:j] {
				inEdges[e.y.ID]--
				if inEdges[e.y.ID] == 0 {
					heap.Push(priq, e.y)
				}
			}
		}
		if len(b.Values) != nv {
			f.Fatalf("schedule does not include all values in block %s", b)
		}
	}

	// Remove SPanchored now that we've scheduled.
	// Also unlink nil checks now that ordering is assured
	// between the nil check and the uses of the nil-checked pointer.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, a := range v.Args {
				if a.Op == OpSPanchored || opcodeTable[a.Op].nilCheck {
					v.SetArg(i, a.Args[0])
				}
			}
		}
		for i, c := range b.ControlValues() {
			if c.Op == OpSPanchored || opcodeTable[c.Op].nilCheck {
				b.ReplaceControl(i, c.Args[0])
			}
		}
	}
	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpSPanchored {
				// Free this value
				if v.Uses != 0 {
					base.Fatalf("SPAnchored still has %d uses", v.Uses)
				}
				v.resetArgs()
				f.freeValue(v)
			} else {
				if opcodeTable[v.Op].nilCheck {
					if v.Uses != 0 {
						base.Fatalf("nilcheck still has %d uses", v.Uses)
					}
					// We can't delete the nil check, but we mark
					// it as having void type so regalloc won't
					// try to allocate a register for it.
					v.Type = types.TypeVoid
				}
				b.Values[i] = v
				i++
			}
		}
		b.truncateValues(i)
	}

	f.scheduled = true
}

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
//
//	store1
//	NilCheck that depends on store1
//	other values that depend on store1
//	store2
//	NilCheck that depends on store2
//	other values that depend on store2
//	...
//
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores

	// Members of values that are store values.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every storeOrder call.
	stores := make([]*Value, 0, 64)
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			sset.add(v.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. Store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i.
	// A nil check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends on
	// is in the previous block (or there is no store at all, e.g. the value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		w = w.MemoryArg()
	}
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// Phi value doesn't depend on store in the current block.
				// Do this early to avoid dependency cycle.
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	// Order nil checks in source order. We want the first in source order to trigger.
	// If two are on the same line, we don't really care which happens first.
	// See issue 18169.
	if hasNilCheck {
		start := -1
		for i, v := range order {
			if v.Op == OpNilCheck {
				if start == -1 {
					start = i
				}
			} else {
				if start != -1 {
					sort.Sort(bySourcePos(order[start:i]))
					start = -1
				}
			}
		}
		if start != -1 {
			sort.Sort(bySourcePos(order[start:]))
		}
	}

	return order
}

// isFlagOp reports whether v is an op with the flag type.
func (v *Value) isFlagOp() bool {
	if v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags() {
		return true
	}
	// PPC64 carry generators put their carry in a non-flag-typed register
	// in their output.
	switch v.Op {
	case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
		return true
	}
	return false
}

// hasFlagInput reports whether v has a flag value as any of its inputs.
func (v *Value) hasFlagInput() bool {
	for _, a := range v.Args {
		if a.isFlagOp() {
			return true
		}
	}
	// PPC64 carry dependencies are conveyed through their final argument,
	// so we treat those operations as taking flags as well.
	switch v.Op {
	case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
		return true
	}
	return false
}

type bySourcePos []*Value

func (s bySourcePos) Len() int           { return len(s) }
func (s bySourcePos) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s bySourcePos) Less(i, j int) bool { return s[i].Pos.Before(s[j].Pos) }
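
// Editor's note: the per-block loop in schedule above is standard list
// scheduling: count each value's unscheduled in-block predecessors, seed a
// priority queue with the values whose count is zero, then repeatedly pop the
// best-scored ready value and decrement the counts of its successors. As a
// rough illustrative sketch only -- pq, succs, inEdges, and order below are
// hypothetical names, not part of this package -- the same pattern over plain
// integer node IDs looks like:
//
//	// inEdges[y] counts y's not-yet-scheduled predecessors.
//	for id, n := range inEdges {
//		if n == 0 {
//			heap.Push(pq, id) // ready from the start
//		}
//	}
//	for pq.Len() > 0 {
//		x := heap.Pop(pq).(int) // best-scored ready node
//		order = append(order, x)
//		for _, y := range succs[x] {
//			inEdges[y]--
//			if inEdges[y] == 0 {
//				heap.Push(pq, y) // all of y's predecessors are now scheduled
//			}
//		}
//	}
//
// where pq is any container/heap.Interface whose Less orders node IDs by
// score, in the same way ValHeap orders *Values.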