github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"container/heap"
	"sort"
)

const (
	ScorePhi       = iota // towards top of block
	ScoreArg              // must occur at the top of the entry block
	ScoreInitMem          // after the args - used as mark by debug info generation
	ScoreReadTuple        // must occur immediately after tuple-generating insn (or call)
	ScoreNilCheck
	ScoreMemory
	ScoreReadFlags
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

type ValHeap struct {
	a           []*Value
	score       []int8
	inBlockUses []bool
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c < 0 // lower scores come earlier.
	}
	// Note: only scores are required for correct scheduling.
	// Everything else is just heuristics.

	ix := h.inBlockUses[x.ID]
	iy := h.inBlockUses[y.ID]
	if ix != iy {
		return ix // values with in-block uses come earlier
	}

	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.Before(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c > 0 // smaller args come later
		}
	}
	if c := x.Uses - y.Uses; c != 0 {
		return c > 0 // smaller uses come later
	}
	// These comparisons are fairly arbitrary.
	// The goal here is stability in the face
	// of unrelated changes elsewhere in the compiler.
	if c := x.AuxInt - y.AuxInt; c != 0 {
		return c < 0
	}
	if cmp := x.Type.Compare(y.Type); cmp != types.CMPeq {
		return cmp == types.CMPlt
	}
	return x.ID < y.ID
}

func (op Op) isLoweredGetClosurePtr() bool {
	switch op {
	case OpAMD64LoweredGetClosurePtr, OpPPC64LoweredGetClosurePtr, OpARMLoweredGetClosurePtr, OpARM64LoweredGetClosurePtr,
		Op386LoweredGetClosurePtr, OpMIPS64LoweredGetClosurePtr, OpLOONG64LoweredGetClosurePtr, OpS390XLoweredGetClosurePtr, OpMIPSLoweredGetClosurePtr,
		OpRISCV64LoweredGetClosurePtr, OpWasmLoweredGetClosurePtr:
		return true
	}
	return false
}
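
// ValHeap is driven through the standard container/heap protocol: the
// scheduler pushes each value once all of its in-block dependencies have
// been placed, and pops values in score order (with Less's tie-breakers).
// A minimal sketch of that usage, for orientation only ("ready" and
// "place" are illustrative stand-ins, not identifiers in this package):
//
//	priq := &ValHeap{score: score, inBlockUses: inBlockUses}
//	for _, v := range ready { // values with no unscheduled in-block deps
//		heap.Push(priq, v)
//	}
//	for priq.Len() > 0 {
//		v := heap.Pop(priq).(*Value) // lowest score first
//		place(v)
//	}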

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := f.Cache.allocInt8Slice(f.NumValues())
	defer f.Cache.freeInt8Slice(score)

	// maps mem values to the next live memory value
	nextMem := f.Cache.allocValueSlice(f.NumValues())
	defer f.Cache.freeValueSlice(nextMem)

	// inBlockUses records whether a value is used in the block
	// in which it lives. (block control values don't count as uses.)
	inBlockUses := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(inBlockUses)
	if f.Config.optimize {
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for _, a := range v.Args {
					if a.Block == b {
						inBlockUses[a.ID] = true
					}
				}
			}
		}
	}
	priq.inBlockUses = inBlockUses

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op.isLoweredGetClosurePtr():
				// We also score LoweredGetClosurePtr as early as possible to ensure that the
				// context register is not stomped. LoweredGetClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case opcodeTable[v.Op].nilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpArgIntReg || v.Op == OpArgFloatReg:
				// In-register args must be scheduled as early as possible to ensure that they
				// are not stomped (similar to the closure pointer above).
				// In particular, they need to come before regular OpArg operations because
				// of how regalloc places spill code (see regalloc.go:placeSpills:mustBeFirst).
				if b != f.Entry {
					f.Fatalf("%s appeared outside of entry block, b=%s", v.Op, b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpArg || v.Op == OpSP || v.Op == OpSB:
				// We want all the args as early as possible, for better debugging.
				score[v.ID] = ScoreArg
			case v.Op == OpInitMem:
				// Early, but after args. See debug.go:buildLocationLists
				score[v.ID] = ScoreInitMem
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
				// Tuple selectors need to appear immediately after the instruction
				// that generates the tuple.
				score[v.ID] = ScoreReadTuple
			case v.hasFlagInput():
				// Schedule flag-reading ops earlier, to minimize the lifetime
				// of flag values.
				score[v.ID] = ScoreReadFlags
			case v.isFlagOp():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				// Note that this case is after the case above, so values
				// which both read and generate flags are given ScoreReadFlags.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
				// If we're reading flags, schedule earlier to keep flag lifetime short.
				for _, a := range v.Args {
					if a.isFlagOp() {
						score[v.ID] = ScoreReadFlags
					}
				}
			}
		}
		for _, c := range b.ControlValues() {
			// Force the control values to be scheduled at the end,
			// unless they have other special priority.
			if c.Block != b || score[c.ID] < ScoreReadTuple {
				continue
			}
			if score[c.ID] == ScoreReadTuple {
				score[c.Args[0].ID] = ScoreControl
				continue
			}
			score[c.ID] = ScoreControl
		}
	}
	priq.score = score

	// An edge represents a scheduling constraint that x must appear before y in the schedule.
	type edge struct {
		x, y *Value
	}
	edges := make([]edge, 0, 64)

	// inEdges is the number of scheduling edges incoming from values that haven't been scheduled yet.
	// i.e. inEdges[y.ID] = |e in edges where e.y == y and e.x is not in the schedule yet|.
	inEdges := f.Cache.allocInt32Slice(f.NumValues())
	defer f.Cache.freeInt32Slice(inEdges)

	for _, b := range f.Blocks {
		edges = edges[:0]
		// Standard edges: from the argument of a value to that value.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, a := range v.Args {
				if a.Block == b {
					edges = append(edges, edge{a, v})
				}
			}
		}

		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Op != OpInitMem && v.Type.IsMemory() {
				nextMem[v.MemoryArg().ID] = v
			}
		}

		// Add edges to enforce that any load must come before the following store.
		for _, v := range b.Values {
			if v.Op == OpPhi || v.Type.IsMemory() {
				continue
			}
			w := v.MemoryArg()
			if w == nil {
				continue
			}
			if s := nextMem[w.ID]; s != nil && s.Block == b {
				edges = append(edges, edge{v, s})
			}
		}

		// Sort all the edges by source Value ID.
		sort.Slice(edges, func(i, j int) bool {
			return edges[i].x.ID < edges[j].x.ID
		})
		// Compute inEdges for values in this block.
		for _, e := range edges {
			inEdges[e.y.ID]++
		}

		// Initialize priority queue with schedulable values.
		priq.a = priq.a[:0]
		for _, v := range b.Values {
			if inEdges[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Produce the schedule. Pick the highest priority schedulable value,
		// add it to the schedule, add any of its uses that are now schedulable
		// to the queue, and repeat. (A generic sketch of this loop, stripped of
		// the SSA details, follows the function.)
		nv := len(b.Values)
		b.Values = b.Values[:0]
		for priq.Len() > 0 {
			// Schedule the next schedulable value in priority order.
			v := heap.Pop(priq).(*Value)
			b.Values = append(b.Values, v)

			// Find all the scheduling edges out from this value.
			i := sort.Search(len(edges), func(i int) bool {
				return edges[i].x.ID >= v.ID
			})
			j := sort.Search(len(edges), func(i int) bool {
				return edges[i].x.ID > v.ID
			})
			// Decrement inEdges for each target of edges from v.
			for _, e := range edges[i:j] {
				inEdges[e.y.ID]--
				if inEdges[e.y.ID] == 0 {
					heap.Push(priq, e.y)
				}
			}
		}
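		// Sanity check: every value must have been scheduled. If the
		// in-block dependency edges ever formed a cycle, for example, the
		// values on the cycle would never reach inEdges == 0 and would be
		// missing from the schedule; the check below catches that.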
		if len(b.Values) != nv {
			f.Fatalf("schedule does not include all values in block %s", b)
		}
	}

	// Remove SPanchored now that we've scheduled.
	// Also unlink nil checks now that ordering is assured
	// between the nil check and the uses of the nil-checked pointer.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, a := range v.Args {
				if a.Op == OpSPanchored || opcodeTable[a.Op].nilCheck {
					v.SetArg(i, a.Args[0])
				}
			}
		}
		for i, c := range b.ControlValues() {
			if c.Op == OpSPanchored || opcodeTable[c.Op].nilCheck {
				b.ReplaceControl(i, c.Args[0])
			}
		}
	}
	for _, b := range f.Blocks {
		i := 0
		for _, v := range b.Values {
			if v.Op == OpSPanchored {
				// Free this value
				if v.Uses != 0 {
					base.Fatalf("SPAnchored still has %d uses", v.Uses)
				}
				v.resetArgs()
				f.freeValue(v)
			} else {
				if opcodeTable[v.Op].nilCheck {
					if v.Uses != 0 {
						base.Fatalf("nilcheck still has %d uses", v.Uses)
					}
					// We can't delete the nil check, but we mark
					// it as having void type so regalloc won't
					// try to allocate a register for it.
					v.Type = types.TypeVoid
				}
				b.Values[i] = v
				i++
			}
		}
		b.truncateValues(i)
	}

	f.scheduled = true
}
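
// The per-block loop in schedule above is a priority-driven variant of
// Kahn's topological sort: a value becomes ready once all of its in-block
// dependency edges are satisfied, and ties among ready values are broken
// by score (via ValHeap.Less). A minimal, generic sketch of that shape,
// assuming nothing from this package (deps[v] lists v's dependencies;
// a linear scan stands in for the heap):
//
//	func topoByScore(n int, deps [][]int, score []int8) []int {
//		indeg := make([]int, n)
//		users := make([][]int, n)
//		for v, ds := range deps {
//			for _, d := range ds {
//				indeg[v]++
//				users[d] = append(users[d], v)
//			}
//		}
//		var order, ready []int
//		for v := 0; v < n; v++ {
//			if indeg[v] == 0 {
//				ready = append(ready, v)
//			}
//		}
//		for len(ready) > 0 {
//			best := 0
//			for i := range ready {
//				if score[ready[i]] < score[ready[best]] {
//					best = i
//				}
//			}
//			v := ready[best]
//			ready = append(ready[:best], ready[best+1:]...)
//			order = append(order, v)
//			for _, u := range users[v] {
//				if indeg[u]--; indeg[u] == 0 {
//					ready = append(ready, u)
//				}
//			}
//		}
//		return order
//	}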

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
//
//	store1
//	NilCheck that depends on store1
//	other values that depend on store1
//	store2
//	NilCheck that depends on store2
//	other values that depend on store2
//	...
//
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores

	// Members of values that are store values.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every storeOrder call.
	stores := make([]*Value, 0, 64)
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			sset.add(v.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. The store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i. A nil
	// check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
	// on is in the previous block (or there is no store at all, e.g. the value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
	// (A worked example of this numbering appears after the function.)
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		w = w.MemoryArg()
	}
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// A phi value doesn't depend on any store in the current block.
				// Do this early to avoid a dependency cycle.
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	// Order nil checks in source order. We want the first in source order to trigger.
	// If two are on the same line, we don't really care which happens first.
	// See issue 18169.
	if hasNilCheck {
		start := -1
		for i, v := range order {
			if v.Op == OpNilCheck {
				if start == -1 {
					start = i
				}
			} else {
				if start != -1 {
					sort.Sort(bySourcePos(order[start:i]))
					start = -1
				}
			}
		}
		if start != -1 {
			sort.Sort(bySourcePos(order[start:]))
		}
	}

	return order
}
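
// A worked example of the store numbering used above, with purely
// illustrative values: suppose a block contains two stores s1 and s2,
// a nil check and a load that depend on s1, a value that depends on s2,
// and a Const with no store dependency in this block. Then
//
//	Const (no in-block store dependency)  -> 2
//	s1 (first store in the chain)         -> 3  (3*1)
//	NilCheck depending on s1              -> 4  (3*1+1)
//	load depending on s1                  -> 5  (3*1+2)
//	s2 (second store in the chain)        -> 6  (3*2)
//	value depending on s2                 -> 8  (3*2+2)
//
// The counting sort over these numbers then yields exactly the order
// described in storeOrder's doc comment.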

// isFlagOp reports whether v is an op with the flag type.
func (v *Value) isFlagOp() bool {
	if v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags() {
		return true
	}
	// PPC64 carry generators put their carry in a non-flag-typed register
	// in their output.
	switch v.Op {
	case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
		return true
	}
	return false
}

// hasFlagInput reports whether v has a flag value as any of its inputs.
func (v *Value) hasFlagInput() bool {
	for _, a := range v.Args {
		if a.isFlagOp() {
			return true
		}
	}
	// PPC64 carry dependencies are conveyed through their final argument,
	// so we treat those operations as taking flags as well.
	switch v.Op {
	case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
		return true
	}
	return false
}

type bySourcePos []*Value

func (s bySourcePos) Len() int           { return len(s) }
func (s bySourcePos) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s bySourcePos) Less(i, j int) bool { return s[i].Pos.Before(s[j].Pos) }