github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/schedule.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"container/heap"
	"sort"
)

const (
	ScorePhi = iota // towards top of block
	ScoreArg
	ScoreNilCheck
	ScoreReadTuple
	ScoreVarDef
	ScoreCarryChainTail
	ScoreMemory
	ScoreReadFlags
	ScoreDefault
	ScoreFlags
	ScoreControl // towards bottom of block
)

type ValHeap struct {
	a     []*Value
	score []int8
}

func (h ValHeap) Len() int      { return len(h.a) }
func (h ValHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }

func (h *ValHeap) Push(x interface{}) {
	// Push and Pop use pointer receivers because they modify the slice's length,
	// not just its contents.
	v := x.(*Value)
	h.a = append(h.a, v)
}
func (h *ValHeap) Pop() interface{} {
	old := h.a
	n := len(old)
	x := old[n-1]
	h.a = old[0 : n-1]
	return x
}
func (h ValHeap) Less(i, j int) bool {
	x := h.a[i]
	y := h.a[j]
	sx := h.score[x.ID]
	sy := h.score[y.ID]
	if c := sx - sy; c != 0 {
		return c > 0 // higher score comes later.
	}
	if x.Pos != y.Pos { // Favor in-order line stepping
		return x.Pos.After(y.Pos)
	}
	if x.Op != OpPhi {
		if c := len(x.Args) - len(y.Args); c != 0 {
			return c < 0 // fewer args come later
		}
	}
	if c := x.Uses - y.Uses; c != 0 {
		return c < 0 // fewer uses come later
	}
	// These comparisons are fairly arbitrary.
	// The goal here is stability in the face
	// of unrelated changes elsewhere in the compiler.
	if c := x.AuxInt - y.AuxInt; c != 0 {
		return c > 0
	}
	if cmp := x.Type.Compare(y.Type); cmp != types.CMPeq {
		return cmp == types.CMPgt
	}
	return x.ID > y.ID
}

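// Note on ordering: container/heap pops the minimum element as defined by
// Less, and schedule assembles each block's order back-to-front, so Less is
// written "inverted": among the schedulable values, the one that should
// appear last in the block compares as the smallest and is popped first.
// A simplified sketch of how the heap is consumed by the scheduling loop in
// schedule below:
//
//	for priq.Len() > 0 {
//		v := heap.Pop(priq).(*Value) // the value that should come last among the ready values
//		order = append(order, v)     // order is reversed into b.Values at the end
//	}
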
func (op Op) isLoweredGetClosurePtr() bool {
	switch op {
	case OpAMD64LoweredGetClosurePtr, OpPPC64LoweredGetClosurePtr, OpARMLoweredGetClosurePtr, OpARM64LoweredGetClosurePtr,
		Op386LoweredGetClosurePtr, OpMIPS64LoweredGetClosurePtr, OpLOONG64LoweredGetClosurePtr, OpS390XLoweredGetClosurePtr, OpMIPSLoweredGetClosurePtr,
		OpRISCV64LoweredGetClosurePtr, OpWasmLoweredGetClosurePtr:
		return true
	}
	return false
}

// Schedule the Values in each Block. After this phase returns, the
// order of b.Values matters and is the order in which those values
// will appear in the assembly output. For now it generates a
// reasonable valid schedule using a priority queue. TODO(khr):
// schedule smarter.
func schedule(f *Func) {
	// For each value, the number of times it is used in the block
	// by values that have not been scheduled yet.
	uses := f.Cache.allocInt32Slice(f.NumValues())
	defer f.Cache.freeInt32Slice(uses)

	// reusable priority queue
	priq := new(ValHeap)

	// "priority" for a value
	score := f.Cache.allocInt8Slice(f.NumValues())
	defer f.Cache.freeInt8Slice(score)

	// scheduling order. We queue values in this list in reverse order.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every schedule call.
	order := make([]*Value, 0, 64)

	// maps mem values to the next live memory value
	nextMem := f.Cache.allocValueSlice(f.NumValues())
	defer f.Cache.freeValueSlice(nextMem)
	// additional pretend arguments for each Value. Used to enforce load/store ordering.
	additionalArgs := make([][]*Value, f.NumValues())

	for _, b := range f.Blocks {
		// Compute score. Larger numbers are scheduled closer to the end of the block.
		for _, v := range b.Values {
			switch {
			case v.Op.isLoweredGetClosurePtr():
				// We also score LoweredGetClosurePtr as early as possible to ensure that the
				// context register is not stomped. LoweredGetClosurePtr should only appear
				// in the entry block where there are no phi functions, so there is no
				// conflict or ambiguity here.
				if b != f.Entry {
					f.Fatalf("LoweredGetClosurePtr appeared outside of entry block, b=%s", b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpAMD64LoweredNilCheck || v.Op == OpPPC64LoweredNilCheck ||
				v.Op == OpARMLoweredNilCheck || v.Op == OpARM64LoweredNilCheck ||
				v.Op == Op386LoweredNilCheck || v.Op == OpMIPS64LoweredNilCheck ||
				v.Op == OpS390XLoweredNilCheck || v.Op == OpMIPSLoweredNilCheck ||
				v.Op == OpRISCV64LoweredNilCheck || v.Op == OpWasmLoweredNilCheck ||
				v.Op == OpLOONG64LoweredNilCheck:
				// Nil checks must come before loads from the same address.
				score[v.ID] = ScoreNilCheck
			case v.Op == OpPhi:
				// We want all the phis first.
				score[v.ID] = ScorePhi
			case v.Op == OpVarDef:
				// We want all the vardefs next.
				score[v.ID] = ScoreVarDef
			case v.Op == OpArgIntReg || v.Op == OpArgFloatReg:
				// In-register args must be scheduled as early as possible to ensure that the
				// context register is not stomped. They should only appear in the entry block.
				if b != f.Entry {
					f.Fatalf("%s appeared outside of entry block, b=%s", v.Op, b.String())
				}
				score[v.ID] = ScorePhi
			case v.Op == OpArg:
				// We want all the args as early as possible, for better debugging.
				score[v.ID] = ScoreArg
			case v.Type.IsMemory():
				// Schedule stores as early as possible. This tends to
				// reduce register pressure. It also helps make sure
				// VARDEF ops are scheduled before the corresponding LEA.
				score[v.ID] = ScoreMemory
			case v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN:
				if (v.Op == OpSelect1 || v.Op == OpSelect0) && (v.Args[0].isCarry() || v.Type.IsFlags()) {
					// When the Select pseudo op is being used for a carry or flag from
					// a tuple then score it as ScoreFlags so it happens later. This
					// prevents the bit from being clobbered before it is used.
					score[v.ID] = ScoreFlags
				} else {
					score[v.ID] = ScoreReadTuple
				}
			case v.isCarry():
				if w := v.getCarryInput(); w != nil && w.Block == b {
					// The producing op is not the final user of the carry bit. Its
					// current score is one of unscored, Flags, or CarryChainTail.
					// These occur if the producer has not been scored, another user
					// of the producer's carry flag was scored (there are >1 users of
					// the carry out flag), or it was visited earlier and already
					// scored CarryChainTail (and w is thereby proven not to be a tail).
					score[w.ID] = ScoreFlags
				}
				// Verify v has not been scored. If v has not been visited, v may be
				// the final (tail) operation in a carry chain. If v is not, v will be
				// rescored above when v's carry-using op is scored. When scoring is done,
				// only tail operations will retain the CarryChainTail score.
				if score[v.ID] != ScoreFlags {
					// Score the tail of carry chain operations to a lower (earlier in the
					// block) priority. This creates a priority inversion which allows only
					// one chain to be scheduled, if possible.
					score[v.ID] = ScoreCarryChainTail
				}
			case v.isFlagOp():
				// Schedule flag register generation as late as possible.
				// This makes sure that we only have one live flags
				// value at a time.
				score[v.ID] = ScoreFlags
			default:
				score[v.ID] = ScoreDefault
				// If we're reading flags, schedule earlier to keep flag lifetime short.
				for _, a := range v.Args {
					if a.isFlagOp() {
						score[v.ID] = ScoreReadFlags
					}
				}
			}
		}
	}

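	// Now schedule each block independently: compute the block's store chain
	// and per-value use counts, seed the priority queue with values that have
	// no unscheduled uses inside the block, then repeatedly pop the value
	// that should come last, assembling the schedule back-to-front.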
	for _, b := range f.Blocks {
		// Find store chain for block.
		// Store chains for different blocks overwrite each other, so
		// the calculated store chain is good only for this block.
		for _, v := range b.Values {
			if v.Op != OpPhi && v.Type.IsMemory() {
				for _, w := range v.Args {
					if w.Type.IsMemory() {
						nextMem[w.ID] = v
					}
				}
			}
		}

		// Compute uses.
		for _, v := range b.Values {
			if v.Op == OpPhi {
				// If a value is used by a phi, it does not induce
				// a scheduling edge because that use is from the
				// previous iteration.
				continue
			}
			for _, w := range v.Args {
				if w.Block == b {
					uses[w.ID]++
				}
				// Any load must come before the following store.
				if !v.Type.IsMemory() && w.Type.IsMemory() {
					// v is a load.
					s := nextMem[w.ID]
					if s == nil || s.Block != b {
						continue
					}
					additionalArgs[s.ID] = append(additionalArgs[s.ID], v)
					uses[v.ID]++
				}
			}
		}

		for _, c := range b.ControlValues() {
			// Force the control values to be scheduled at the end,
			// unless they are phi values (which must be first).
			// OpArg also goes first -- if it is stack it register allocates
			// to a LoadReg, if it is register it is from the beginning anyway.
			if score[c.ID] == ScorePhi || score[c.ID] == ScoreArg {
				continue
			}
			score[c.ID] = ScoreControl

			// Schedule values dependent on the control values at the end.
			// This reduces the number of register spills. We don't find
			// all values that depend on the controls, just values with a
			// direct dependency. This is cheaper and in testing there
			// was no difference in the number of spills.
			for _, v := range b.Values {
				if v.Op != OpPhi {
					for _, a := range v.Args {
						if a == c {
							score[v.ID] = ScoreControl
						}
					}
				}
			}
		}

		// To put things into a priority queue,
		// the values that should come last are least.
		priq.score = score
		priq.a = priq.a[:0]

		// Initialize priority queue with schedulable values.
		for _, v := range b.Values {
			if uses[v.ID] == 0 {
				heap.Push(priq, v)
			}
		}

		// Schedule highest priority value, update use counts, repeat.
		order = order[:0]
		tuples := make(map[ID][]*Value)
		for priq.Len() > 0 {
			// Find highest priority schedulable value.
			// Note that the schedule is assembled backwards.

			v := heap.Pop(priq).(*Value)

			if f.pass.debug > 1 && score[v.ID] == ScoreCarryChainTail && v.isCarry() {
				// Add some debugging noise if the chain of carrying ops will not
				// likely be scheduled without potential carry flag clobbers.
				if !isCarryChainReady(v, uses) {
					f.Warnl(v.Pos, "carry chain ending with %v not ready", v)
				}
			}

			// Add it to the schedule.
			// Do not emit tuple-reading ops until we're ready to emit the tuple-generating op.
			// TODO: maybe remove the ReadTuple score above, if it does not help performance.
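			// The tuples map buffers Select ops until their tuple-generating
			// op is popped. Because order is built back-to-front, appending
			// the buffered selects just before the generator leaves them
			// immediately after it once the schedule is reversed below.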
			switch {
			case v.Op == OpSelect0:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][0] = v
			case v.Op == OpSelect1:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, 2)
				}
				tuples[v.Args[0].ID][1] = v
			case v.Op == OpSelectN:
				if tuples[v.Args[0].ID] == nil {
					tuples[v.Args[0].ID] = make([]*Value, v.Args[0].Type.NumFields())
				}
				tuples[v.Args[0].ID][v.AuxInt] = v
			case v.Type.IsResults() && tuples[v.ID] != nil:
				tup := tuples[v.ID]
				for i := len(tup) - 1; i >= 0; i-- {
					if tup[i] != nil {
						order = append(order, tup[i])
					}
				}
				delete(tuples, v.ID)
				order = append(order, v)
			case v.Type.IsTuple() && tuples[v.ID] != nil:
				if tuples[v.ID][1] != nil {
					order = append(order, tuples[v.ID][1])
				}
				if tuples[v.ID][0] != nil {
					order = append(order, tuples[v.ID][0])
				}
				delete(tuples, v.ID)
				fallthrough
			default:
				order = append(order, v)
			}

			// Update use counts of arguments.
			for _, w := range v.Args {
				if w.Block != b {
					continue
				}
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
			for _, w := range additionalArgs[v.ID] {
				uses[w.ID]--
				if uses[w.ID] == 0 {
					// All uses scheduled, w is now schedulable.
					heap.Push(priq, w)
				}
			}
		}
		if len(order) != len(b.Values) {
			f.Fatalf("schedule does not include all values in block %s", b)
		}
		for i := 0; i < len(b.Values); i++ {
			b.Values[i] = order[len(b.Values)-1-i]
		}
	}

	f.scheduled = true
}

// storeOrder orders values with respect to stores. That is,
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
//
//	store1
//	NilCheck that depends on store1
//	other values that depend on store1
//	store2
//	NilCheck that depends on store2
//	other values that depend on store2
//	...
//
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.
// Note that simple dependency order won't work: there is no
// dependency between NilChecks and values like IsNonNil.
// Auxiliary data structures are passed in as arguments, so
// that they can be allocated in the caller and be reused.
// This function takes care of resetting them.
func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
	if len(values) == 0 {
		return values
	}

	f := values[0].Block.Func

	// find all stores

	// Members of values that are store values.
	// A constant bound allows this to be stack-allocated. 64 is
	// enough to cover almost every storeOrder call.
	stores := make([]*Value, 0, 64)
	hasNilCheck := false
	sset.clear() // sset is the set of stores that are used in other values
	for _, v := range values {
		if v.Type.IsMemory() {
			stores = append(stores, v)
			if v.Op == OpInitMem || v.Op == OpPhi {
				continue
			}
			sset.add(v.MemoryArg().ID) // record that v's memory arg is used
		}
		if v.Op == OpNilCheck {
			hasNilCheck = true
		}
	}
	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
		// there is no store, the order does not matter
		return values
	}

	// find the last store, which is the one that is not used by other stores
	var last *Value
	for _, v := range stores {
		if !sset.contains(v.ID) {
			if last != nil {
				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
			}
			last = v
		}
	}

	// We assign a store number to each value. The store number is the
	// index of the latest store that this value transitively depends on.
	// The i-th store in the current block gets store number 3*i. A nil
	// check that depends on the i-th store gets store number 3*i+1.
	// Other values that depend on the i-th store get store number 3*i+2.
	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends on
	// is in the previous block (or there is no store at all, e.g. the value is a Const).
	// First we assign the number to all stores by walking back the store chain,
	// then assign the number to other values in DFS order.
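	// Illustrative example (not from any particular function): if the block's
	// memory chain is
	//
	//	m = Phi <mem> -> s1 = Store -> s2 = Store
	//
	// then m, s1, s2 get store numbers 3, 6, 9. A NilCheck whose memory
	// argument is s1 gets 3*2+1 = 7, a load from s1 gets 3*2+2 = 8, and a
	// value with no store dependency in this block (e.g. a Const) gets 2.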
	count := make([]int32, 3*(len(stores)+1))
	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
	for n, w := len(stores), last; n > 0; n-- {
		storeNumber[w.ID] = int32(3 * n)
		count[3*n]++
		sset.add(w.ID)
		if w.Op == OpInitMem || w.Op == OpPhi {
			if n != 1 {
				f.Fatalf("store order is wrong: there are stores before %v", w)
			}
			break
		}
		w = w.MemoryArg()
	}
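	// Assign numbers to the remaining values with an iterative DFS restricted
	// to same-block arguments: a value's number is derived from the largest
	// store number among its arguments; an argument that has not been
	// numbered yet is pushed onto the stack, and the value is retried after
	// the argument has been processed.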
	var stack []*Value
	for _, v := range values {
		if sset.contains(v.ID) {
			// in sset means v is a store, or already pushed to stack, or already assigned a store number
			continue
		}
		stack = append(stack, v)
		sset.add(v.ID)

		for len(stack) > 0 {
			w := stack[len(stack)-1]
			if storeNumber[w.ID] != 0 {
				stack = stack[:len(stack)-1]
				continue
			}
			if w.Op == OpPhi {
				// Phi value doesn't depend on store in the current block.
				// Do this early to avoid dependency cycle.
				storeNumber[w.ID] = 2
				count[2]++
				stack = stack[:len(stack)-1]
				continue
			}

			max := int32(0) // latest store dependency
			argsdone := true
			for _, a := range w.Args {
				if a.Block != w.Block {
					continue
				}
				if !sset.contains(a.ID) {
					stack = append(stack, a)
					sset.add(a.ID)
					argsdone = false
					break
				}
				if storeNumber[a.ID]/3 > max {
					max = storeNumber[a.ID] / 3
				}
			}
			if !argsdone {
				continue
			}

			n := 3*max + 2
			if w.Op == OpNilCheck {
				n = 3*max + 1
			}
			storeNumber[w.ID] = n
			count[n]++
			stack = stack[:len(stack)-1]
		}
	}

	// convert count to a prefix sum of counts: count'[i] = sum_{j<=i} count[j]
	for i := range count {
		if i == 0 {
			continue
		}
		count[i] += count[i-1]
	}
	if count[len(count)-1] != int32(len(values)) {
		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
	}

	// place values in count-indexed bins, which are in the desired store order
	order := make([]*Value, len(values))
	for _, v := range values {
		s := storeNumber[v.ID]
		order[count[s-1]] = v
		count[s-1]++
	}

	// Order nil checks in source order. We want the first in source order to trigger.
	// If two are on the same line, we don't really care which happens first.
	// See issue 18169.
	if hasNilCheck {
		start := -1
		for i, v := range order {
			if v.Op == OpNilCheck {
				if start == -1 {
					start = i
				}
			} else {
				if start != -1 {
					sort.Sort(bySourcePos(order[start:i]))
					start = -1
				}
			}
		}
		if start != -1 {
			sort.Sort(bySourcePos(order[start:]))
		}
	}

	return order
}

// isFlagOp reports whether v is an op with the flags type.
func (v *Value) isFlagOp() bool {
	return v.Type.IsFlags() || v.Type.IsTuple() && v.Type.FieldType(1).IsFlags()
}

// isCarryChainReady reports whether all dependent carry ops can be scheduled after this.
func isCarryChainReady(v *Value, uses []int32) bool {
	// A chain can be scheduled in its entirety if
	// the use count of each dependent op is 1. If none,
	// schedule the first.
	j := 1 // The first op uses[k.ID] == 0. Dependent ops are always >= 1.
	for k := v; k != nil; k = k.getCarryInput() {
		j += int(uses[k.ID]) - 1
	}
	return j == 0
}

// isCarryInput reports whether v accepts a carry value as input.
func (v *Value) isCarryInput() bool {
	return v.getCarryInput() != nil
}

// isCarryOutput reports whether v generates a carry as output.
func (v *Value) isCarryOutput() bool {
	// special cases for PPC64 which put their carry values in XER instead of flags
	switch v.Block.Func.Config.arch {
	case "ppc64", "ppc64le":
		switch v.Op {
		case OpPPC64SUBC, OpPPC64ADDC, OpPPC64SUBCconst, OpPPC64ADDCconst:
			return true
		}
		return false
	}
	return v.isFlagOp() && v.Op != OpSelect1
}

// isCarryCreator reports whether op is an operation which produces a carry bit value,
// but does not consume it.
func (v *Value) isCarryCreator() bool {
	return v.isCarryOutput() && !v.isCarryInput()
}

// isCarry reports whether op consumes or creates a carry bit value.
func (v *Value) isCarry() bool {
	return v.isCarryOutput() || v.isCarryInput()
}

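// As an illustration of a carry chain on PPC64: a multi-word addition is
// typically lowered to an ADDC that computes the low word and leaves a carry
// in XER, followed by one or more ADDEs that consume that carry through their
// final argument (an OpSelect1 of the producer). Under the helpers above,
// ADDC is a carry creator, ADDE is a carry input, and the last op in the
// chain is the one that schedule scores as ScoreCarryChainTail.
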
// getCarryInput returns the producing *Value of the carry bit of this op, or nil if none.
func (v *Value) getCarryInput() *Value {
	// special cases for PPC64 which put their carry values in XER instead of flags
	switch v.Block.Func.Config.arch {
	case "ppc64", "ppc64le":
		switch v.Op {
		case OpPPC64SUBE, OpPPC64ADDE, OpPPC64SUBZEzero, OpPPC64ADDZEzero:
			// PPC64 carry dependencies are conveyed through their final argument.
			// Likewise, there is always an OpSelect1 between them.
			return v.Args[len(v.Args)-1].Args[0]
		}
		return nil
	}
	for _, a := range v.Args {
		if !a.isFlagOp() {
			continue
		}
		if a.Op == OpSelect1 {
			a = a.Args[0]
		}
		return a
	}
	return nil
}

type bySourcePos []*Value

func (s bySourcePos) Len() int           { return len(s) }
func (s bySourcePos) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s bySourcePos) Less(i, j int) bool { return s[i].Pos.Before(s[j].Pos) }