github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ssa/rewrite.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ssa 6 7 import ( 8 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 9 "github.com/bir3/gocompiler/src/cmd/compile/internal/logopt" 10 "github.com/bir3/gocompiler/src/cmd/compile/internal/reflectdata" 11 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 12 "github.com/bir3/gocompiler/src/cmd/internal/obj" 13 "github.com/bir3/gocompiler/src/cmd/internal/obj/s390x" 14 "github.com/bir3/gocompiler/src/cmd/internal/objabi" 15 "github.com/bir3/gocompiler/src/cmd/internal/src" 16 "encoding/binary" 17 "fmt" 18 "github.com/bir3/gocompiler/src/internal/buildcfg" 19 "io" 20 "math" 21 "math/bits" 22 "os" 23 "path/filepath" 24 "strings" 25 ) 26 27 type deadValueChoice bool 28 29 const ( 30 leaveDeadValues deadValueChoice = false 31 removeDeadValues = true 32 ) 33 34 // deadcode indicates whether rewrite should try to remove any values that become dead. 35 func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) { 36 // repeat rewrites until we find no more rewrites 37 pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block 38 pendingLines.clear() 39 debug := f.pass.debug 40 if debug > 1 { 41 fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name) 42 } 43 var iters int 44 var states map[string]bool 45 for { 46 change := false 47 deadChange := false 48 for _, b := range f.Blocks { 49 var b0 *Block 50 if debug > 1 { 51 b0 = new(Block) 52 *b0 = *b 53 b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing 54 } 55 for i, c := range b.ControlValues() { 56 for c.Op == OpCopy { 57 c = c.Args[0] 58 b.ReplaceControl(i, c) 59 } 60 } 61 if rb(b) { 62 change = true 63 if debug > 1 { 64 fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString()) 65 } 66 } 67 for j, v := range b.Values { 68 var v0 *Value 69 if debug > 1 { 70 v0 = new(Value) 71 *v0 = *v 72 v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing 73 } 74 if v.Uses == 0 && v.removeable() { 75 if v.Op != OpInvalid && deadcode == removeDeadValues { 76 // Reset any values that are now unused, so that we decrement 77 // the use count of all of its arguments. 78 // Not quite a deadcode pass, because it does not handle cycles. 79 // But it should help Uses==1 rules to fire. 80 v.reset(OpInvalid) 81 deadChange = true 82 } 83 // No point rewriting values which aren't used. 84 continue 85 } 86 87 vchange := phielimValue(v) 88 if vchange && debug > 1 { 89 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) 90 } 91 92 // Eliminate copy inputs. 93 // If any copy input becomes unused, mark it 94 // as invalid and discard its argument. Repeat 95 // recursively on the discarded argument. 96 // This phase helps remove phantom "dead copy" uses 97 // of a value so that a x.Uses==1 rule condition 98 // fires reliably. 99 for i, a := range v.Args { 100 if a.Op != OpCopy { 101 continue 102 } 103 aa := copySource(a) 104 v.SetArg(i, aa) 105 // If a, a copy, has a line boundary indicator, attempt to find a new value 106 // to hold it. The first candidate is the value that will replace a (aa), 107 // if it shares the same block and line and is eligible. 108 // The second option is v, which has a as an input. 
Because aa is earlier in 109 // the data flow, it is the better choice. 110 if a.Pos.IsStmt() == src.PosIsStmt { 111 if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt { 112 aa.Pos = aa.Pos.WithIsStmt() 113 } else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt { 114 v.Pos = v.Pos.WithIsStmt() 115 } else { 116 // Record the lost line and look for a new home after all rewrites are complete. 117 // TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same 118 // line to appear in more than one block, but only one block is stored, so if both end 119 // up here, then one will be lost. 120 pendingLines.set(a.Pos, int32(a.Block.ID)) 121 } 122 a.Pos = a.Pos.WithNotStmt() 123 } 124 vchange = true 125 for a.Uses == 0 { 126 b := a.Args[0] 127 a.reset(OpInvalid) 128 a = b 129 } 130 } 131 if vchange && debug > 1 { 132 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) 133 } 134 135 // apply rewrite function 136 if rv(v) { 137 vchange = true 138 // If value changed to a poor choice for a statement boundary, move the boundary 139 if v.Pos.IsStmt() == src.PosIsStmt { 140 if k := nextGoodStatementIndex(v, j, b); k != j { 141 v.Pos = v.Pos.WithNotStmt() 142 b.Values[k].Pos = b.Values[k].Pos.WithIsStmt() 143 } 144 } 145 } 146 147 change = change || vchange 148 if vchange && debug > 1 { 149 fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString()) 150 } 151 } 152 } 153 if !change && !deadChange { 154 break 155 } 156 iters++ 157 if (iters > 1000 || debug >= 2) && change { 158 // We've done a suspiciously large number of rewrites (or we're in debug mode). 159 // As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer 160 // and the maximum value encountered during make.bash is 12. 161 // Start checking for cycles. (This is too expensive to do routinely.) 162 // Note: we avoid this path for deadChange-only iterations, to fix #51639. 163 if states == nil { 164 states = make(map[string]bool) 165 } 166 h := f.rewriteHash() 167 if _, ok := states[h]; ok { 168 // We've found a cycle. 169 // To diagnose it, set debug to 2 and start again, 170 // so that we'll print all rules applied until we complete another cycle. 171 // If debug is already >= 2, we've already done that, so it's time to crash. 
172 if debug < 2 { 173 debug = 2 174 states = make(map[string]bool) 175 } else { 176 f.Fatalf("rewrite cycle detected") 177 } 178 } 179 states[h] = true 180 } 181 } 182 // remove clobbered values 183 for _, b := range f.Blocks { 184 j := 0 185 for i, v := range b.Values { 186 vl := v.Pos 187 if v.Op == OpInvalid { 188 if v.Pos.IsStmt() == src.PosIsStmt { 189 pendingLines.set(vl, int32(b.ID)) 190 } 191 f.freeValue(v) 192 continue 193 } 194 if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) { 195 pendingLines.remove(vl) 196 v.Pos = v.Pos.WithIsStmt() 197 } 198 if i != j { 199 b.Values[j] = v 200 } 201 j++ 202 } 203 if pendingLines.get(b.Pos) == int32(b.ID) { 204 b.Pos = b.Pos.WithIsStmt() 205 pendingLines.remove(b.Pos) 206 } 207 b.truncateValues(j) 208 } 209 } 210 211 // Common functions called from rewriting rules 212 213 func is64BitFloat(t *types.Type) bool { 214 return t.Size() == 8 && t.IsFloat() 215 } 216 217 func is32BitFloat(t *types.Type) bool { 218 return t.Size() == 4 && t.IsFloat() 219 } 220 221 func is64BitInt(t *types.Type) bool { 222 return t.Size() == 8 && t.IsInteger() 223 } 224 225 func is32BitInt(t *types.Type) bool { 226 return t.Size() == 4 && t.IsInteger() 227 } 228 229 func is16BitInt(t *types.Type) bool { 230 return t.Size() == 2 && t.IsInteger() 231 } 232 233 func is8BitInt(t *types.Type) bool { 234 return t.Size() == 1 && t.IsInteger() 235 } 236 237 func isPtr(t *types.Type) bool { 238 return t.IsPtrShaped() 239 } 240 241 // mergeSym merges two symbolic offsets. There is no real merging of 242 // offsets, we just pick the non-nil one. 243 func mergeSym(x, y Sym) Sym { 244 if x == nil { 245 return y 246 } 247 if y == nil { 248 return x 249 } 250 panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y)) 251 } 252 253 func canMergeSym(x, y Sym) bool { 254 return x == nil || y == nil 255 } 256 257 // canMergeLoadClobber reports whether the load can be merged into target without 258 // invalidating the schedule. 259 // It also checks that the other non-load argument x is something we 260 // are ok with clobbering. 261 func canMergeLoadClobber(target, load, x *Value) bool { 262 // The register containing x is going to get clobbered. 263 // Don't merge if we still need the value of x. 264 // We don't have liveness information here, but we can 265 // approximate x dying with: 266 // 1) target is x's only use. 267 // 2) target is not in a deeper loop than x. 268 if x.Uses != 1 { 269 return false 270 } 271 loopnest := x.Block.Func.loopnest() 272 loopnest.calculateDepths() 273 if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) { 274 return false 275 } 276 return canMergeLoad(target, load) 277 } 278 279 // canMergeLoad reports whether the load can be merged into target without 280 // invalidating the schedule. 281 func canMergeLoad(target, load *Value) bool { 282 if target.Block.ID != load.Block.ID { 283 // If the load is in a different block do not merge it. 284 return false 285 } 286 287 // We can't merge the load into the target if the load 288 // has more than one use. 289 if load.Uses != 1 { 290 return false 291 } 292 293 mem := load.MemoryArg() 294 295 // We need the load's memory arg to still be alive at target. That 296 // can't be the case if one of target's args depends on a memory 297 // state that is a successor of load's memory arg. 
298 // 299 // For example, it would be invalid to merge load into target in 300 // the following situation because newmem has killed oldmem 301 // before target is reached: 302 // load = read ... oldmem 303 // newmem = write ... oldmem 304 // arg0 = read ... newmem 305 // target = add arg0 load 306 // 307 // If the argument comes from a different block then we can exclude 308 // it immediately because it must dominate load (which is in the 309 // same block as target). 310 var args []*Value 311 for _, a := range target.Args { 312 if a != load && a.Block.ID == target.Block.ID { 313 args = append(args, a) 314 } 315 } 316 317 // memPreds contains memory states known to be predecessors of load's 318 // memory state. It is lazily initialized. 319 var memPreds map[*Value]bool 320 for i := 0; len(args) > 0; i++ { 321 const limit = 100 322 if i >= limit { 323 // Give up if we have done a lot of iterations. 324 return false 325 } 326 v := args[len(args)-1] 327 args = args[:len(args)-1] 328 if target.Block.ID != v.Block.ID { 329 // Since target and load are in the same block 330 // we can stop searching when we leave the block. 331 continue 332 } 333 if v.Op == OpPhi { 334 // A Phi implies we have reached the top of the block. 335 // The memory phi, if it exists, is always 336 // the first logical store in the block. 337 continue 338 } 339 if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() { 340 // We could handle this situation however it is likely 341 // to be very rare. 342 return false 343 } 344 if v.Op.SymEffect()&SymAddr != 0 { 345 // This case prevents an operation that calculates the 346 // address of a local variable from being forced to schedule 347 // before its corresponding VarDef. 348 // See issue 28445. 349 // v1 = LOAD ... 350 // v2 = VARDEF 351 // v3 = LEAQ 352 // v4 = CMPQ v1 v3 353 // We don't want to combine the CMPQ with the load, because 354 // that would force the CMPQ to schedule before the VARDEF, which 355 // in turn requires the LEAQ to schedule before the VARDEF. 356 return false 357 } 358 if v.Type.IsMemory() { 359 if memPreds == nil { 360 // Initialise a map containing memory states 361 // known to be predecessors of load's memory 362 // state. 363 memPreds = make(map[*Value]bool) 364 m := mem 365 const limit = 50 366 for i := 0; i < limit; i++ { 367 if m.Op == OpPhi { 368 // The memory phi, if it exists, is always 369 // the first logical store in the block. 370 break 371 } 372 if m.Block.ID != target.Block.ID { 373 break 374 } 375 if !m.Type.IsMemory() { 376 break 377 } 378 memPreds[m] = true 379 if len(m.Args) == 0 { 380 break 381 } 382 m = m.MemoryArg() 383 } 384 } 385 386 // We can merge if v is a predecessor of mem. 387 // 388 // For example, we can merge load into target in the 389 // following scenario: 390 // x = read ... v 391 // mem = write ... v 392 // load = read ... mem 393 // target = add x load 394 if memPreds[v] { 395 continue 396 } 397 return false 398 } 399 if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem { 400 // If v takes mem as an input then we know mem 401 // is valid at this point. 402 continue 403 } 404 for _, a := range v.Args { 405 if target.Block.ID == a.Block.ID { 406 args = append(args, a) 407 } 408 } 409 } 410 411 return true 412 } 413 414 // isSameCall reports whether sym is the same as the given named symbol. 415 func isSameCall(sym interface{}, name string) bool { 416 fn := sym.(*AuxCall).Fn 417 return fn != nil && fn.String() == name 418 } 419 420 // canLoadUnaligned reports if the architecture supports unaligned load operations. 
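//
// An illustrative sketch: the check below compares the architecture's
// minimum load/store alignment against 1, which holds on targets such as
// amd64 that permit arbitrarily aligned accesses, so rewrite rules guarded
// by this helper only combine narrow loads there. With config standing in
// for a rule's *Config value:
//
//	if canLoadUnaligned(config) {
//		// safe to combine adjacent narrow loads into one wider load
//	}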
421 func canLoadUnaligned(c *Config) bool { 422 return c.ctxt.Arch.Alignment == 1 423 } 424 425 // nlzX returns the number of leading zeros. 426 func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) } 427 func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) } 428 func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) } 429 func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) } 430 431 // ntzX returns the number of trailing zeros. 432 func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) } 433 func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) } 434 func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) } 435 func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) } 436 437 func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 } 438 func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 } 439 func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 } 440 func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 } 441 func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 } 442 443 // nto returns the number of trailing ones. 444 func nto(x int64) int64 { 445 return int64(ntz64(^x)) 446 } 447 448 // logX returns logarithm of n base 2. 449 // n must be a positive power of 2 (isPowerOfTwoX returns true). 450 func log8(n int8) int64 { 451 return int64(bits.Len8(uint8(n))) - 1 452 } 453 func log16(n int16) int64 { 454 return int64(bits.Len16(uint16(n))) - 1 455 } 456 func log32(n int32) int64 { 457 return int64(bits.Len32(uint32(n))) - 1 458 } 459 func log64(n int64) int64 { 460 return int64(bits.Len64(uint64(n))) - 1 461 } 462 463 // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1. 464 // Rounds down. 465 func log2uint32(n int64) int64 { 466 return int64(bits.Len32(uint32(n))) - 1 467 } 468 469 // isPowerOfTwoX functions report whether n is a power of 2. 470 func isPowerOfTwo8(n int8) bool { 471 return n > 0 && n&(n-1) == 0 472 } 473 func isPowerOfTwo16(n int16) bool { 474 return n > 0 && n&(n-1) == 0 475 } 476 func isPowerOfTwo32(n int32) bool { 477 return n > 0 && n&(n-1) == 0 478 } 479 func isPowerOfTwo64(n int64) bool { 480 return n > 0 && n&(n-1) == 0 481 } 482 483 // isUint64PowerOfTwo reports whether uint64(n) is a power of 2. 484 func isUint64PowerOfTwo(in int64) bool { 485 n := uint64(in) 486 return n > 0 && n&(n-1) == 0 487 } 488 489 // isUint32PowerOfTwo reports whether uint32(n) is a power of 2. 490 func isUint32PowerOfTwo(in int64) bool { 491 n := uint64(uint32(in)) 492 return n > 0 && n&(n-1) == 0 493 } 494 495 // is32Bit reports whether n can be represented as a signed 32 bit integer. 496 func is32Bit(n int64) bool { 497 return n == int64(int32(n)) 498 } 499 500 // is16Bit reports whether n can be represented as a signed 16 bit integer. 501 func is16Bit(n int64) bool { 502 return n == int64(int16(n)) 503 } 504 505 // is8Bit reports whether n can be represented as a signed 8 bit integer. 506 func is8Bit(n int64) bool { 507 return n == int64(int8(n)) 508 } 509 510 // isU8Bit reports whether n can be represented as an unsigned 8 bit integer. 511 func isU8Bit(n int64) bool { 512 return n == int64(uint8(n)) 513 } 514 515 // isU12Bit reports whether n can be represented as an unsigned 12 bit integer. 516 func isU12Bit(n int64) bool { 517 return 0 <= n && n < (1<<12) 518 } 519 520 // isU16Bit reports whether n can be represented as an unsigned 16 bit integer. 
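//
// Like the other isXBit/isUXBit predicates above, it uses the
// truncate-and-widen round trip n == int64(uint16(n)); for illustration:
//
//	isU16Bit(65535) // true:  uint16(65535) widens back to 65535
//	isU16Bit(65536) // false: uint16(65536) == 0
//	isU16Bit(-1)    // false: uint16(-1) == 65535, which widens to 65535, not -1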
521 func isU16Bit(n int64) bool { 522 return n == int64(uint16(n)) 523 } 524 525 // isU32Bit reports whether n can be represented as an unsigned 32 bit integer. 526 func isU32Bit(n int64) bool { 527 return n == int64(uint32(n)) 528 } 529 530 // is20Bit reports whether n can be represented as a signed 20 bit integer. 531 func is20Bit(n int64) bool { 532 return -(1<<19) <= n && n < (1<<19) 533 } 534 535 // b2i translates a boolean value to 0 or 1 for assigning to auxInt. 536 func b2i(b bool) int64 { 537 if b { 538 return 1 539 } 540 return 0 541 } 542 543 // b2i32 translates a boolean value to 0 or 1. 544 func b2i32(b bool) int32 { 545 if b { 546 return 1 547 } 548 return 0 549 } 550 551 // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded. 552 // A shift is bounded if it is shifting by less than the width of the shifted value. 553 func shiftIsBounded(v *Value) bool { 554 return v.AuxInt != 0 555 } 556 557 // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing 558 // generated code as much as possible. 559 func canonLessThan(x, y *Value) bool { 560 if x.Op != y.Op { 561 return x.Op < y.Op 562 } 563 if !x.Pos.SameFileAndLine(y.Pos) { 564 return x.Pos.Before(y.Pos) 565 } 566 return x.ID < y.ID 567 } 568 569 // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern 570 // of the mantissa. It will panic if the truncation results in lost information. 571 func truncate64Fto32F(f float64) float32 { 572 if !isExactFloat32(f) { 573 panic("truncate64Fto32F: truncation is not exact") 574 } 575 if !math.IsNaN(f) { 576 return float32(f) 577 } 578 // NaN bit patterns aren't necessarily preserved across conversion 579 // instructions so we need to do the conversion manually. 580 b := math.Float64bits(f) 581 m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand) 582 // | sign | exponent | mantissa | 583 r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23))) 584 return math.Float32frombits(r) 585 } 586 587 // extend32Fto64F converts a float32 value to a float64 value preserving the bit 588 // pattern of the mantissa. 589 func extend32Fto64F(f float32) float64 { 590 if !math.IsNaN(float64(f)) { 591 return float64(f) 592 } 593 // NaN bit patterns aren't necessarily preserved across conversion 594 // instructions so we need to do the conversion manually. 595 b := uint64(math.Float32bits(f)) 596 // | sign | exponent | mantissa | 597 r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23)) 598 return math.Float64frombits(r) 599 } 600 601 // DivisionNeedsFixUp reports whether the division needs fix-up code. 602 func DivisionNeedsFixUp(v *Value) bool { 603 return v.AuxInt == 0 604 } 605 606 // auxFrom64F encodes a float64 value so it can be stored in an AuxInt. 607 func auxFrom64F(f float64) int64 { 608 if f != f { 609 panic("can't encode a NaN in AuxInt field") 610 } 611 return int64(math.Float64bits(f)) 612 } 613 614 // auxFrom32F encodes a float32 value so it can be stored in an AuxInt. 615 func auxFrom32F(f float32) int64 { 616 if f != f { 617 panic("can't encode a NaN in AuxInt field") 618 } 619 return int64(math.Float64bits(extend32Fto64F(f))) 620 } 621 622 // auxTo32F decodes a float32 from the AuxInt value provided. 623 func auxTo32F(i int64) float32 { 624 return truncate64Fto32F(math.Float64frombits(uint64(i))) 625 } 626 627 // auxTo64F decodes a float64 from the AuxInt value provided. 
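//
// Together with auxFrom64F and auxFrom32F above, the decoders form a
// bit-exact round trip for non-NaN values; a small illustration:
//
//	auxTo64F(auxFrom64F(1.5))  == 1.5
//	auxTo32F(auxFrom32F(0.25)) == 0.25
//	auxFrom32F(float32(math.NaN())) // panics: NaNs cannot be stored in AuxInt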
628 func auxTo64F(i int64) float64 { 629 return math.Float64frombits(uint64(i)) 630 } 631 632 func auxIntToBool(i int64) bool { 633 if i == 0 { 634 return false 635 } 636 return true 637 } 638 func auxIntToInt8(i int64) int8 { 639 return int8(i) 640 } 641 func auxIntToInt16(i int64) int16 { 642 return int16(i) 643 } 644 func auxIntToInt32(i int64) int32 { 645 return int32(i) 646 } 647 func auxIntToInt64(i int64) int64 { 648 return i 649 } 650 func auxIntToUint8(i int64) uint8 { 651 return uint8(i) 652 } 653 func auxIntToFloat32(i int64) float32 { 654 return float32(math.Float64frombits(uint64(i))) 655 } 656 func auxIntToFloat64(i int64) float64 { 657 return math.Float64frombits(uint64(i)) 658 } 659 func auxIntToValAndOff(i int64) ValAndOff { 660 return ValAndOff(i) 661 } 662 func auxIntToArm64BitField(i int64) arm64BitField { 663 return arm64BitField(i) 664 } 665 func auxIntToInt128(x int64) int128 { 666 if x != 0 { 667 panic("nonzero int128 not allowed") 668 } 669 return 0 670 } 671 func auxIntToFlagConstant(x int64) flagConstant { 672 return flagConstant(x) 673 } 674 675 func auxIntToOp(cc int64) Op { 676 return Op(cc) 677 } 678 679 func boolToAuxInt(b bool) int64 { 680 if b { 681 return 1 682 } 683 return 0 684 } 685 func int8ToAuxInt(i int8) int64 { 686 return int64(i) 687 } 688 func int16ToAuxInt(i int16) int64 { 689 return int64(i) 690 } 691 func int32ToAuxInt(i int32) int64 { 692 return int64(i) 693 } 694 func int64ToAuxInt(i int64) int64 { 695 return int64(i) 696 } 697 func uint8ToAuxInt(i uint8) int64 { 698 return int64(int8(i)) 699 } 700 func float32ToAuxInt(f float32) int64 { 701 return int64(math.Float64bits(float64(f))) 702 } 703 func float64ToAuxInt(f float64) int64 { 704 return int64(math.Float64bits(f)) 705 } 706 func valAndOffToAuxInt(v ValAndOff) int64 { 707 return int64(v) 708 } 709 func arm64BitFieldToAuxInt(v arm64BitField) int64 { 710 return int64(v) 711 } 712 func int128ToAuxInt(x int128) int64 { 713 if x != 0 { 714 panic("nonzero int128 not allowed") 715 } 716 return 0 717 } 718 func flagConstantToAuxInt(x flagConstant) int64 { 719 return int64(x) 720 } 721 722 func opToAuxInt(o Op) int64 { 723 return int64(o) 724 } 725 726 // Aux is an interface to hold miscellaneous data in Blocks and Values. 727 type Aux interface { 728 CanBeAnSSAAux() 729 } 730 731 // for now only used to mark moves that need to avoid clobbering flags 732 type auxMark bool 733 734 func (auxMark) CanBeAnSSAAux() {} 735 736 var AuxMark auxMark 737 738 // stringAux wraps string values for use in Aux. 
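//
// The helpers below give a simple round trip; for illustration:
//
//	a := StringToAux("runtime.memmove") // Aux value backed by stringAux
//	auxToString(a) == "runtime.memmove"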
739 type stringAux string 740 741 func (stringAux) CanBeAnSSAAux() {} 742 743 func auxToString(i Aux) string { 744 return string(i.(stringAux)) 745 } 746 func auxToSym(i Aux) Sym { 747 // TODO: kind of a hack - allows nil interface through 748 s, _ := i.(Sym) 749 return s 750 } 751 func auxToType(i Aux) *types.Type { 752 return i.(*types.Type) 753 } 754 func auxToCall(i Aux) *AuxCall { 755 return i.(*AuxCall) 756 } 757 func auxToS390xCCMask(i Aux) s390x.CCMask { 758 return i.(s390x.CCMask) 759 } 760 func auxToS390xRotateParams(i Aux) s390x.RotateParams { 761 return i.(s390x.RotateParams) 762 } 763 764 func StringToAux(s string) Aux { 765 return stringAux(s) 766 } 767 func symToAux(s Sym) Aux { 768 return s 769 } 770 func callToAux(s *AuxCall) Aux { 771 return s 772 } 773 func typeToAux(t *types.Type) Aux { 774 return t 775 } 776 func s390xCCMaskToAux(c s390x.CCMask) Aux { 777 return c 778 } 779 func s390xRotateParamsToAux(r s390x.RotateParams) Aux { 780 return r 781 } 782 783 // uaddOvf reports whether unsigned a+b would overflow. 784 func uaddOvf(a, b int64) bool { 785 return uint64(a)+uint64(b) < uint64(a) 786 } 787 788 // loadLSymOffset simulates reading a word at an offset into a 789 // read-only symbol's runtime memory. If it would read a pointer to 790 // another symbol, that symbol is returned. Otherwise, it returns nil. 791 func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym { 792 if lsym.Type != objabi.SRODATA { 793 return nil 794 } 795 796 for _, r := range lsym.R { 797 if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 { 798 return r.Sym 799 } 800 } 801 802 return nil 803 } 804 805 func devirtLECall(v *Value, sym *obj.LSym) *Value { 806 v.Op = OpStaticLECall 807 auxcall := v.Aux.(*AuxCall) 808 auxcall.Fn = sym 809 // Remove first arg 810 v.Args[0].Uses-- 811 copy(v.Args[0:], v.Args[1:]) 812 v.Args[len(v.Args)-1] = nil // aid GC 813 v.Args = v.Args[:len(v.Args)-1] 814 if f := v.Block.Func; f.pass.debug > 0 { 815 f.Warnl(v.Pos, "de-virtualizing call") 816 } 817 return v 818 } 819 820 // isSamePtr reports whether p1 and p2 point to the same address. 821 func isSamePtr(p1, p2 *Value) bool { 822 if p1 == p2 { 823 return true 824 } 825 if p1.Op != p2.Op { 826 return false 827 } 828 switch p1.Op { 829 case OpOffPtr: 830 return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0]) 831 case OpAddr, OpLocalAddr: 832 return p1.Aux == p2.Aux 833 case OpAddPtr: 834 return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0]) 835 } 836 return false 837 } 838 839 func isStackPtr(v *Value) bool { 840 for v.Op == OpOffPtr || v.Op == OpAddPtr { 841 v = v.Args[0] 842 } 843 return v.Op == OpSP || v.Op == OpLocalAddr 844 } 845 846 // disjoint reports whether the memory region specified by [p1:p1+n1) 847 // does not overlap with [p2:p2+n2). 848 // A return value of false does not imply the regions overlap. 
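//
// For illustration, with two OffPtrs off the same hypothetical base pointer
// p, disjoint reduces to a pure offset/size check:
//
//	disjoint((OffPtr [0] p), 8, (OffPtr [8] p), 8) // true:  [0,8) and [8,16) do not overlap
//	disjoint((OffPtr [4] p), 8, (OffPtr [8] p), 8) // false: [4,12) and [8,16) overlap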
849 func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool { 850 if n1 == 0 || n2 == 0 { 851 return true 852 } 853 if p1 == p2 { 854 return false 855 } 856 baseAndOffset := func(ptr *Value) (base *Value, offset int64) { 857 base, offset = ptr, 0 858 for base.Op == OpOffPtr { 859 offset += base.AuxInt 860 base = base.Args[0] 861 } 862 if opcodeTable[base.Op].nilCheck { 863 base = base.Args[0] 864 } 865 return base, offset 866 } 867 p1, off1 := baseAndOffset(p1) 868 p2, off2 := baseAndOffset(p2) 869 if isSamePtr(p1, p2) { 870 return !overlap(off1, n1, off2, n2) 871 } 872 // p1 and p2 are not the same, so if they are both OpAddrs then 873 // they point to different variables. 874 // If one pointer is on the stack and the other is an argument 875 // then they can't overlap. 876 switch p1.Op { 877 case OpAddr, OpLocalAddr: 878 if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP { 879 return true 880 } 881 return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP 882 case OpArg, OpArgIntReg: 883 if p2.Op == OpSP || p2.Op == OpLocalAddr { 884 return true 885 } 886 case OpSP: 887 return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP 888 } 889 return false 890 } 891 892 // moveSize returns the number of bytes an aligned MOV instruction moves. 893 func moveSize(align int64, c *Config) int64 { 894 switch { 895 case align%8 == 0 && c.PtrSize == 8: 896 return 8 897 case align%4 == 0: 898 return 4 899 case align%2 == 0: 900 return 2 901 } 902 return 1 903 } 904 905 // mergePoint finds a block among a's blocks which dominates b and is itself 906 // dominated by all of a's blocks. Returns nil if it can't find one. 907 // Might return nil even if one does exist. 908 func mergePoint(b *Block, a ...*Value) *Block { 909 // Walk backward from b looking for one of the a's blocks. 910 911 // Max distance 912 d := 100 913 914 for d > 0 { 915 for _, x := range a { 916 if b == x.Block { 917 goto found 918 } 919 } 920 if len(b.Preds) > 1 { 921 // Don't know which way to go back. Abort. 922 return nil 923 } 924 b = b.Preds[0].b 925 d-- 926 } 927 return nil // too far away 928 found: 929 // At this point, r is the first value in a that we find by walking backwards. 930 // if we return anything, r will be it. 931 r := b 932 933 // Keep going, counting the other a's that we find. They must all dominate r. 934 na := 0 935 for d > 0 { 936 for _, x := range a { 937 if b == x.Block { 938 na++ 939 } 940 } 941 if na == len(a) { 942 // Found all of a in a backwards walk. We can return r. 943 return r 944 } 945 if len(b.Preds) > 1 { 946 return nil 947 } 948 b = b.Preds[0].b 949 d-- 950 951 } 952 return nil // too far away 953 } 954 955 // clobber invalidates values. Returns true. 956 // clobber is used by rewrite rules to: 957 // 958 // A) make sure the values are really dead and never used again. 959 // B) decrement use counts of the values' args. 960 func clobber(vv ...*Value) bool { 961 for _, v := range vv { 962 v.reset(OpInvalid) 963 // Note: leave v.Block intact. The Block field is used after clobber. 964 } 965 return true 966 } 967 968 // clobberIfDead resets v when use count is 1. Returns true. 969 // clobberIfDead is used by rewrite rules to decrement 970 // use counts of v's args when v is dead and never used. 971 func clobberIfDead(v *Value) bool { 972 if v.Uses == 1 { 973 v.reset(OpInvalid) 974 } 975 // Note: leave v.Block intact. The Block field is used after clobberIfDead. 
976 return true 977 } 978 979 // noteRule is an easy way to track if a rule is matched when writing 980 // new ones. Make the rule of interest also conditional on 981 // 982 // noteRule("note to self: rule of interest matched") 983 // 984 // and that message will print when the rule matches. 985 func noteRule(s string) bool { 986 fmt.Println(s) 987 return true 988 } 989 990 // countRule increments Func.ruleMatches[key]. 991 // If Func.ruleMatches is non-nil at the end 992 // of compilation, it will be printed to stdout. 993 // This is intended to make it easier to find which functions 994 // which contain lots of rules matches when developing new rules. 995 func countRule(v *Value, key string) bool { 996 f := v.Block.Func 997 if f.ruleMatches == nil { 998 f.ruleMatches = make(map[string]int) 999 } 1000 f.ruleMatches[key]++ 1001 return true 1002 } 1003 1004 // warnRule generates compiler debug output with string s when 1005 // v is not in autogenerated code, cond is true and the rule has fired. 1006 func warnRule(cond bool, v *Value, s string) bool { 1007 if pos := v.Pos; pos.Line() > 1 && cond { 1008 v.Block.Func.Warnl(pos, s) 1009 } 1010 return true 1011 } 1012 1013 // for a pseudo-op like (LessThan x), extract x. 1014 func flagArg(v *Value) *Value { 1015 if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() { 1016 return nil 1017 } 1018 return v.Args[0] 1019 } 1020 1021 // arm64Negate finds the complement to an ARM64 condition code, 1022 // for example !Equal -> NotEqual or !LessThan -> GreaterEqual 1023 // 1024 // For floating point, it's more subtle because NaN is unordered. We do 1025 // !LessThanF -> NotLessThanF, the latter takes care of NaNs. 1026 func arm64Negate(op Op) Op { 1027 switch op { 1028 case OpARM64LessThan: 1029 return OpARM64GreaterEqual 1030 case OpARM64LessThanU: 1031 return OpARM64GreaterEqualU 1032 case OpARM64GreaterThan: 1033 return OpARM64LessEqual 1034 case OpARM64GreaterThanU: 1035 return OpARM64LessEqualU 1036 case OpARM64LessEqual: 1037 return OpARM64GreaterThan 1038 case OpARM64LessEqualU: 1039 return OpARM64GreaterThanU 1040 case OpARM64GreaterEqual: 1041 return OpARM64LessThan 1042 case OpARM64GreaterEqualU: 1043 return OpARM64LessThanU 1044 case OpARM64Equal: 1045 return OpARM64NotEqual 1046 case OpARM64NotEqual: 1047 return OpARM64Equal 1048 case OpARM64LessThanF: 1049 return OpARM64NotLessThanF 1050 case OpARM64NotLessThanF: 1051 return OpARM64LessThanF 1052 case OpARM64LessEqualF: 1053 return OpARM64NotLessEqualF 1054 case OpARM64NotLessEqualF: 1055 return OpARM64LessEqualF 1056 case OpARM64GreaterThanF: 1057 return OpARM64NotGreaterThanF 1058 case OpARM64NotGreaterThanF: 1059 return OpARM64GreaterThanF 1060 case OpARM64GreaterEqualF: 1061 return OpARM64NotGreaterEqualF 1062 case OpARM64NotGreaterEqualF: 1063 return OpARM64GreaterEqualF 1064 default: 1065 panic("unreachable") 1066 } 1067 } 1068 1069 // arm64Invert evaluates (InvertFlags op), which 1070 // is the same as altering the condition codes such 1071 // that the same result would be produced if the arguments 1072 // to the flag-generating instruction were reversed, e.g. 
1073 // (InvertFlags (CMP x y)) -> (CMP y x) 1074 func arm64Invert(op Op) Op { 1075 switch op { 1076 case OpARM64LessThan: 1077 return OpARM64GreaterThan 1078 case OpARM64LessThanU: 1079 return OpARM64GreaterThanU 1080 case OpARM64GreaterThan: 1081 return OpARM64LessThan 1082 case OpARM64GreaterThanU: 1083 return OpARM64LessThanU 1084 case OpARM64LessEqual: 1085 return OpARM64GreaterEqual 1086 case OpARM64LessEqualU: 1087 return OpARM64GreaterEqualU 1088 case OpARM64GreaterEqual: 1089 return OpARM64LessEqual 1090 case OpARM64GreaterEqualU: 1091 return OpARM64LessEqualU 1092 case OpARM64Equal, OpARM64NotEqual: 1093 return op 1094 case OpARM64LessThanF: 1095 return OpARM64GreaterThanF 1096 case OpARM64GreaterThanF: 1097 return OpARM64LessThanF 1098 case OpARM64LessEqualF: 1099 return OpARM64GreaterEqualF 1100 case OpARM64GreaterEqualF: 1101 return OpARM64LessEqualF 1102 case OpARM64NotLessThanF: 1103 return OpARM64NotGreaterThanF 1104 case OpARM64NotGreaterThanF: 1105 return OpARM64NotLessThanF 1106 case OpARM64NotLessEqualF: 1107 return OpARM64NotGreaterEqualF 1108 case OpARM64NotGreaterEqualF: 1109 return OpARM64NotLessEqualF 1110 default: 1111 panic("unreachable") 1112 } 1113 } 1114 1115 // evaluate an ARM64 op against a flags value 1116 // that is potentially constant; return 1 for true, 1117 // -1 for false, and 0 for not constant. 1118 func ccARM64Eval(op Op, flags *Value) int { 1119 fop := flags.Op 1120 if fop == OpARM64InvertFlags { 1121 return -ccARM64Eval(op, flags.Args[0]) 1122 } 1123 if fop != OpARM64FlagConstant { 1124 return 0 1125 } 1126 fc := flagConstant(flags.AuxInt) 1127 b2i := func(b bool) int { 1128 if b { 1129 return 1 1130 } 1131 return -1 1132 } 1133 switch op { 1134 case OpARM64Equal: 1135 return b2i(fc.eq()) 1136 case OpARM64NotEqual: 1137 return b2i(fc.ne()) 1138 case OpARM64LessThan: 1139 return b2i(fc.lt()) 1140 case OpARM64LessThanU: 1141 return b2i(fc.ult()) 1142 case OpARM64GreaterThan: 1143 return b2i(fc.gt()) 1144 case OpARM64GreaterThanU: 1145 return b2i(fc.ugt()) 1146 case OpARM64LessEqual: 1147 return b2i(fc.le()) 1148 case OpARM64LessEqualU: 1149 return b2i(fc.ule()) 1150 case OpARM64GreaterEqual: 1151 return b2i(fc.ge()) 1152 case OpARM64GreaterEqualU: 1153 return b2i(fc.uge()) 1154 } 1155 return 0 1156 } 1157 1158 // logRule logs the use of the rule s. This will only be enabled if 1159 // rewrite rules were generated with the -log option, see _gen/rulegen.go. 1160 func logRule(s string) { 1161 if ruleFile == nil { 1162 // Open a log file to write log to. We open in append 1163 // mode because all.bash runs the compiler lots of times, 1164 // and we want the concatenation of all of those logs. 1165 // This means, of course, that users need to rm the old log 1166 // to get fresh data. 1167 // TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow? 
1168 w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"), 1169 os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 1170 if err != nil { 1171 panic(err) 1172 } 1173 ruleFile = w 1174 } 1175 _, err := fmt.Fprintln(ruleFile, s) 1176 if err != nil { 1177 panic(err) 1178 } 1179 } 1180 1181 var ruleFile io.Writer 1182 1183 func min(x, y int64) int64 { 1184 if x < y { 1185 return x 1186 } 1187 return y 1188 } 1189 func max(x, y int64) int64 { 1190 if x > y { 1191 return x 1192 } 1193 return y 1194 } 1195 1196 func isConstZero(v *Value) bool { 1197 switch v.Op { 1198 case OpConstNil: 1199 return true 1200 case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F: 1201 return v.AuxInt == 0 1202 } 1203 return false 1204 } 1205 1206 // reciprocalExact64 reports whether 1/c is exactly representable. 1207 func reciprocalExact64(c float64) bool { 1208 b := math.Float64bits(c) 1209 man := b & (1<<52 - 1) 1210 if man != 0 { 1211 return false // not a power of 2, denormal, or NaN 1212 } 1213 exp := b >> 52 & (1<<11 - 1) 1214 // exponent bias is 0x3ff. So taking the reciprocal of a number 1215 // changes the exponent to 0x7fe-exp. 1216 switch exp { 1217 case 0: 1218 return false // ±0 1219 case 0x7ff: 1220 return false // ±inf 1221 case 0x7fe: 1222 return false // exponent is not representable 1223 default: 1224 return true 1225 } 1226 } 1227 1228 // reciprocalExact32 reports whether 1/c is exactly representable. 1229 func reciprocalExact32(c float32) bool { 1230 b := math.Float32bits(c) 1231 man := b & (1<<23 - 1) 1232 if man != 0 { 1233 return false // not a power of 2, denormal, or NaN 1234 } 1235 exp := b >> 23 & (1<<8 - 1) 1236 // exponent bias is 0x7f. So taking the reciprocal of a number 1237 // changes the exponent to 0xfe-exp. 1238 switch exp { 1239 case 0: 1240 return false // ±0 1241 case 0xff: 1242 return false // ±inf 1243 case 0xfe: 1244 return false // exponent is not representable 1245 default: 1246 return true 1247 } 1248 } 1249 1250 // check if an immediate can be directly encoded into an ARM's instruction. 1251 func isARMImmRot(v uint32) bool { 1252 for i := 0; i < 16; i++ { 1253 if v&^0xff == 0 { 1254 return true 1255 } 1256 v = v<<2 | v>>30 1257 } 1258 1259 return false 1260 } 1261 1262 // overlap reports whether the ranges given by the given offset and 1263 // size pairs overlap. 1264 func overlap(offset1, size1, offset2, size2 int64) bool { 1265 if offset1 >= offset2 && offset2+size2 > offset1 { 1266 return true 1267 } 1268 if offset2 >= offset1 && offset1+size1 > offset2 { 1269 return true 1270 } 1271 return false 1272 } 1273 1274 func areAdjacentOffsets(off1, off2, size int64) bool { 1275 return off1+size == off2 || off1 == off2+size 1276 } 1277 1278 // check if value zeroes out upper 32-bit of 64-bit register. 1279 // depth limits recursion depth. In AMD64.rules 3 is used as limit, 1280 // because it catches same amount of cases as 4. 
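//
// For illustration: 32-bit amd64 ops such as MOVLconst and ADDL write a
// zero-extended result, so for x := (MOVLconst [7]), zeroUpper32Bits(x, 3)
// reports true, which lets a rule of roughly the form
//
//	(MOVLQZX x) && zeroUpper32Bits(x, 3) => x
//
// drop the redundant zero-extension.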
1281 func zeroUpper32Bits(x *Value, depth int) bool { 1282 switch x.Op { 1283 case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1, 1284 OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1, 1285 OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload, 1286 OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL, 1287 OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst, 1288 OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst, 1289 OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL, 1290 OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst, 1291 OpAMD64SHLL, OpAMD64SHLLconst: 1292 return true 1293 case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst, 1294 OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW, 1295 OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst: 1296 return true 1297 case OpArg: 1298 return x.Type.Size() == 4 1299 case OpPhi, OpSelect0, OpSelect1: 1300 // Phis can use each-other as an arguments, instead of tracking visited values, 1301 // just limit recursion depth. 1302 if depth <= 0 { 1303 return false 1304 } 1305 for i := range x.Args { 1306 if !zeroUpper32Bits(x.Args[i], depth-1) { 1307 return false 1308 } 1309 } 1310 return true 1311 1312 } 1313 return false 1314 } 1315 1316 // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits. 1317 func zeroUpper48Bits(x *Value, depth int) bool { 1318 switch x.Op { 1319 case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2: 1320 return true 1321 case OpArg: 1322 return x.Type.Size() == 2 1323 case OpPhi, OpSelect0, OpSelect1: 1324 // Phis can use each-other as an arguments, instead of tracking visited values, 1325 // just limit recursion depth. 1326 if depth <= 0 { 1327 return false 1328 } 1329 for i := range x.Args { 1330 if !zeroUpper48Bits(x.Args[i], depth-1) { 1331 return false 1332 } 1333 } 1334 return true 1335 1336 } 1337 return false 1338 } 1339 1340 // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits. 1341 func zeroUpper56Bits(x *Value, depth int) bool { 1342 switch x.Op { 1343 case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1: 1344 return true 1345 case OpArg: 1346 return x.Type.Size() == 1 1347 case OpPhi, OpSelect0, OpSelect1: 1348 // Phis can use each-other as an arguments, instead of tracking visited values, 1349 // just limit recursion depth. 1350 if depth <= 0 { 1351 return false 1352 } 1353 for i := range x.Args { 1354 if !zeroUpper56Bits(x.Args[i], depth-1) { 1355 return false 1356 } 1357 } 1358 return true 1359 1360 } 1361 return false 1362 } 1363 1364 func isInlinableMemclr(c *Config, sz int64) bool { 1365 if sz < 0 { 1366 return false 1367 } 1368 // TODO: expand this check to allow other architectures 1369 // see CL 454255 and issue 56997 1370 switch c.arch { 1371 case "amd64", "arm64": 1372 return true 1373 case "ppc64le", "ppc64": 1374 return sz < 512 1375 } 1376 return false 1377 } 1378 1379 // isInlinableMemmove reports whether the given arch performs a Move of the given size 1380 // faster than memmove. It will only return true if replacing the memmove with a Move is 1381 // safe, either because Move will do all of its loads before any of its stores, or 1382 // because the arguments are known to be disjoint. 1383 // This is used as a check for replacing memmove with Move ops. 
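//
// For illustration, on amd64 a 16-byte move is always considered inlinable
// (the small-size case is safe because Move does all of its loads before any
// of its stores), while a 64-byte move is inlinable only when the operands
// are provably disjoint:
//
//	isInlinableMemmove(dst, src, 16, c) // true on amd64
//	isInlinableMemmove(dst, src, 64, c) // true on amd64 only if disjoint(dst, 64, src, 64)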
1384 func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool { 1385 // It is always safe to convert memmove into Move when its arguments are disjoint. 1386 // Move ops may or may not be faster for large sizes depending on how the platform 1387 // lowers them, so we only perform this optimization on platforms that we know to 1388 // have fast Move ops. 1389 switch c.arch { 1390 case "amd64": 1391 return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz)) 1392 case "386", "arm64": 1393 return sz <= 8 1394 case "s390x", "ppc64", "ppc64le": 1395 return sz <= 8 || disjoint(dst, sz, src, sz) 1396 case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le": 1397 return sz <= 4 1398 } 1399 return false 1400 } 1401 func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool { 1402 return isInlinableMemmove(dst, src, sz, c) 1403 } 1404 1405 // logLargeCopy logs the occurrence of a large copy. 1406 // The best place to do this is in the rewrite rules where the size of the move is easy to find. 1407 // "Large" is arbitrarily chosen to be 128 bytes; this may change. 1408 func logLargeCopy(v *Value, s int64) bool { 1409 if s < 128 { 1410 return true 1411 } 1412 if logopt.Enabled() { 1413 logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s)) 1414 } 1415 return true 1416 } 1417 func LogLargeCopy(funcName string, pos src.XPos, s int64) { 1418 if s < 128 { 1419 return 1420 } 1421 if logopt.Enabled() { 1422 logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s)) 1423 } 1424 } 1425 1426 // hasSmallRotate reports whether the architecture has rotate instructions 1427 // for sizes < 32-bit. This is used to decide whether to promote some rotations. 1428 func hasSmallRotate(c *Config) bool { 1429 switch c.arch { 1430 case "amd64", "386": 1431 return true 1432 default: 1433 return false 1434 } 1435 } 1436 1437 func supportsPPC64PCRel() bool { 1438 // PCRel is currently supported for >= power10, linux only 1439 // Internal and external linking supports this on ppc64le; internal linking on ppc64. 1440 return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux" 1441 } 1442 1443 func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 { 1444 if sh < 0 || sh >= sz { 1445 panic("PPC64 shift arg sh out of range") 1446 } 1447 if mb < 0 || mb >= sz { 1448 panic("PPC64 shift arg mb out of range") 1449 } 1450 if me < 0 || me >= sz { 1451 panic("PPC64 shift arg me out of range") 1452 } 1453 return int32(sh<<16 | mb<<8 | me) 1454 } 1455 1456 func GetPPC64Shiftsh(auxint int64) int64 { 1457 return int64(int8(auxint >> 16)) 1458 } 1459 1460 func GetPPC64Shiftmb(auxint int64) int64 { 1461 return int64(int8(auxint >> 8)) 1462 } 1463 1464 func GetPPC64Shiftme(auxint int64) int64 { 1465 return int64(int8(auxint)) 1466 } 1467 1468 // Test if this value can encoded as a mask for a rlwinm like 1469 // operation. Masks can also extend from the msb and wrap to 1470 // the lsb too. That is, the valid masks are 32 bit strings 1471 // of the form: 0..01..10..0 or 1..10..01..1 or 1...1 1472 func isPPC64WordRotateMask(v64 int64) bool { 1473 // Isolate rightmost 1 (if none 0) and add. 1474 v := uint32(v64) 1475 vp := (v & -v) + v 1476 // Likewise, for the wrapping case. 1477 vn := ^v 1478 vpn := (vn & -vn) + vn 1479 return (v&vp == 0 || vn&vpn == 0) && v != 0 1480 } 1481 1482 // Compress mask and shift into single value of the form 1483 // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can 1484 // be used to regenerate the input mask. 
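//
// A worked example of the encoding for a 32-bit mask: with rotate=0,
// mask=0x000000FF and nbits=32, the leading/trailing zero counts give mb=24
// and me=32, so
//
//	encodePPC64RotateMask(0, 0xFF, 32) == 0x20001820 // nbits<<24 | rotate<<16 | mb<<8 | me
//
// and DecodePPC64RotateMask recovers mask=0xFF, reporting me in the
// inclusive ISA form (me=31).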
1485 func encodePPC64RotateMask(rotate, mask, nbits int64) int64 { 1486 var mb, me, mbn, men int 1487 1488 // Determine boundaries and then decode them 1489 if mask == 0 || ^mask == 0 || rotate >= nbits { 1490 panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits)) 1491 } else if nbits == 32 { 1492 mb = bits.LeadingZeros32(uint32(mask)) 1493 me = 32 - bits.TrailingZeros32(uint32(mask)) 1494 mbn = bits.LeadingZeros32(^uint32(mask)) 1495 men = 32 - bits.TrailingZeros32(^uint32(mask)) 1496 } else { 1497 mb = bits.LeadingZeros64(uint64(mask)) 1498 me = 64 - bits.TrailingZeros64(uint64(mask)) 1499 mbn = bits.LeadingZeros64(^uint64(mask)) 1500 men = 64 - bits.TrailingZeros64(^uint64(mask)) 1501 } 1502 // Check for a wrapping mask (e.g bits at 0 and 63) 1503 if mb == 0 && me == int(nbits) { 1504 // swap the inverted values 1505 mb, me = men, mbn 1506 } 1507 1508 return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24) 1509 } 1510 1511 // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x) 1512 // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an 1513 // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two 1514 // operations can be combined. This functions assumes the two opcodes can 1515 // be merged, and returns an encoded rotate+mask value of the combined RLDICL. 1516 func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 { 1517 mb := s 1518 r := 64 - s 1519 // A larger mb is a smaller mask. 1520 if (encoded>>8)&0xFF < mb { 1521 encoded = (encoded &^ 0xFF00) | mb<<8 1522 } 1523 // The rotate is expected to be 0. 1524 if (encoded & 0xFF0000) != 0 { 1525 panic("non-zero rotate") 1526 } 1527 return encoded | r<<16 1528 } 1529 1530 // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as 1531 // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask. 1532 func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) { 1533 auxint := uint64(sauxint) 1534 rotate = int64((auxint >> 16) & 0xFF) 1535 mb = int64((auxint >> 8) & 0xFF) 1536 me = int64((auxint >> 0) & 0xFF) 1537 nbits := int64((auxint >> 24) & 0xFF) 1538 mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1) 1539 if mb > me { 1540 mask = ^mask 1541 } 1542 if nbits == 32 { 1543 mask = uint64(uint32(mask)) 1544 } 1545 1546 // Fixup ME to match ISA definition. The second argument to MASK(..,me) 1547 // is inclusive. 1548 me = (me - 1) & (nbits - 1) 1549 return 1550 } 1551 1552 // This verifies that the mask is a set of 1553 // consecutive bits including the least 1554 // significant bit. 1555 func isPPC64ValidShiftMask(v int64) bool { 1556 if (v != 0) && ((v+1)&v) == 0 { 1557 return true 1558 } 1559 return false 1560 } 1561 1562 func getPPC64ShiftMaskLength(v int64) int64 { 1563 return int64(bits.Len64(uint64(v))) 1564 } 1565 1566 // Decompose a shift right into an equivalent rotate/mask, 1567 // and return mask & m. 1568 func mergePPC64RShiftMask(m, s, nbits int64) int64 { 1569 smask := uint64((1<<uint(nbits))-1) >> uint(s) 1570 return m & int64(smask) 1571 } 1572 1573 // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0 1574 func mergePPC64AndSrwi(m, s int64) int64 { 1575 mask := mergePPC64RShiftMask(m, s, 32) 1576 if !isPPC64WordRotateMask(mask) { 1577 return 0 1578 } 1579 return encodePPC64RotateMask((32-s)&31, mask, 32) 1580 } 1581 1582 // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM. 
1583 // Return the encoded RLWINM constant, or 0 if they cannot be merged. 1584 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { 1585 mask_1 := uint64(0xFFFFFFFF >> uint(srw)) 1586 // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left. 1587 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) 1588 1589 // Rewrite mask to apply after the final left shift. 1590 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld)) 1591 1592 r_1 := 32 - srw 1593 r_2 := GetPPC64Shiftsh(sld) 1594 r_3 := (r_1 + r_2) & 31 // This can wrap. 1595 1596 if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 { 1597 return 0 1598 } 1599 return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32) 1600 } 1601 1602 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return 1603 // the encoded RLWINM constant, or 0 if they cannot be merged. 1604 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 { 1605 r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw) 1606 // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left. 1607 mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) 1608 1609 // combine the masks, and adjust for the final left shift. 1610 mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld))) 1611 r_2 := GetPPC64Shiftsh(int64(sld)) 1612 r_3 := (r_1 + r_2) & 31 // This can wrap. 1613 1614 // Verify the result is still a valid bitmask of <= 32 bits. 1615 if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 { 1616 return 0 1617 } 1618 return encodePPC64RotateMask(r_3, int64(mask_3), 32) 1619 } 1620 1621 // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)), 1622 // or return 0 if they cannot be combined. 1623 func mergePPC64SldiSrw(sld, srw int64) int64 { 1624 if sld > srw || srw >= 32 { 1625 return 0 1626 } 1627 mask_r := uint32(0xFFFFFFFF) >> uint(srw) 1628 mask_l := uint32(0xFFFFFFFF) >> uint(sld) 1629 mask := (mask_r & mask_l) << uint(sld) 1630 return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32) 1631 } 1632 1633 // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y) 1634 // to (Select0 (opCC x y)) without having to explicitly fixup every user 1635 // of op. 1636 // 1637 // E.g consider the case: 1638 // a = (ADD x y) 1639 // b = (CMPconst [0] a) 1640 // c = (OR a z) 1641 // 1642 // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y))) 1643 // would produce: 1644 // a = (ADD x y) 1645 // a' = (ADDCC x y) 1646 // a” = (Select0 a') 1647 // b = (CMPconst [0] a”) 1648 // c = (OR a z) 1649 // 1650 // which makes it impossible to rewrite the second user. Instead the result 1651 // of this conversion is: 1652 // a' = (ADDCC x y) 1653 // a = (Select0 a') 1654 // b = (CMPconst [0] a) 1655 // c = (OR a z) 1656 // 1657 // Which makes it trivial to rewrite b using a lowering rule. 1658 func convertPPC64OpToOpCC(op *Value) *Value { 1659 ccOpMap := map[Op]Op{ 1660 OpPPC64ADD: OpPPC64ADDCC, 1661 OpPPC64ADDconst: OpPPC64ADDCCconst, 1662 OpPPC64AND: OpPPC64ANDCC, 1663 OpPPC64ANDN: OpPPC64ANDNCC, 1664 OpPPC64CNTLZD: OpPPC64CNTLZDCC, 1665 OpPPC64OR: OpPPC64ORCC, 1666 OpPPC64SUB: OpPPC64SUBCC, 1667 OpPPC64NEG: OpPPC64NEGCC, 1668 OpPPC64NOR: OpPPC64NORCC, 1669 OpPPC64XOR: OpPPC64XORCC, 1670 } 1671 b := op.Block 1672 opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt) 1673 opCC.AddArgs(op.Args...) 
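	// Turn op itself into (Select0 opCC) so that every existing user of op
	// now sees the Select0 result without needing to be rewritten.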
1674 op.reset(OpSelect0) 1675 op.AddArgs(opCC) 1676 return op 1677 } 1678 1679 // Convenience function to rotate a 32 bit constant value by another constant. 1680 func rotateLeft32(v, rotate int64) int64 { 1681 return int64(bits.RotateLeft32(uint32(v), int(rotate))) 1682 } 1683 1684 func rotateRight64(v, rotate int64) int64 { 1685 return int64(bits.RotateLeft64(uint64(v), int(-rotate))) 1686 } 1687 1688 // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. 1689 func armBFAuxInt(lsb, width int64) arm64BitField { 1690 if lsb < 0 || lsb > 63 { 1691 panic("ARM(64) bit field lsb constant out of range") 1692 } 1693 if width < 1 || lsb+width > 64 { 1694 panic("ARM(64) bit field width constant out of range") 1695 } 1696 return arm64BitField(width | lsb<<8) 1697 } 1698 1699 // returns the lsb part of the auxInt field of arm64 bitfield ops. 1700 func (bfc arm64BitField) getARM64BFlsb() int64 { 1701 return int64(uint64(bfc) >> 8) 1702 } 1703 1704 // returns the width part of the auxInt field of arm64 bitfield ops. 1705 func (bfc arm64BitField) getARM64BFwidth() int64 { 1706 return int64(bfc) & 0xff 1707 } 1708 1709 // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask. 1710 func isARM64BFMask(lsb, mask, rshift int64) bool { 1711 shiftedMask := int64(uint64(mask) >> uint64(rshift)) 1712 return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64 1713 } 1714 1715 // returns the bitfield width of mask >> rshift for arm64 bitfield ops. 1716 func arm64BFWidth(mask, rshift int64) int64 { 1717 shiftedMask := int64(uint64(mask) >> uint64(rshift)) 1718 if shiftedMask == 0 { 1719 panic("ARM64 BF mask is zero") 1720 } 1721 return nto(shiftedMask) 1722 } 1723 1724 // sizeof returns the size of t in bytes. 1725 // It will panic if t is not a *types.Type. 1726 func sizeof(t interface{}) int64 { 1727 return t.(*types.Type).Size() 1728 } 1729 1730 // registerizable reports whether t is a primitive type that fits in 1731 // a register. It assumes float64 values will always fit into registers 1732 // even if that isn't strictly true. 1733 func registerizable(b *Block, typ *types.Type) bool { 1734 if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() { 1735 return true 1736 } 1737 if typ.IsInteger() { 1738 return typ.Size() <= b.Func.Config.RegSize 1739 } 1740 return false 1741 } 1742 1743 // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed. 1744 func needRaceCleanup(sym *AuxCall, v *Value) bool { 1745 f := v.Block.Func 1746 if !f.Config.Race { 1747 return false 1748 } 1749 if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") { 1750 return false 1751 } 1752 for _, b := range f.Blocks { 1753 for _, v := range b.Values { 1754 switch v.Op { 1755 case OpStaticCall, OpStaticLECall: 1756 // Check for racefuncenter will encounter racefuncexit and vice versa. 1757 // Allow calls to panic* 1758 s := v.Aux.(*AuxCall).Fn.String() 1759 switch s { 1760 case "runtime.racefuncenter", "runtime.racefuncexit", 1761 "runtime.panicdivide", "runtime.panicwrap", 1762 "runtime.panicshift": 1763 continue 1764 } 1765 // If we encountered any call, we need to keep racefunc*, 1766 // for accurate stacktraces. 1767 return false 1768 case OpPanicBounds, OpPanicExtend: 1769 // Note: these are panic generators that are ok (like the static calls above). 1770 case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall: 1771 // We must keep the race functions if there are any other call types. 
1772 return false 1773 } 1774 } 1775 } 1776 if isSameCall(sym, "runtime.racefuncenter") { 1777 // TODO REGISTER ABI this needs to be cleaned up. 1778 // If we're removing racefuncenter, remove its argument as well. 1779 if v.Args[0].Op != OpStore { 1780 if v.Op == OpStaticLECall { 1781 // there is no store, yet. 1782 return true 1783 } 1784 return false 1785 } 1786 mem := v.Args[0].Args[2] 1787 v.Args[0].reset(OpCopy) 1788 v.Args[0].AddArg(mem) 1789 } 1790 return true 1791 } 1792 1793 // symIsRO reports whether sym is a read-only global. 1794 func symIsRO(sym interface{}) bool { 1795 lsym := sym.(*obj.LSym) 1796 return lsym.Type == objabi.SRODATA && len(lsym.R) == 0 1797 } 1798 1799 // symIsROZero reports whether sym is a read-only global whose data contains all zeros. 1800 func symIsROZero(sym Sym) bool { 1801 lsym := sym.(*obj.LSym) 1802 if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 { 1803 return false 1804 } 1805 for _, b := range lsym.P { 1806 if b != 0 { 1807 return false 1808 } 1809 } 1810 return true 1811 } 1812 1813 // isFixed32 returns true if the int32 at offset off in symbol sym 1814 // is known and constant. 1815 func isFixed32(c *Config, sym Sym, off int64) bool { 1816 return isFixed(c, sym, off, 4) 1817 } 1818 1819 // isFixed returns true if the range [off,off+size] of the symbol sym 1820 // is known and constant. 1821 func isFixed(c *Config, sym Sym, off, size int64) bool { 1822 lsym := sym.(*obj.LSym) 1823 if lsym.Extra == nil { 1824 return false 1825 } 1826 if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok { 1827 if off == 2*c.PtrSize && size == 4 { 1828 return true // type hash field 1829 } 1830 } 1831 return false 1832 } 1833 func fixed32(c *Config, sym Sym, off int64) int32 { 1834 lsym := sym.(*obj.LSym) 1835 if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok { 1836 if off == 2*c.PtrSize { 1837 return int32(types.TypeHash(ti.Type.(*types.Type))) 1838 } 1839 } 1840 base.Fatalf("fixed32 data not known for %s:%d", sym, off) 1841 return 0 1842 } 1843 1844 // isFixedSym returns true if the contents of sym at the given offset 1845 // is known and is the constant address of another symbol. 1846 func isFixedSym(sym Sym, off int64) bool { 1847 lsym := sym.(*obj.LSym) 1848 switch { 1849 case lsym.Type == objabi.SRODATA: 1850 // itabs, dictionaries 1851 default: 1852 return false 1853 } 1854 for _, r := range lsym.R { 1855 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 { 1856 return true 1857 } 1858 } 1859 return false 1860 } 1861 func fixedSym(f *Func, sym Sym, off int64) Sym { 1862 lsym := sym.(*obj.LSym) 1863 for _, r := range lsym.R { 1864 if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off { 1865 if strings.HasPrefix(r.Sym.Name, "type:") { 1866 // In case we're loading a type out of a dictionary, we need to record 1867 // that the containing function might put that type in an interface. 1868 // That information is currently recorded in relocations in the dictionary, 1869 // but if we perform this load at compile time then the dictionary 1870 // might be dead. 1871 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym()) 1872 } else if strings.HasPrefix(r.Sym.Name, "go:itab") { 1873 // Same, but if we're using an itab we need to record that the 1874 // itab._type might be put in an interface. 
1875 reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym()) 1876 } 1877 return r.Sym 1878 } 1879 } 1880 base.Fatalf("fixedSym data not known for %s:%d", sym, off) 1881 return nil 1882 } 1883 1884 // read8 reads one byte from the read-only global sym at offset off. 1885 func read8(sym interface{}, off int64) uint8 { 1886 lsym := sym.(*obj.LSym) 1887 if off >= int64(len(lsym.P)) || off < 0 { 1888 // Invalid index into the global sym. 1889 // This can happen in dead code, so we don't want to panic. 1890 // Just return any value, it will eventually get ignored. 1891 // See issue 29215. 1892 return 0 1893 } 1894 return lsym.P[off] 1895 } 1896 1897 // read16 reads two bytes from the read-only global sym at offset off. 1898 func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 { 1899 lsym := sym.(*obj.LSym) 1900 // lsym.P is written lazily. 1901 // Bytes requested after the end of lsym.P are 0. 1902 var src []byte 1903 if 0 <= off && off < int64(len(lsym.P)) { 1904 src = lsym.P[off:] 1905 } 1906 buf := make([]byte, 2) 1907 copy(buf, src) 1908 return byteorder.Uint16(buf) 1909 } 1910 1911 // read32 reads four bytes from the read-only global sym at offset off. 1912 func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 { 1913 lsym := sym.(*obj.LSym) 1914 var src []byte 1915 if 0 <= off && off < int64(len(lsym.P)) { 1916 src = lsym.P[off:] 1917 } 1918 buf := make([]byte, 4) 1919 copy(buf, src) 1920 return byteorder.Uint32(buf) 1921 } 1922 1923 // read64 reads eight bytes from the read-only global sym at offset off. 1924 func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 { 1925 lsym := sym.(*obj.LSym) 1926 var src []byte 1927 if 0 <= off && off < int64(len(lsym.P)) { 1928 src = lsym.P[off:] 1929 } 1930 buf := make([]byte, 8) 1931 copy(buf, src) 1932 return byteorder.Uint64(buf) 1933 } 1934 1935 // sequentialAddresses reports true if it can prove that x + n == y 1936 func sequentialAddresses(x, y *Value, n int64) bool { 1937 if x == y && n == 0 { 1938 return true 1939 } 1940 if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil && 1941 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || 1942 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { 1943 return true 1944 } 1945 if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && 1946 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || 1947 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { 1948 return true 1949 } 1950 if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil && 1951 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || 1952 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { 1953 return true 1954 } 1955 if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && 1956 (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || 1957 x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { 1958 return true 1959 } 1960 return false 1961 } 1962 1963 // flagConstant represents the result of a compile-time comparison. 1964 // The sense of these flags does not necessarily represent the hardware's notion 1965 // of a flags register - these are just a compile-time construct. 1966 // We happen to match the semantics to those of arm/arm64. 1967 // Note that these semantics differ from x86: the carry flag has the opposite 1968 // sense on a subtraction! 1969 // 1970 // On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C. 
1971 // On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C. 1972 // (because it does x + ^y + C). 1973 // 1974 // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag 1975 type flagConstant uint8 1976 1977 // N reports whether the result of an operation is negative (high bit set). 1978 func (fc flagConstant) N() bool { 1979 return fc&1 != 0 1980 } 1981 1982 // Z reports whether the result of an operation is 0. 1983 func (fc flagConstant) Z() bool { 1984 return fc&2 != 0 1985 } 1986 1987 // C reports whether an unsigned add overflowed (carry), or an 1988 // unsigned subtract did not underflow (borrow). 1989 func (fc flagConstant) C() bool { 1990 return fc&4 != 0 1991 } 1992 1993 // V reports whether a signed operation overflowed or underflowed. 1994 func (fc flagConstant) V() bool { 1995 return fc&8 != 0 1996 } 1997 1998 func (fc flagConstant) eq() bool { 1999 return fc.Z() 2000 } 2001 func (fc flagConstant) ne() bool { 2002 return !fc.Z() 2003 } 2004 func (fc flagConstant) lt() bool { 2005 return fc.N() != fc.V() 2006 } 2007 func (fc flagConstant) le() bool { 2008 return fc.Z() || fc.lt() 2009 } 2010 func (fc flagConstant) gt() bool { 2011 return !fc.Z() && fc.ge() 2012 } 2013 func (fc flagConstant) ge() bool { 2014 return fc.N() == fc.V() 2015 } 2016 func (fc flagConstant) ult() bool { 2017 return !fc.C() 2018 } 2019 func (fc flagConstant) ule() bool { 2020 return fc.Z() || fc.ult() 2021 } 2022 func (fc flagConstant) ugt() bool { 2023 return !fc.Z() && fc.uge() 2024 } 2025 func (fc flagConstant) uge() bool { 2026 return fc.C() 2027 } 2028 2029 func (fc flagConstant) ltNoov() bool { 2030 return fc.lt() && !fc.V() 2031 } 2032 func (fc flagConstant) leNoov() bool { 2033 return fc.le() && !fc.V() 2034 } 2035 func (fc flagConstant) gtNoov() bool { 2036 return fc.gt() && !fc.V() 2037 } 2038 func (fc flagConstant) geNoov() bool { 2039 return fc.ge() && !fc.V() 2040 } 2041 2042 func (fc flagConstant) String() string { 2043 return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V()) 2044 } 2045 2046 type flagConstantBuilder struct { 2047 N bool 2048 Z bool 2049 C bool 2050 V bool 2051 } 2052 2053 func (fcs flagConstantBuilder) encode() flagConstant { 2054 var fc flagConstant 2055 if fcs.N { 2056 fc |= 1 2057 } 2058 if fcs.Z { 2059 fc |= 2 2060 } 2061 if fcs.C { 2062 fc |= 4 2063 } 2064 if fcs.V { 2065 fc |= 8 2066 } 2067 return fc 2068 } 2069 2070 // Note: addFlags(x,y) != subFlags(x,-y) in some situations: 2071 // - the results of the C flag are different 2072 // - the results of the V flag when y==minint are different 2073 2074 // addFlags64 returns the flags that would be set from computing x+y. 2075 func addFlags64(x, y int64) flagConstant { 2076 var fcb flagConstantBuilder 2077 fcb.Z = x+y == 0 2078 fcb.N = x+y < 0 2079 fcb.C = uint64(x+y) < uint64(x) 2080 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0 2081 return fcb.encode() 2082 } 2083 2084 // subFlags64 returns the flags that would be set from computing x-y. 2085 func subFlags64(x, y int64) flagConstant { 2086 var fcb flagConstantBuilder 2087 fcb.Z = x-y == 0 2088 fcb.N = x-y < 0 2089 fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model. 2090 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0 2091 return fcb.encode() 2092 } 2093 2094 // addFlags32 returns the flags that would be set from computing x+y. 
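// For example, addFlags32(math.MaxInt32, 1) yields N=1, Z=0, C=0, V=1:
// the signed sum overflows and wraps to a negative value, while the
// unsigned sum does not wrap, so the carry stays clear.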
2095 func addFlags32(x, y int32) flagConstant { 2096 var fcb flagConstantBuilder 2097 fcb.Z = x+y == 0 2098 fcb.N = x+y < 0 2099 fcb.C = uint32(x+y) < uint32(x) 2100 fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0 2101 return fcb.encode() 2102 } 2103 2104 // subFlags32 returns the flags that would be set from computing x-y. 2105 func subFlags32(x, y int32) flagConstant { 2106 var fcb flagConstantBuilder 2107 fcb.Z = x-y == 0 2108 fcb.N = x-y < 0 2109 fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model. 2110 fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0 2111 return fcb.encode() 2112 } 2113 2114 // logicFlags64 returns flags set to the sign/zeroness of x. 2115 // C and V are set to false. 2116 func logicFlags64(x int64) flagConstant { 2117 var fcb flagConstantBuilder 2118 fcb.Z = x == 0 2119 fcb.N = x < 0 2120 return fcb.encode() 2121 } 2122 2123 // logicFlags32 returns flags set to the sign/zeroness of x. 2124 // C and V are set to false. 2125 func logicFlags32(x int32) flagConstant { 2126 var fcb flagConstantBuilder 2127 fcb.Z = x == 0 2128 fcb.N = x < 0 2129 return fcb.encode() 2130 } 2131 2132 func makeJumpTableSym(b *Block) *obj.LSym { 2133 s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID)) 2134 s.Set(obj.AttrDuplicateOK, true) 2135 s.Set(obj.AttrLocal, true) 2136 return s 2137 } 2138 2139 // canRotate reports whether the architecture supports 2140 // rotates of integer registers with the given number of bits. 2141 func canRotate(c *Config, bits int64) bool { 2142 if bits > c.PtrSize*8 { 2143 // Don't rewrite to rotates bigger than the machine word. 2144 return false 2145 } 2146 switch c.arch { 2147 case "386", "amd64", "arm64": 2148 return true 2149 case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64": 2150 return bits >= 32 2151 default: 2152 return false 2153 } 2154 } 2155 2156 // isARM64bitcon reports whether a constant can be encoded into a logical instruction. 2157 func isARM64bitcon(x uint64) bool { 2158 if x == 1<<64-1 || x == 0 { 2159 return false 2160 } 2161 // determine the period and sign-extend a unit to 64 bits 2162 switch { 2163 case x != x>>32|x<<32: 2164 // period is 64 2165 // nothing to do 2166 case x != x>>16|x<<48: 2167 // period is 32 2168 x = uint64(int64(int32(x))) 2169 case x != x>>8|x<<56: 2170 // period is 16 2171 x = uint64(int64(int16(x))) 2172 case x != x>>4|x<<60: 2173 // period is 8 2174 x = uint64(int64(int8(x))) 2175 default: 2176 // period is 4 or 2, always true 2177 // 0001, 0010, 0100, 1000 -- 0001 rotate 2178 // 0011, 0110, 1100, 1001 -- 0011 rotate 2179 // 0111, 1011, 1101, 1110 -- 0111 rotate 2180 // 0101, 1010 -- 01 rotate, repeat 2181 return true 2182 } 2183 return sequenceOfOnes(x) || sequenceOfOnes(^x) 2184 } 2185 2186 // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros. 2187 func sequenceOfOnes(x uint64) bool { 2188 y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2 2189 y += x 2190 return (y-1)&y == 0 2191 } 2192 2193 // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction. 2194 func isARM64addcon(v int64) bool { 2195 /* uimm12 or uimm24? */ 2196 if v < 0 { 2197 return false 2198 } 2199 if (v & 0xFFF) == 0 { 2200 v >>= 12 2201 } 2202 return v <= 0xFFF 2203 } 2204 2205 // setPos sets the position of v to pos, then returns true. 
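// It always returns true so that it can be used for its side effect
// inside the boolean condition of a rewrite rule.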
2206 // Useful for setting the position of a rewrite's result to 2207 // something other than the default. 2208 func setPos(v *Value, pos src.XPos) bool { 2209 v.Pos = pos 2210 return true 2211 }
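
// exampleRewriteHelpers is a minimal illustrative sketch of how a few of the
// helpers above behave. It is not referenced by the compiler itself; the
// function name and printed values are hypothetical, and it assumes placement
// in this package so the unexported identifiers are visible.
func exampleRewriteHelpers() {
	// arm64 bitfield auxInt: width in the low 8 bits, lsb shifted left by 8.
	bf := armBFAuxInt(16, 8)
	fmt.Println(bf.getARM64BFlsb(), bf.getARM64BFwidth()) // 16 8

	// arm-style carry on subtraction: C is set when the subtraction does
	// NOT borrow, so 2-1 sets C and 1-2 clears it.
	fmt.Println(subFlags64(2, 1).C(), subFlags64(1, 2).C()) // true false
	fmt.Println(subFlags64(1, 2).ult())                     // true: 1 < 2 unsigned

	// Signed overflow on addition sets V.
	fmt.Println(addFlags64(math.MaxInt64, 1).V()) // true

	// arm64 immediate encodability checks: a repeating bit pattern is a valid
	// logical immediate; 0x1001 is neither a uimm12 nor a shifted uimm12.
	fmt.Println(isARM64bitcon(0x00ff00ff00ff00ff))           // true
	fmt.Println(isARM64addcon(0xfff), isARM64addcon(0x1001)) // true false
}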