github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/rewrite.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
	"github.com/bir3/gocompiler/src/cmd/internal/obj"
	"github.com/bir3/gocompiler/src/cmd/internal/obj/s390x"
	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
	"github.com/bir3/gocompiler/src/cmd/internal/src"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		deadChange := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of their arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						deadChange = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that an x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change && !deadChange {
			break
		}
		iters++
		if (iters > 1000 || debug >= 2) && change {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t *types.Type) bool {
	return t.IsSigned()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets, we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//   load = read ... oldmem
	//   newmem = write ... oldmem
	//   arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//   x = read ... v
			//   mem = write ... v
			//   load = read ... mem
			//   target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}

// canLoadUnaligned reports if the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}

// logX returns logarithm of n base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                  |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}

// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}

func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}

func opToAuxInt(o Op) int64 {
	return int64(o)
}

// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i Aux) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i Aux) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func StringToAux(s string) Aux {
	return stringAux(s)
}
func symToAux(s Sym) Aux {
	return s
}
func callToAux(s *AuxCall) Aux {
	return s
}
func typeToAux(t *types.Type) Aux {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) Aux {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
	return r
}

// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}

	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}

	return nil
}

// de-virtualize an InterLECall
// 'sym' is the symbol for the itab.
func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
	n, ok := sym.(*obj.LSym)
	if !ok {
		return nil
	}

	lsym := loadLSymOffset(n, offset)
	if f := v.Block.Func; f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
	auxcall.Fn = sym
	// Remove first arg
	v.Args[0].Uses--
	copy(v.Args[0:], v.Args[1:])
	v.Args[len(v.Args)-1] = nil // aid GC
	v.Args = v.Args[:len(v.Args)-1]
	return v
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, b is the block of the first value in a that we found by walking backwards.
	// If we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--

	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//
//	A) make sure the values are really dead and never used again.
//	B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//
//	noteRule("note to self: rule of interest matched")
//
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}

// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}

// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see _gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// check if value zeroes out the upper 32 bits of a 64-bit register.
// depth limits recursion depth. In AMD64.rules, 3 is used as the limit
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpArg:
		return x.Type.Size() == 4
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg:
		return x.Type.Size() == 2
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg:
		return x.Type.Size() == 1
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move will do all of its loads before any of its stores, or
// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}
func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	return isInlinableMemmove(dst, src, sz, c)
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}
func LogLargeCopy(funcName string, pos src.XPos, s int64) {
	if s < 128 {
		return
	}
	if logopt.Enabled() {
		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
	}
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// Test if this value can be encoded as a mask for an rlwinm-like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate rightmost 1 (if none 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}

// Compress mask and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
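// An illustrative worked example (derived from the definitions that follow, not part of
// the original comment): encodePPC64RotateMask(0, 0x00FF0000, 32) computes mb=8, me=16
// and packs them as 16 | 8<<8 | 0<<16 | 32<<24, and DecodePPC64RotateMask on that value
// recovers rotate=0, mb=8, me=15 (me adjusted to the inclusive ISA form) and mask=0x00FF0000.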
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g. bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}

// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}

// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}

// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}

// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}

// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}

// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

// sequentialAddresses reports true if it can prove that x + n == y
func sequentialAddresses(x, y *Value, n int64) bool {
	if x == y && n == 0 {
		return true
	}
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}

func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
//   - the results of the C flag are different
//   - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

func makeJumpTableSym(b *Block) *obj.LSym {
	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
	s.Set(obj.AttrDuplicateOK, true)
	s.Set(obj.AttrLocal, true)
	return s
}

// canRotate reports whether the architecture supports
// rotates of integer registers with the given number of bits.
func canRotate(c *Config, bits int64) bool {
	if bits > c.PtrSize*8 {
		// Don't rewrite to rotates bigger than the machine word.
		return false
	}
	switch c.arch {
	case "386", "amd64", "arm64":
		return true
	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
		return bits >= 32
	default:
		return false
	}
}

// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
func isARM64bitcon(x uint64) bool {
	if x == 1<<64-1 || x == 0 {
		return false
	}
	// determine the period and sign-extend a unit to 64 bits
	switch {
	case x != x>>32|x<<32:
		// period is 64
		// nothing to do
	case x != x>>16|x<<48:
		// period is 32
		x = uint64(int64(int32(x)))
	case x != x>>8|x<<56:
		// period is 16
		x = uint64(int64(int16(x)))
	case x != x>>4|x<<60:
		// period is 8
		x = uint64(int64(int8(x)))
	default:
		// period is 4 or 2, always true
		// 0001, 0010, 0100, 1000 -- 0001 rotate
		// 0011, 0110, 1100, 1001 -- 0011 rotate
		// 0111, 1011, 1101, 1110 -- 0111 rotate
		// 0101, 1010             -- 01   rotate, repeat
		return true
	}
	return sequenceOfOnes(x) || sequenceOfOnes(^x)
}

// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
	y += x
	return (y-1)&y == 0
}

// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
func isARM64addcon(v int64) bool {
	/* uimm12 or uimm24? */
	if v < 0 {
		return false
	}
	if (v & 0xFFF) == 0 {
		v >>= 12
	}
	return v <= 0xFFF
}
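
// exampleARM64BitFieldRoundTrip is an illustrative sketch only (a hypothetical helper that is
// not referenced by any rewrite rule): it shows how armBFAuxInt packs an (lsb, width) pair into
// a single auxInt value and how the two accessors above recover the fields.
func exampleARM64BitFieldRoundTrip() {
	bfc := armBFAuxInt(8, 16) // encodes width | lsb<<8 = 16 | 8<<8
	fmt.Printf("lsb=%d width=%d\n", bfc.getARM64BFlsb(), bfc.getARM64BFwidth()) // prints lsb=8 width=16
}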