github.com/bir3/gocompiler@v0.3.205/src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
     9  	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
    10  	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
    11  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    12  	"github.com/bir3/gocompiler/src/cmd/internal/obj/s390x"
    13  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    14  	"github.com/bir3/gocompiler/src/cmd/internal/src"
    15  	"encoding/binary"
    16  	"fmt"
    17  	"io"
    18  	"math"
    19  	"math/bits"
    20  	"os"
    21  	"path/filepath"
    22  )
    23  
    24  type deadValueChoice bool
    25  
    26  const (
    27  	leaveDeadValues  deadValueChoice = false
    28  	removeDeadValues                 = true
    29  )
    30  
    31  // deadcode indicates whether rewrite should try to remove any values that become dead.
    32  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    33  	// repeat rewrites until we find no more rewrites
    34  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    35  	pendingLines.clear()
    36  	debug := f.pass.debug
    37  	if debug > 1 {
    38  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    39  	}
    40  	var iters int
    41  	var states map[string]bool
    42  	for {
    43  		change := false
    44  		deadChange := false
    45  		for _, b := range f.Blocks {
    46  			var b0 *Block
    47  			if debug > 1 {
    48  				b0 = new(Block)
    49  				*b0 = *b
    50  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    51  			}
    52  			for i, c := range b.ControlValues() {
    53  				for c.Op == OpCopy {
    54  					c = c.Args[0]
    55  					b.ReplaceControl(i, c)
    56  				}
    57  			}
    58  			if rb(b) {
    59  				change = true
    60  				if debug > 1 {
    61  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    62  				}
    63  			}
    64  			for j, v := range b.Values {
    65  				var v0 *Value
    66  				if debug > 1 {
    67  					v0 = new(Value)
    68  					*v0 = *v
    69  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    70  				}
    71  				if v.Uses == 0 && v.removeable() {
    72  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    73  						// Reset any values that are now unused, so that we decrement
    74  						// the use count of all of its arguments.
    75  						// Not quite a deadcode pass, because it does not handle cycles.
    76  						// But it should help Uses==1 rules to fire.
    77  						v.reset(OpInvalid)
    78  						deadChange = true
    79  					}
    80  					// No point rewriting values which aren't used.
    81  					continue
    82  				}
    83  
    84  				vchange := phielimValue(v)
    85  				if vchange && debug > 1 {
    86  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    87  				}
    88  
    89  				// Eliminate copy inputs.
    90  				// If any copy input becomes unused, mark it
    91  				// as invalid and discard its argument. Repeat
    92  				// recursively on the discarded argument.
    93  				// This phase helps remove phantom "dead copy" uses
    94  				// of a value so that an x.Uses==1 rule condition
    95  				// fires reliably.
    96  				for i, a := range v.Args {
    97  					if a.Op != OpCopy {
    98  						continue
    99  					}
   100  					aa := copySource(a)
   101  					v.SetArg(i, aa)
   102  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   103  					// to hold it.  The first candidate is the value that will replace a (aa),
   104  					// if it shares the same block and line and is eligible.
   105  					// The second option is v, which has a as an input.  Because aa is earlier in
   106  					// the data flow, it is the better choice.
   107  					if a.Pos.IsStmt() == src.PosIsStmt {
   108  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   109  							aa.Pos = aa.Pos.WithIsStmt()
   110  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   111  							v.Pos = v.Pos.WithIsStmt()
   112  						} else {
   113  							// Record the lost line and look for a new home after all rewrites are complete.
   114  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   115  							// line to appear in more than one block, but only one block is stored, so if both end
   116  							// up here, then one will be lost.
   117  							pendingLines.set(a.Pos, int32(a.Block.ID))
   118  						}
   119  						a.Pos = a.Pos.WithNotStmt()
   120  					}
   121  					vchange = true
   122  					for a.Uses == 0 {
   123  						b := a.Args[0]
   124  						a.reset(OpInvalid)
   125  						a = b
   126  					}
   127  				}
   128  				if vchange && debug > 1 {
   129  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   130  				}
   131  
   132  				// apply rewrite function
   133  				if rv(v) {
   134  					vchange = true
   135  					// If value changed to a poor choice for a statement boundary, move the boundary
   136  					if v.Pos.IsStmt() == src.PosIsStmt {
   137  						if k := nextGoodStatementIndex(v, j, b); k != j {
   138  							v.Pos = v.Pos.WithNotStmt()
   139  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   140  						}
   141  					}
   142  				}
   143  
   144  				change = change || vchange
   145  				if vchange && debug > 1 {
   146  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   147  				}
   148  			}
   149  		}
   150  		if !change && !deadChange {
   151  			break
   152  		}
   153  		iters++
   154  		if (iters > 1000 || debug >= 2) && change {
   155  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   156  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   157  			// and the maximum value encountered during make.bash is 12.
   158  			// Start checking for cycles. (This is too expensive to do routinely.)
   159  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   160  			if states == nil {
   161  				states = make(map[string]bool)
   162  			}
   163  			h := f.rewriteHash()
   164  			if _, ok := states[h]; ok {
   165  				// We've found a cycle.
   166  				// To diagnose it, set debug to 2 and start again,
   167  				// so that we'll print all rules applied until we complete another cycle.
   168  				// If debug is already >= 2, we've already done that, so it's time to crash.
   169  				if debug < 2 {
   170  					debug = 2
   171  					states = make(map[string]bool)
   172  				} else {
   173  					f.Fatalf("rewrite cycle detected")
   174  				}
   175  			}
   176  			states[h] = true
   177  		}
   178  	}
   179  	// remove clobbered values
   180  	for _, b := range f.Blocks {
   181  		j := 0
   182  		for i, v := range b.Values {
   183  			vl := v.Pos
   184  			if v.Op == OpInvalid {
   185  				if v.Pos.IsStmt() == src.PosIsStmt {
   186  					pendingLines.set(vl, int32(b.ID))
   187  				}
   188  				f.freeValue(v)
   189  				continue
   190  			}
   191  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   192  				pendingLines.remove(vl)
   193  				v.Pos = v.Pos.WithIsStmt()
   194  			}
   195  			if i != j {
   196  				b.Values[j] = v
   197  			}
   198  			j++
   199  		}
   200  		if pendingLines.get(b.Pos) == int32(b.ID) {
   201  			b.Pos = b.Pos.WithIsStmt()
   202  			pendingLines.remove(b.Pos)
   203  		}
   204  		b.truncateValues(j)
   205  	}
   206  }
   207  
   208  // Common functions called from rewriting rules
   209  
   210  func is64BitFloat(t *types.Type) bool {
   211  	return t.Size() == 8 && t.IsFloat()
   212  }
   213  
   214  func is32BitFloat(t *types.Type) bool {
   215  	return t.Size() == 4 && t.IsFloat()
   216  }
   217  
   218  func is64BitInt(t *types.Type) bool {
   219  	return t.Size() == 8 && t.IsInteger()
   220  }
   221  
   222  func is32BitInt(t *types.Type) bool {
   223  	return t.Size() == 4 && t.IsInteger()
   224  }
   225  
   226  func is16BitInt(t *types.Type) bool {
   227  	return t.Size() == 2 && t.IsInteger()
   228  }
   229  
   230  func is8BitInt(t *types.Type) bool {
   231  	return t.Size() == 1 && t.IsInteger()
   232  }
   233  
   234  func isPtr(t *types.Type) bool {
   235  	return t.IsPtrShaped()
   236  }
   237  
   238  func isSigned(t *types.Type) bool {
   239  	return t.IsSigned()
   240  }
   241  
   242  // mergeSym merges two symbolic offsets. There is no real merging of
   243  // offsets; we just pick the non-nil one.
   244  func mergeSym(x, y Sym) Sym {
   245  	if x == nil {
   246  		return y
   247  	}
   248  	if y == nil {
   249  		return x
   250  	}
   251  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   252  }
   253  
   254  func canMergeSym(x, y Sym) bool {
   255  	return x == nil || y == nil
   256  }
   257  
   258  // canMergeLoadClobber reports whether the load can be merged into target without
   259  // invalidating the schedule.
   260  // It also checks that the other non-load argument x is something we
   261  // are ok with clobbering.
   262  func canMergeLoadClobber(target, load, x *Value) bool {
   263  	// The register containing x is going to get clobbered.
   264  	// Don't merge if we still need the value of x.
   265  	// We don't have liveness information here, but we can
   266  	// approximate x dying with:
   267  	//  1) target is x's only use.
   268  	//  2) target is not in a deeper loop than x.
   269  	if x.Uses != 1 {
   270  		return false
   271  	}
   272  	loopnest := x.Block.Func.loopnest()
   273  	loopnest.calculateDepths()
   274  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   275  		return false
   276  	}
   277  	return canMergeLoad(target, load)
   278  }
   279  
   280  // canMergeLoad reports whether the load can be merged into target without
   281  // invalidating the schedule.
   282  func canMergeLoad(target, load *Value) bool {
   283  	if target.Block.ID != load.Block.ID {
   284  		// If the load is in a different block do not merge it.
   285  		return false
   286  	}
   287  
   288  	// We can't merge the load into the target if the load
   289  	// has more than one use.
   290  	if load.Uses != 1 {
   291  		return false
   292  	}
   293  
   294  	mem := load.MemoryArg()
   295  
   296  	// We need the load's memory arg to still be alive at target. That
   297  	// can't be the case if one of target's args depends on a memory
   298  	// state that is a successor of load's memory arg.
   299  	//
   300  	// For example, it would be invalid to merge load into target in
   301  	// the following situation because newmem has killed oldmem
   302  	// before target is reached:
   303  	//     load = read ... oldmem
   304  	//   newmem = write ... oldmem
   305  	//     arg0 = read ... newmem
   306  	//   target = add arg0 load
   307  	//
   308  	// If the argument comes from a different block then we can exclude
   309  	// it immediately because it must dominate load (which is in the
   310  	// same block as target).
   311  	var args []*Value
   312  	for _, a := range target.Args {
   313  		if a != load && a.Block.ID == target.Block.ID {
   314  			args = append(args, a)
   315  		}
   316  	}
   317  
   318  	// memPreds contains memory states known to be predecessors of load's
   319  	// memory state. It is lazily initialized.
   320  	var memPreds map[*Value]bool
   321  	for i := 0; len(args) > 0; i++ {
   322  		const limit = 100
   323  		if i >= limit {
   324  			// Give up if we have done a lot of iterations.
   325  			return false
   326  		}
   327  		v := args[len(args)-1]
   328  		args = args[:len(args)-1]
   329  		if target.Block.ID != v.Block.ID {
   330  			// Since target and load are in the same block
   331  			// we can stop searching when we leave the block.
   332  			continue
   333  		}
   334  		if v.Op == OpPhi {
   335  			// A Phi implies we have reached the top of the block.
   336  			// The memory phi, if it exists, is always
   337  			// the first logical store in the block.
   338  			continue
   339  		}
   340  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   341  			// We could handle this situation however it is likely
   342  			// to be very rare.
   343  			return false
   344  		}
   345  		if v.Op.SymEffect()&SymAddr != 0 {
   346  			// This case prevents an operation that calculates the
   347  			// address of a local variable from being forced to schedule
   348  			// before its corresponding VarDef.
   349  			// See issue 28445.
   350  			//   v1 = LOAD ...
   351  			//   v2 = VARDEF
   352  			//   v3 = LEAQ
   353  			//   v4 = CMPQ v1 v3
   354  			// We don't want to combine the CMPQ with the load, because
   355  			// that would force the CMPQ to schedule before the VARDEF, which
   356  			// in turn requires the LEAQ to schedule before the VARDEF.
   357  			return false
   358  		}
   359  		if v.Type.IsMemory() {
   360  			if memPreds == nil {
   361  				// Initialise a map containing memory states
   362  				// known to be predecessors of load's memory
   363  				// state.
   364  				memPreds = make(map[*Value]bool)
   365  				m := mem
   366  				const limit = 50
   367  				for i := 0; i < limit; i++ {
   368  					if m.Op == OpPhi {
   369  						// The memory phi, if it exists, is always
   370  						// the first logical store in the block.
   371  						break
   372  					}
   373  					if m.Block.ID != target.Block.ID {
   374  						break
   375  					}
   376  					if !m.Type.IsMemory() {
   377  						break
   378  					}
   379  					memPreds[m] = true
   380  					if len(m.Args) == 0 {
   381  						break
   382  					}
   383  					m = m.MemoryArg()
   384  				}
   385  			}
   386  
   387  			// We can merge if v is a predecessor of mem.
   388  			//
   389  			// For example, we can merge load into target in the
   390  			// following scenario:
   391  			//      x = read ... v
   392  			//    mem = write ... v
   393  			//   load = read ... mem
   394  			// target = add x load
   395  			if memPreds[v] {
   396  				continue
   397  			}
   398  			return false
   399  		}
   400  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   401  			// If v takes mem as an input then we know mem
   402  			// is valid at this point.
   403  			continue
   404  		}
   405  		for _, a := range v.Args {
   406  			if target.Block.ID == a.Block.ID {
   407  				args = append(args, a)
   408  			}
   409  		}
   410  	}
   411  
   412  	return true
   413  }
   414  
   415  // isSameCall reports whether sym is the same as the given named symbol.
   416  func isSameCall(sym interface{}, name string) bool {
   417  	fn := sym.(*AuxCall).Fn
   418  	return fn != nil && fn.String() == name
   419  }
   420  
   421  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   422  func canLoadUnaligned(c *Config) bool {
   423  	return c.ctxt.Arch.Alignment == 1
   424  }
   425  
   426  // nlzX returns the number of leading zeros.
   427  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   428  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   429  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   430  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   431  
   432  // ntzX returns the number of trailing zeros.
   433  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   434  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   435  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   436  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   437  
   438  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   439  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   440  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   441  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   442  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   443  
   444  // nto returns the number of trailing ones.
   445  func nto(x int64) int64 {
   446  	return int64(ntz64(^x))
   447  }
   448  
   449  // logX returns the base-2 logarithm of n.
   450  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   451  func log8(n int8) int64 {
   452  	return int64(bits.Len8(uint8(n))) - 1
   453  }
   454  func log16(n int16) int64 {
   455  	return int64(bits.Len16(uint16(n))) - 1
   456  }
   457  func log32(n int32) int64 {
   458  	return int64(bits.Len32(uint32(n))) - 1
   459  }
   460  func log64(n int64) int64 {
   461  	return int64(bits.Len64(uint64(n))) - 1
   462  }
   463  
   464  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   465  // Rounds down.
   466  func log2uint32(n int64) int64 {
   467  	return int64(bits.Len32(uint32(n))) - 1
   468  }
   469  
   470  // isPowerOfTwoX functions report whether n is a power of 2.
   471  func isPowerOfTwo8(n int8) bool {
   472  	return n > 0 && n&(n-1) == 0
   473  }
   474  func isPowerOfTwo16(n int16) bool {
   475  	return n > 0 && n&(n-1) == 0
   476  }
   477  func isPowerOfTwo32(n int32) bool {
   478  	return n > 0 && n&(n-1) == 0
   479  }
   480  func isPowerOfTwo64(n int64) bool {
   481  	return n > 0 && n&(n-1) == 0
   482  }
   483  
   484  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   485  func isUint64PowerOfTwo(in int64) bool {
   486  	n := uint64(in)
   487  	return n > 0 && n&(n-1) == 0
   488  }
   489  
   490  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   491  func isUint32PowerOfTwo(in int64) bool {
   492  	n := uint64(uint32(in))
   493  	return n > 0 && n&(n-1) == 0
   494  }
   495  
   496  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   497  func is32Bit(n int64) bool {
   498  	return n == int64(int32(n))
   499  }
   500  
   501  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   502  func is16Bit(n int64) bool {
   503  	return n == int64(int16(n))
   504  }
   505  
   506  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   507  func is8Bit(n int64) bool {
   508  	return n == int64(int8(n))
   509  }
   510  
   511  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   512  func isU8Bit(n int64) bool {
   513  	return n == int64(uint8(n))
   514  }
   515  
   516  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   517  func isU12Bit(n int64) bool {
   518  	return 0 <= n && n < (1<<12)
   519  }
   520  
   521  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   522  func isU16Bit(n int64) bool {
   523  	return n == int64(uint16(n))
   524  }
   525  
   526  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   527  func isU32Bit(n int64) bool {
   528  	return n == int64(uint32(n))
   529  }
   530  
   531  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   532  func is20Bit(n int64) bool {
   533  	return -(1<<19) <= n && n < (1<<19)
   534  }
   535  
   536  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   537  func b2i(b bool) int64 {
   538  	if b {
   539  		return 1
   540  	}
   541  	return 0
   542  }
   543  
   544  // b2i32 translates a boolean value to 0 or 1.
   545  func b2i32(b bool) int32 {
   546  	if b {
   547  		return 1
   548  	}
   549  	return 0
   550  }
   551  
   552  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   553  // A shift is bounded if it is shifting by less than the width of the shifted value.
   554  func shiftIsBounded(v *Value) bool {
   555  	return v.AuxInt != 0
   556  }
   557  
   558  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   559  // generated code as much as possible.
   560  func canonLessThan(x, y *Value) bool {
   561  	if x.Op != y.Op {
   562  		return x.Op < y.Op
   563  	}
   564  	if !x.Pos.SameFileAndLine(y.Pos) {
   565  		return x.Pos.Before(y.Pos)
   566  	}
   567  	return x.ID < y.ID
   568  }
   569  
   570  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   571  // of the mantissa. It will panic if the truncation results in lost information.
   572  func truncate64Fto32F(f float64) float32 {
   573  	if !isExactFloat32(f) {
   574  		panic("truncate64Fto32F: truncation is not exact")
   575  	}
   576  	if !math.IsNaN(f) {
   577  		return float32(f)
   578  	}
   579  	// NaN bit patterns aren't necessarily preserved across conversion
   580  	// instructions so we need to do the conversion manually.
   581  	b := math.Float64bits(f)
   582  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   583  	//          | sign                  | exponent   | mantissa       |
   584  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   585  	return math.Float32frombits(r)
   586  }
   587  
   588  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   589  // pattern of the mantissa.
   590  func extend32Fto64F(f float32) float64 {
   591  	if !math.IsNaN(float64(f)) {
   592  		return float64(f)
   593  	}
   594  	// NaN bit patterns aren't necessarily preserved across conversion
   595  	// instructions so we need to do the conversion manually.
   596  	b := uint64(math.Float32bits(f))
   597  	//   | sign                  | exponent      | mantissa                    |
   598  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   599  	return math.Float64frombits(r)
   600  }
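        // For example (illustrative): the quiet float32 NaN 0x7fc00000 extends to the
        // quiet float64 NaN 0x7ff8000000000000: the sign bit stays 0, the exponent
        // becomes all ones (0x7ff), and the 23-bit payload 0x400000 is shifted up by
        // 52-23 = 29 bits into the top of the 52-bit mantissa.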
   601  
   602  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   603  func DivisionNeedsFixUp(v *Value) bool {
   604  	return v.AuxInt == 0
   605  }
   606  
   607  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   608  func auxFrom64F(f float64) int64 {
   609  	if f != f {
   610  		panic("can't encode a NaN in AuxInt field")
   611  	}
   612  	return int64(math.Float64bits(f))
   613  }
   614  
   615  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   616  func auxFrom32F(f float32) int64 {
   617  	if f != f {
   618  		panic("can't encode a NaN in AuxInt field")
   619  	}
   620  	return int64(math.Float64bits(extend32Fto64F(f)))
   621  }
   622  
   623  // auxTo32F decodes a float32 from the AuxInt value provided.
   624  func auxTo32F(i int64) float32 {
   625  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   626  }
   627  
   628  // auxTo64F decodes a float64 from the AuxInt value provided.
   629  func auxTo64F(i int64) float64 {
   630  	return math.Float64frombits(uint64(i))
   631  }
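        // For example (illustrative), a float32 constant round-trips through AuxInt via
        // its exact float64 extension:
        //
        //	auxFrom32F(1.5)              // int64(math.Float64bits(1.5)) == 0x3ff8000000000000
        //	auxTo32F(0x3ff8000000000000) // 1.5
        //
        // The round trip is lossless because every non-NaN float32 value is exactly
        // representable as a float64 (NaNs are rejected by the panic above).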
   632  
   633  func auxIntToBool(i int64) bool {
   634  	if i == 0 {
   635  		return false
   636  	}
   637  	return true
   638  }
   639  func auxIntToInt8(i int64) int8 {
   640  	return int8(i)
   641  }
   642  func auxIntToInt16(i int64) int16 {
   643  	return int16(i)
   644  }
   645  func auxIntToInt32(i int64) int32 {
   646  	return int32(i)
   647  }
   648  func auxIntToInt64(i int64) int64 {
   649  	return i
   650  }
   651  func auxIntToUint8(i int64) uint8 {
   652  	return uint8(i)
   653  }
   654  func auxIntToFloat32(i int64) float32 {
   655  	return float32(math.Float64frombits(uint64(i)))
   656  }
   657  func auxIntToFloat64(i int64) float64 {
   658  	return math.Float64frombits(uint64(i))
   659  }
   660  func auxIntToValAndOff(i int64) ValAndOff {
   661  	return ValAndOff(i)
   662  }
   663  func auxIntToArm64BitField(i int64) arm64BitField {
   664  	return arm64BitField(i)
   665  }
   666  func auxIntToInt128(x int64) int128 {
   667  	if x != 0 {
   668  		panic("nonzero int128 not allowed")
   669  	}
   670  	return 0
   671  }
   672  func auxIntToFlagConstant(x int64) flagConstant {
   673  	return flagConstant(x)
   674  }
   675  
   676  func auxIntToOp(cc int64) Op {
   677  	return Op(cc)
   678  }
   679  
   680  func boolToAuxInt(b bool) int64 {
   681  	if b {
   682  		return 1
   683  	}
   684  	return 0
   685  }
   686  func int8ToAuxInt(i int8) int64 {
   687  	return int64(i)
   688  }
   689  func int16ToAuxInt(i int16) int64 {
   690  	return int64(i)
   691  }
   692  func int32ToAuxInt(i int32) int64 {
   693  	return int64(i)
   694  }
   695  func int64ToAuxInt(i int64) int64 {
   696  	return int64(i)
   697  }
   698  func uint8ToAuxInt(i uint8) int64 {
   699  	return int64(int8(i))
   700  }
   701  func float32ToAuxInt(f float32) int64 {
   702  	return int64(math.Float64bits(float64(f)))
   703  }
   704  func float64ToAuxInt(f float64) int64 {
   705  	return int64(math.Float64bits(f))
   706  }
   707  func valAndOffToAuxInt(v ValAndOff) int64 {
   708  	return int64(v)
   709  }
   710  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   711  	return int64(v)
   712  }
   713  func int128ToAuxInt(x int128) int64 {
   714  	if x != 0 {
   715  		panic("nonzero int128 not allowed")
   716  	}
   717  	return 0
   718  }
   719  func flagConstantToAuxInt(x flagConstant) int64 {
   720  	return int64(x)
   721  }
   722  
   723  func opToAuxInt(o Op) int64 {
   724  	return int64(o)
   725  }
   726  
   727  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   728  type Aux interface {
   729  	CanBeAnSSAAux()
   730  }
   731  
   732  // stringAux wraps string values for use in Aux.
   733  type stringAux string
   734  
   735  func (stringAux) CanBeAnSSAAux() {}
   736  
   737  func auxToString(i Aux) string {
   738  	return string(i.(stringAux))
   739  }
   740  func auxToSym(i Aux) Sym {
   741  	// TODO: kind of a hack - allows nil interface through
   742  	s, _ := i.(Sym)
   743  	return s
   744  }
   745  func auxToType(i Aux) *types.Type {
   746  	return i.(*types.Type)
   747  }
   748  func auxToCall(i Aux) *AuxCall {
   749  	return i.(*AuxCall)
   750  }
   751  func auxToS390xCCMask(i Aux) s390x.CCMask {
   752  	return i.(s390x.CCMask)
   753  }
   754  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   755  	return i.(s390x.RotateParams)
   756  }
   757  
   758  func StringToAux(s string) Aux {
   759  	return stringAux(s)
   760  }
   761  func symToAux(s Sym) Aux {
   762  	return s
   763  }
   764  func callToAux(s *AuxCall) Aux {
   765  	return s
   766  }
   767  func typeToAux(t *types.Type) Aux {
   768  	return t
   769  }
   770  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   771  	return c
   772  }
   773  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   774  	return r
   775  }
   776  
   777  // uaddOvf reports whether unsigned a+b would overflow.
   778  func uaddOvf(a, b int64) bool {
   779  	return uint64(a)+uint64(b) < uint64(a)
   780  }
   781  
   782  // loadLSymOffset simulates reading a word at an offset into a
   783  // read-only symbol's runtime memory. If it would read a pointer to
   784  // another symbol, that symbol is returned. Otherwise, it returns nil.
   785  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   786  	if lsym.Type != objabi.SRODATA {
   787  		return nil
   788  	}
   789  
   790  	for _, r := range lsym.R {
   791  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   792  			return r.Sym
   793  		}
   794  	}
   795  
   796  	return nil
   797  }
   798  
   799  // de-virtualize an InterLECall
   800  // 'sym' is the symbol for the itab.
   801  func devirtLESym(v *Value, aux Aux, sym Sym, offset int64) *obj.LSym {
   802  	n, ok := sym.(*obj.LSym)
   803  	if !ok {
   804  		return nil
   805  	}
   806  
   807  	lsym := loadLSymOffset(n, offset)
   808  	if f := v.Block.Func; f.pass.debug > 0 {
   809  		if lsym != nil {
   810  			f.Warnl(v.Pos, "de-virtualizing call")
   811  		} else {
   812  			f.Warnl(v.Pos, "couldn't de-virtualize call")
   813  		}
   814  	}
   815  	return lsym
   816  }
   817  
   818  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   819  	v.Op = OpStaticLECall
   820  	auxcall := v.Aux.(*AuxCall)
   821  	auxcall.Fn = sym
   822  	// Remove first arg
   823  	v.Args[0].Uses--
   824  	copy(v.Args[0:], v.Args[1:])
   825  	v.Args[len(v.Args)-1] = nil // aid GC
   826  	v.Args = v.Args[:len(v.Args)-1]
   827  	return v
   828  }
   829  
   830  // isSamePtr reports whether p1 and p2 point to the same address.
   831  func isSamePtr(p1, p2 *Value) bool {
   832  	if p1 == p2 {
   833  		return true
   834  	}
   835  	if p1.Op != p2.Op {
   836  		return false
   837  	}
   838  	switch p1.Op {
   839  	case OpOffPtr:
   840  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   841  	case OpAddr, OpLocalAddr:
   842  		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   843  		// Checking for value equality only works after [z]cse has run.
   844  		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   845  	case OpAddPtr:
   846  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   847  	}
   848  	return false
   849  }
   850  
   851  func isStackPtr(v *Value) bool {
   852  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   853  		v = v.Args[0]
   854  	}
   855  	return v.Op == OpSP || v.Op == OpLocalAddr
   856  }
   857  
   858  // disjoint reports whether the memory region specified by [p1:p1+n1)
   859  // does not overlap with [p2:p2+n2).
   860  // A return value of false does not imply the regions overlap.
   861  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   862  	if n1 == 0 || n2 == 0 {
   863  		return true
   864  	}
   865  	if p1 == p2 {
   866  		return false
   867  	}
   868  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   869  		base, offset = ptr, 0
   870  		for base.Op == OpOffPtr {
   871  			offset += base.AuxInt
   872  			base = base.Args[0]
   873  		}
   874  		return base, offset
   875  	}
   876  	p1, off1 := baseAndOffset(p1)
   877  	p2, off2 := baseAndOffset(p2)
   878  	if isSamePtr(p1, p2) {
   879  		return !overlap(off1, n1, off2, n2)
   880  	}
   881  	// p1 and p2 are not the same, so if they are both OpAddrs then
   882  	// they point to different variables.
   883  	// If one pointer is on the stack and the other is an argument
   884  	// then they can't overlap.
   885  	switch p1.Op {
   886  	case OpAddr, OpLocalAddr:
   887  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   888  			return true
   889  		}
   890  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   891  	case OpArg, OpArgIntReg:
   892  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   893  			return true
   894  		}
   895  	case OpSP:
   896  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   897  	}
   898  	return false
   899  }
   900  
   901  // moveSize returns the number of bytes an aligned MOV instruction moves.
   902  func moveSize(align int64, c *Config) int64 {
   903  	switch {
   904  	case align%8 == 0 && c.PtrSize == 8:
   905  		return 8
   906  	case align%4 == 0:
   907  		return 4
   908  	case align%2 == 0:
   909  		return 2
   910  	}
   911  	return 1
   912  }
   913  
   914  // mergePoint finds a block among a's blocks which dominates b and is itself
   915  // dominated by all of a's blocks. Returns nil if it can't find one.
   916  // Might return nil even if one does exist.
   917  func mergePoint(b *Block, a ...*Value) *Block {
   918  	// Walk backward from b looking for one of the a's blocks.
   919  
   920  	// Max distance
   921  	d := 100
   922  
   923  	for d > 0 {
   924  		for _, x := range a {
   925  			if b == x.Block {
   926  				goto found
   927  			}
   928  		}
   929  		if len(b.Preds) > 1 {
   930  			// Don't know which way to go back. Abort.
   931  			return nil
   932  		}
   933  		b = b.Preds[0].b
   934  		d--
   935  	}
   936  	return nil // too far away
   937  found:
   938  	// At this point, b is the block containing the first of a's values found by walking backwards.
   939  	// If we return anything, it will be this block, saved as r.
   940  	r := b
   941  
   942  	// Keep going, counting the other a's that we find. They must all dominate r.
   943  	na := 0
   944  	for d > 0 {
   945  		for _, x := range a {
   946  			if b == x.Block {
   947  				na++
   948  			}
   949  		}
   950  		if na == len(a) {
   951  			// Found all of a in a backwards walk. We can return r.
   952  			return r
   953  		}
   954  		if len(b.Preds) > 1 {
   955  			return nil
   956  		}
   957  		b = b.Preds[0].b
   958  		d--
   959  
   960  	}
   961  	return nil // too far away
   962  }
   963  
   964  // clobber invalidates values. Returns true.
   965  // clobber is used by rewrite rules to:
   966  //
   967  //	A) make sure the values are really dead and never used again.
   968  //	B) decrement use counts of the values' args.
   969  func clobber(vv ...*Value) bool {
   970  	for _, v := range vv {
   971  		v.reset(OpInvalid)
   972  		// Note: leave v.Block intact.  The Block field is used after clobber.
   973  	}
   974  	return true
   975  }
   976  
   977  // clobberIfDead resets v when use count is 1. Returns true.
   978  // clobberIfDead is used by rewrite rules to decrement
   979  // use counts of v's args when v is dead and never used.
   980  func clobberIfDead(v *Value) bool {
   981  	if v.Uses == 1 {
   982  		v.reset(OpInvalid)
   983  	}
   984  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   985  	return true
   986  }
   987  
   988  // noteRule is an easy way to track if a rule is matched when writing
   989  // new ones.  Make the rule of interest also conditional on
   990  //
   991  //	noteRule("note to self: rule of interest matched")
   992  //
   993  // and that message will print when the rule matches.
   994  func noteRule(s string) bool {
   995  	fmt.Println(s)
   996  	return true
   997  }
   998  
   999  // countRule increments Func.ruleMatches[key].
  1000  // If Func.ruleMatches is non-nil at the end
  1001  // of compilation, it will be printed to stdout.
  1002  // This is intended to make it easier to find which functions
  1003  // contain lots of rule matches when developing new rules.
  1004  func countRule(v *Value, key string) bool {
  1005  	f := v.Block.Func
  1006  	if f.ruleMatches == nil {
  1007  		f.ruleMatches = make(map[string]int)
  1008  	}
  1009  	f.ruleMatches[key]++
  1010  	return true
  1011  }
  1012  
  1013  // warnRule generates compiler debug output with string s when
  1014  // v is not in autogenerated code, cond is true and the rule has fired.
  1015  func warnRule(cond bool, v *Value, s string) bool {
  1016  	if pos := v.Pos; pos.Line() > 1 && cond {
  1017  		v.Block.Func.Warnl(pos, s)
  1018  	}
  1019  	return true
  1020  }
  1021  
  1022  // for a pseudo-op like (LessThan x), extract x.
  1023  func flagArg(v *Value) *Value {
  1024  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1025  		return nil
  1026  	}
  1027  	return v.Args[0]
  1028  }
  1029  
  1030  // arm64Negate finds the complement to an ARM64 condition code,
  1031  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1032  //
  1033  // For floating point, it's more subtle because NaN is unordered. We do
  1034  // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
  1035  func arm64Negate(op Op) Op {
  1036  	switch op {
  1037  	case OpARM64LessThan:
  1038  		return OpARM64GreaterEqual
  1039  	case OpARM64LessThanU:
  1040  		return OpARM64GreaterEqualU
  1041  	case OpARM64GreaterThan:
  1042  		return OpARM64LessEqual
  1043  	case OpARM64GreaterThanU:
  1044  		return OpARM64LessEqualU
  1045  	case OpARM64LessEqual:
  1046  		return OpARM64GreaterThan
  1047  	case OpARM64LessEqualU:
  1048  		return OpARM64GreaterThanU
  1049  	case OpARM64GreaterEqual:
  1050  		return OpARM64LessThan
  1051  	case OpARM64GreaterEqualU:
  1052  		return OpARM64LessThanU
  1053  	case OpARM64Equal:
  1054  		return OpARM64NotEqual
  1055  	case OpARM64NotEqual:
  1056  		return OpARM64Equal
  1057  	case OpARM64LessThanF:
  1058  		return OpARM64NotLessThanF
  1059  	case OpARM64NotLessThanF:
  1060  		return OpARM64LessThanF
  1061  	case OpARM64LessEqualF:
  1062  		return OpARM64NotLessEqualF
  1063  	case OpARM64NotLessEqualF:
  1064  		return OpARM64LessEqualF
  1065  	case OpARM64GreaterThanF:
  1066  		return OpARM64NotGreaterThanF
  1067  	case OpARM64NotGreaterThanF:
  1068  		return OpARM64GreaterThanF
  1069  	case OpARM64GreaterEqualF:
  1070  		return OpARM64NotGreaterEqualF
  1071  	case OpARM64NotGreaterEqualF:
  1072  		return OpARM64GreaterEqualF
  1073  	default:
  1074  		panic("unreachable")
  1075  	}
  1076  }
  1077  
  1078  // arm64Invert evaluates (InvertFlags op), which
  1079  // is the same as altering the condition codes such
  1080  // that the same result would be produced if the arguments
  1081  // to the flag-generating instruction were reversed, e.g.
  1082  // (InvertFlags (CMP x y)) -> (CMP y x)
  1083  func arm64Invert(op Op) Op {
  1084  	switch op {
  1085  	case OpARM64LessThan:
  1086  		return OpARM64GreaterThan
  1087  	case OpARM64LessThanU:
  1088  		return OpARM64GreaterThanU
  1089  	case OpARM64GreaterThan:
  1090  		return OpARM64LessThan
  1091  	case OpARM64GreaterThanU:
  1092  		return OpARM64LessThanU
  1093  	case OpARM64LessEqual:
  1094  		return OpARM64GreaterEqual
  1095  	case OpARM64LessEqualU:
  1096  		return OpARM64GreaterEqualU
  1097  	case OpARM64GreaterEqual:
  1098  		return OpARM64LessEqual
  1099  	case OpARM64GreaterEqualU:
  1100  		return OpARM64LessEqualU
  1101  	case OpARM64Equal, OpARM64NotEqual:
  1102  		return op
  1103  	case OpARM64LessThanF:
  1104  		return OpARM64GreaterThanF
  1105  	case OpARM64GreaterThanF:
  1106  		return OpARM64LessThanF
  1107  	case OpARM64LessEqualF:
  1108  		return OpARM64GreaterEqualF
  1109  	case OpARM64GreaterEqualF:
  1110  		return OpARM64LessEqualF
  1111  	case OpARM64NotLessThanF:
  1112  		return OpARM64NotGreaterThanF
  1113  	case OpARM64NotGreaterThanF:
  1114  		return OpARM64NotLessThanF
  1115  	case OpARM64NotLessEqualF:
  1116  		return OpARM64NotGreaterEqualF
  1117  	case OpARM64NotGreaterEqualF:
  1118  		return OpARM64NotLessEqualF
  1119  	default:
  1120  		panic("unreachable")
  1121  	}
  1122  }
  1123  
  1124  // evaluate an ARM64 op against a flags value
  1125  // that is potentially constant; return 1 for true,
  1126  // -1 for false, and 0 for not constant.
  1127  func ccARM64Eval(op Op, flags *Value) int {
  1128  	fop := flags.Op
  1129  	if fop == OpARM64InvertFlags {
  1130  		return -ccARM64Eval(op, flags.Args[0])
  1131  	}
  1132  	if fop != OpARM64FlagConstant {
  1133  		return 0
  1134  	}
  1135  	fc := flagConstant(flags.AuxInt)
  1136  	b2i := func(b bool) int {
  1137  		if b {
  1138  			return 1
  1139  		}
  1140  		return -1
  1141  	}
  1142  	switch op {
  1143  	case OpARM64Equal:
  1144  		return b2i(fc.eq())
  1145  	case OpARM64NotEqual:
  1146  		return b2i(fc.ne())
  1147  	case OpARM64LessThan:
  1148  		return b2i(fc.lt())
  1149  	case OpARM64LessThanU:
  1150  		return b2i(fc.ult())
  1151  	case OpARM64GreaterThan:
  1152  		return b2i(fc.gt())
  1153  	case OpARM64GreaterThanU:
  1154  		return b2i(fc.ugt())
  1155  	case OpARM64LessEqual:
  1156  		return b2i(fc.le())
  1157  	case OpARM64LessEqualU:
  1158  		return b2i(fc.ule())
  1159  	case OpARM64GreaterEqual:
  1160  		return b2i(fc.ge())
  1161  	case OpARM64GreaterEqualU:
  1162  		return b2i(fc.uge())
  1163  	}
  1164  	return 0
  1165  }
  1166  
  1167  // logRule logs the use of the rule s. This will only be enabled if
  1168  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1169  func logRule(s string) {
  1170  	if ruleFile == nil {
  1171  		// Open a log file to write log to. We open in append
  1172  		// mode because all.bash runs the compiler lots of times,
  1173  		// and we want the concatenation of all of those logs.
  1174  		// This means, of course, that users need to rm the old log
  1175  		// to get fresh data.
  1176  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1177  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1178  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1179  		if err != nil {
  1180  			panic(err)
  1181  		}
  1182  		ruleFile = w
  1183  	}
  1184  	_, err := fmt.Fprintln(ruleFile, s)
  1185  	if err != nil {
  1186  		panic(err)
  1187  	}
  1188  }
  1189  
  1190  var ruleFile io.Writer
  1191  
  1192  func min(x, y int64) int64 {
  1193  	if x < y {
  1194  		return x
  1195  	}
  1196  	return y
  1197  }
  1198  
  1199  func isConstZero(v *Value) bool {
  1200  	switch v.Op {
  1201  	case OpConstNil:
  1202  		return true
  1203  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1204  		return v.AuxInt == 0
  1205  	}
  1206  	return false
  1207  }
  1208  
  1209  // reciprocalExact64 reports whether 1/c is exactly representable.
  1210  func reciprocalExact64(c float64) bool {
  1211  	b := math.Float64bits(c)
  1212  	man := b & (1<<52 - 1)
  1213  	if man != 0 {
  1214  		return false // not a power of 2, denormal, or NaN
  1215  	}
  1216  	exp := b >> 52 & (1<<11 - 1)
  1217  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1218  	// changes the exponent to 0x7fe-exp.
  1219  	switch exp {
  1220  	case 0:
  1221  		return false // ±0
  1222  	case 0x7ff:
  1223  		return false // ±inf
  1224  	case 0x7fe:
  1225  		return false // exponent is not representable
  1226  	default:
  1227  		return true
  1228  	}
  1229  }
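        // For example (illustrative):
        //
        //	reciprocalExact64(4.0)                 // true:  mantissa 0, exponent field 0x401; 1/4 == 0.25 is exact
        //	reciprocalExact64(3.0)                 // false: nonzero mantissa, so 1/3 is not a power of 2
        //	reciprocalExact64(math.Ldexp(1, 1023)) // false: exponent field 0x7fe, so the reciprocal would be subnormal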
  1230  
  1231  // reciprocalExact32 reports whether 1/c is exactly representable.
  1232  func reciprocalExact32(c float32) bool {
  1233  	b := math.Float32bits(c)
  1234  	man := b & (1<<23 - 1)
  1235  	if man != 0 {
  1236  		return false // not a power of 2, denormal, or NaN
  1237  	}
  1238  	exp := b >> 23 & (1<<8 - 1)
  1239  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1240  	// changes the exponent to 0xfe-exp.
  1241  	switch exp {
  1242  	case 0:
  1243  		return false // ±0
  1244  	case 0xff:
  1245  		return false // ±inf
  1246  	case 0xfe:
  1247  		return false // exponent is not representable
  1248  	default:
  1249  		return true
  1250  	}
  1251  }
  1252  
  1253  // check if an immediate can be directly encoded into an ARM instruction.
  1254  func isARMImmRot(v uint32) bool {
  1255  	for i := 0; i < 16; i++ {
  1256  		if v&^0xff == 0 {
  1257  			return true
  1258  		}
  1259  		v = v<<2 | v>>30
  1260  	}
  1261  
  1262  	return false
  1263  }
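        // For example (illustrative): an ARM immediate is an 8-bit value rotated right by
        // an even amount, so
        //
        //	isARMImmRot(0xff000000) // true:  0xff rotated right by 8
        //	isARMImmRot(0x00000102) // false: bringing bits 1 and 8 into the low byte would need an odd rotation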
  1264  
  1265  // overlap reports whether the ranges specified by the given offset and
  1266  // size pairs overlap.
  1267  func overlap(offset1, size1, offset2, size2 int64) bool {
  1268  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1269  		return true
  1270  	}
  1271  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1272  		return true
  1273  	}
  1274  	return false
  1275  }
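        // For example (illustrative):
        //
        //	overlap(0, 8, 4, 8) // true:  [0,8) and [4,12) share bytes 4 through 7
        //	overlap(0, 4, 4, 4) // false: [0,4) and [4,8) are adjacent but disjoint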
  1276  
  1277  func areAdjacentOffsets(off1, off2, size int64) bool {
  1278  	return off1+size == off2 || off1 == off2+size
  1279  }
  1280  
  1281  // check if a value zeroes out the upper 32 bits of a 64-bit register.
  1282  // depth limits recursion depth. In AMD64.rules 3 is used as the limit,
  1283  // because it catches the same number of cases as 4.
  1284  func zeroUpper32Bits(x *Value, depth int) bool {
  1285  	switch x.Op {
  1286  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1287  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1288  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1289  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1290  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1291  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1292  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1293  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1294  		OpAMD64SHLL, OpAMD64SHLLconst:
  1295  		return true
  1296  	case OpArg:
  1297  		return x.Type.Size() == 4
  1298  	case OpPhi, OpSelect0, OpSelect1:
  1299  		// Phis can use each other as arguments; instead of tracking visited values,
  1300  		// just limit the recursion depth.
  1301  		if depth <= 0 {
  1302  			return false
  1303  		}
  1304  		for i := range x.Args {
  1305  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1306  				return false
  1307  			}
  1308  		}
  1309  		return true
  1310  
  1311  	}
  1312  	return false
  1313  }
  1314  
  1315  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1316  func zeroUpper48Bits(x *Value, depth int) bool {
  1317  	switch x.Op {
  1318  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1319  		return true
  1320  	case OpArg:
  1321  		return x.Type.Size() == 2
  1322  	case OpPhi, OpSelect0, OpSelect1:
  1323  		// Phis can use each other as arguments; instead of tracking visited values,
  1324  		// just limit the recursion depth.
  1325  		if depth <= 0 {
  1326  			return false
  1327  		}
  1328  		for i := range x.Args {
  1329  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1330  				return false
  1331  			}
  1332  		}
  1333  		return true
  1334  
  1335  	}
  1336  	return false
  1337  }
  1338  
  1339  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1340  func zeroUpper56Bits(x *Value, depth int) bool {
  1341  	switch x.Op {
  1342  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1343  		return true
  1344  	case OpArg:
  1345  		return x.Type.Size() == 1
  1346  	case OpPhi, OpSelect0, OpSelect1:
  1347  		// Phis can use each other as arguments; instead of tracking visited values,
  1348  		// just limit the recursion depth.
  1349  		if depth <= 0 {
  1350  			return false
  1351  		}
  1352  		for i := range x.Args {
  1353  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1354  				return false
  1355  			}
  1356  		}
  1357  		return true
  1358  
  1359  	}
  1360  	return false
  1361  }
  1362  
  1363  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1364  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1365  // safe, either because Move will do all of its loads before any of its stores, or
  1366  // because the arguments are known to be disjoint.
  1367  // This is used as a check for replacing memmove with Move ops.
  1368  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1369  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1370  	// Move ops may or may not be faster for large sizes depending on how the platform
  1371  	// lowers them, so we only perform this optimization on platforms that we know to
  1372  	// have fast Move ops.
  1373  	switch c.arch {
  1374  	case "amd64":
  1375  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1376  	case "386", "arm64":
  1377  		return sz <= 8
  1378  	case "s390x", "ppc64", "ppc64le":
  1379  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1380  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1381  		return sz <= 4
  1382  	}
  1383  	return false
  1384  }
  1385  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1386  	return isInlinableMemmove(dst, src, sz, c)
  1387  }
  1388  
  1389  // logLargeCopy logs the occurrence of a large copy.
  1390  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1391  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1392  func logLargeCopy(v *Value, s int64) bool {
  1393  	if s < 128 {
  1394  		return true
  1395  	}
  1396  	if logopt.Enabled() {
  1397  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1398  	}
  1399  	return true
  1400  }
  1401  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1402  	if s < 128 {
  1403  		return
  1404  	}
  1405  	if logopt.Enabled() {
  1406  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1407  	}
  1408  }
  1409  
  1410  // hasSmallRotate reports whether the architecture has rotate instructions
  1411  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1412  func hasSmallRotate(c *Config) bool {
  1413  	switch c.arch {
  1414  	case "amd64", "386":
  1415  		return true
  1416  	default:
  1417  		return false
  1418  	}
  1419  }
  1420  
  1421  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1422  	if sh < 0 || sh >= sz {
  1423  		panic("PPC64 shift arg sh out of range")
  1424  	}
  1425  	if mb < 0 || mb >= sz {
  1426  		panic("PPC64 shift arg mb out of range")
  1427  	}
  1428  	if me < 0 || me >= sz {
  1429  		panic("PPC64 shift arg me out of range")
  1430  	}
  1431  	return int32(sh<<16 | mb<<8 | me)
  1432  }
  1433  
  1434  func GetPPC64Shiftsh(auxint int64) int64 {
  1435  	return int64(int8(auxint >> 16))
  1436  }
  1437  
  1438  func GetPPC64Shiftmb(auxint int64) int64 {
  1439  	return int64(int8(auxint >> 8))
  1440  }
  1441  
  1442  func GetPPC64Shiftme(auxint int64) int64 {
  1443  	return int64(int8(auxint))
  1444  }
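        // For example (illustrative), the packed shift fields round-trip:
        //
        //	a := newPPC64ShiftAuxInt(2, 4, 28, 32) // 2<<16 | 4<<8 | 28 == 0x2041c
        //	GetPPC64Shiftsh(int64(a))              // 2
        //	GetPPC64Shiftmb(int64(a))              // 4
        //	GetPPC64Shiftme(int64(a))              // 28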
  1445  
  1446  // Test if this value can be encoded as a mask for an rlwinm-like
  1447  // operation.  Masks can also extend from the msb and wrap around to
  1448  // the lsb.  That is, the valid masks are 32 bit strings
  1449  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1450  func isPPC64WordRotateMask(v64 int64) bool {
  1451  	// Isolate the rightmost 1 (0 if there is none) and add.
  1452  	v := uint32(v64)
  1453  	vp := (v & -v) + v
  1454  	// Likewise, for the wrapping case.
  1455  	vn := ^v
  1456  	vpn := (vn & -vn) + vn
  1457  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1458  }
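        // For example (illustrative): adding the isolated lowest set bit to v clears a
        // contiguous run of ones, so v&vp == 0 exactly when v's ones form a single run;
        // the complementary check on vn handles runs that wrap around:
        //
        //	isPPC64WordRotateMask(0x00ff0000) // true:  one contiguous run
        //	isPPC64WordRotateMask(0xff0000ff) // true:  a run that wraps from msb to lsb
        //	isPPC64WordRotateMask(0x00f0f000) // false: two separate runs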
  1459  
  1460  // Compress mask and shift into a single value of the form
  1461  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1462  // be used to regenerate the input mask.
  1463  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1464  	var mb, me, mbn, men int
  1465  
  1466  	// Determine boundaries and then decode them
  1467  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1468  		panic("Invalid PPC64 rotate mask")
  1469  	} else if nbits == 32 {
  1470  		mb = bits.LeadingZeros32(uint32(mask))
  1471  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1472  		mbn = bits.LeadingZeros32(^uint32(mask))
  1473  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1474  	} else {
  1475  		mb = bits.LeadingZeros64(uint64(mask))
  1476  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1477  		mbn = bits.LeadingZeros64(^uint64(mask))
  1478  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1479  	}
  1480  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1481  	if mb == 0 && me == int(nbits) {
  1482  		// swap the inverted values
  1483  		mb, me = men, mbn
  1484  	}
  1485  
  1486  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1487  }
  1488  
  1489  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1490  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1491  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1492  	auxint := uint64(sauxint)
  1493  	rotate = int64((auxint >> 16) & 0xFF)
  1494  	mb = int64((auxint >> 8) & 0xFF)
  1495  	me = int64((auxint >> 0) & 0xFF)
  1496  	nbits := int64((auxint >> 24) & 0xFF)
  1497  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1498  	if mb > me {
  1499  		mask = ^mask
  1500  	}
  1501  	if nbits == 32 {
  1502  		mask = uint64(uint32(mask))
  1503  	}
  1504  
  1505  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1506  	// is inclusive.
  1507  	me = (me - 1) & (nbits - 1)
  1508  	return
  1509  }
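        // For example (illustrative): encoding mask 0x000000f0 with rotate 3 in a 32-bit
        // word gives mb=24 and me=28, so encodePPC64RotateMask(3, 0xf0, 32) packs to
        // 28 | 24<<8 | 3<<16 | 32<<24 == 0x2003181c; decoding that value returns
        // rotate=3, mb=24, me=27 (ME adjusted to be inclusive) and mask=0xf0.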
  1510  
  1511  // This verifies that the mask is a set of
  1512  // consecutive bits including the least
  1513  // significant bit.
  1514  func isPPC64ValidShiftMask(v int64) bool {
  1515  	if (v != 0) && ((v+1)&v) == 0 {
  1516  		return true
  1517  	}
  1518  	return false
  1519  }
  1520  
  1521  func getPPC64ShiftMaskLength(v int64) int64 {
  1522  	return int64(bits.Len64(uint64(v)))
  1523  }
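        // For example (illustrative):
        //
        //	isPPC64ValidShiftMask(0x7)   // true:  0b111 is contiguous and includes bit 0
        //	isPPC64ValidShiftMask(0xe)   // false: 0b1110 does not include the least significant bit
        //	getPPC64ShiftMaskLength(0x7) // 3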
  1524  
  1525  // Decompose a shift right into an equivalent rotate/mask,
  1526  // and return mask & m.
  1527  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1528  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1529  	return m & int64(smask)
  1530  }
  1531  
  1532  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1533  func mergePPC64AndSrwi(m, s int64) int64 {
  1534  	mask := mergePPC64RShiftMask(m, s, 32)
  1535  	if !isPPC64WordRotateMask(mask) {
  1536  		return 0
  1537  	}
  1538  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1539  }
  1540  
  1541  // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1542  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1543  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1544  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1545  	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
  1546  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1547  
  1548  	// Rewrite mask to apply after the final left shift.
  1549  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1550  
  1551  	r_1 := 32 - srw
  1552  	r_2 := GetPPC64Shiftsh(sld)
  1553  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1554  
  1555  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1556  		return 0
  1557  	}
  1558  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1559  }
  1560  
  1561  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1562  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1563  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1564  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1565  	// For CLRLSLDI, it's more convenient to think of it as masking the left bits, then rotating left.
  1566  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1567  
  1568  	// combine the masks, and adjust for the final left shift.
  1569  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1570  	r_2 := GetPPC64Shiftsh(int64(sld))
  1571  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1572  
  1573  	// Verify the result is still a valid bitmask of <= 32 bits.
  1574  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1575  		return 0
  1576  	}
  1577  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1578  }
  1579  
  1580  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1581  // or return 0 if they cannot be combined.
  1582  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1583  	if sld > srw || srw >= 32 {
  1584  		return 0
  1585  	}
  1586  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1587  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1588  	mask := (mask_r & mask_l) << uint(sld)
  1589  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1590  }
  1591  
  1592  // Convenience function to rotate a 32 bit constant value by another constant.
  1593  func rotateLeft32(v, rotate int64) int64 {
  1594  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1595  }
  1596  
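        // rotateRight64 rotates a 64 bit constant value right by another constant.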
  1597  func rotateRight64(v, rotate int64) int64 {
  1598  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1599  }
  1600  
  1601  // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
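        // For example, armBFAuxInt(8, 16) encodes lsb 8 and width 16 as 16 | 8<<8,
        // so getARM64BFlsb returns 8 and getARM64BFwidth returns 16.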
  1602  func armBFAuxInt(lsb, width int64) arm64BitField {
  1603  	if lsb < 0 || lsb > 63 {
  1604  		panic("ARM(64) bit field lsb constant out of range")
  1605  	}
  1606  	if width < 1 || lsb+width > 64 {
  1607  		panic("ARM(64) bit field width constant out of range")
  1608  	}
  1609  	return arm64BitField(width | lsb<<8)
  1610  }
  1611  
  1612  // getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
  1613  func (bfc arm64BitField) getARM64BFlsb() int64 {
  1614  	return int64(uint64(bfc) >> 8)
  1615  }
  1616  
  1617  // getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
  1618  func (bfc arm64BitField) getARM64BFwidth() int64 {
  1619  	return int64(bfc) & 0xff
  1620  }
  1621  
  1622  // isARM64BFMask reports whether mask >> rshift, applied at lsb, is a valid arm64 bitfield op mask.
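        // For example, mask = 0xFF0 with rshift = 4 gives 0xFF, a contiguous run
        // of ones starting at bit 0, which is a valid bitfield mask for any lsb
        // small enough that the field still fits in 64 bits.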
  1623  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1624  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1625  	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1626  }
  1627  
  1628  // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1629  func arm64BFWidth(mask, rshift int64) int64 {
  1630  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1631  	if shiftedMask == 0 {
  1632  		panic("ARM64 BF mask is zero")
  1633  	}
  1634  	return nto(shiftedMask)
  1635  }
  1636  
  1637  // sizeof returns the size of t in bytes.
  1638  // It will panic if t is not a *types.Type.
  1639  func sizeof(t interface{}) int64 {
  1640  	return t.(*types.Type).Size()
  1641  }
  1642  
  1643  // registerizable reports whether t is a primitive type that fits in
  1644  // a register. It assumes float64 values will always fit into registers
  1645  // even if that isn't strictly true.
  1646  func registerizable(b *Block, typ *types.Type) bool {
  1647  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1648  		return true
  1649  	}
  1650  	if typ.IsInteger() {
  1651  		return typ.Size() <= b.Func.Config.RegSize
  1652  	}
  1653  	return false
  1654  }
  1655  
  1656  // needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
  1657  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1658  	f := v.Block.Func
  1659  	if !f.Config.Race {
  1660  		return false
  1661  	}
  1662  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1663  		return false
  1664  	}
  1665  	for _, b := range f.Blocks {
  1666  		for _, v := range b.Values {
  1667  			switch v.Op {
  1668  			case OpStaticCall, OpStaticLECall:
  1669  				// The scan for racefuncenter will encounter the matching racefuncexit and vice versa,
  1670  				// so those are allowed, as are calls to panic*.
  1671  				s := v.Aux.(*AuxCall).Fn.String()
  1672  				switch s {
  1673  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1674  					"runtime.panicdivide", "runtime.panicwrap",
  1675  					"runtime.panicshift":
  1676  					continue
  1677  				}
  1678  				// If we encountered any call, we need to keep racefunc*,
  1679  				// for accurate stacktraces.
  1680  				return false
  1681  			case OpPanicBounds, OpPanicExtend:
  1682  				// Note: these are panic generators that are ok (like the static calls above).
  1683  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1684  				// We must keep the race functions if there are any other call types.
  1685  				return false
  1686  			}
  1687  		}
  1688  	}
  1689  	if isSameCall(sym, "runtime.racefuncenter") {
  1690  		// TODO REGISTER ABI this needs to be cleaned up.
  1691  		// If we're removing racefuncenter, remove its argument as well.
  1692  		if v.Args[0].Op != OpStore {
  1693  			if v.Op == OpStaticLECall {
  1694  				// there is no store, yet.
  1695  				return true
  1696  			}
  1697  			return false
  1698  		}
  1699  		mem := v.Args[0].Args[2]
  1700  		v.Args[0].reset(OpCopy)
  1701  		v.Args[0].AddArg(mem)
  1702  	}
  1703  	return true
  1704  }
  1705  
  1706  // symIsRO reports whether sym is a read-only global.
  1707  func symIsRO(sym interface{}) bool {
  1708  	lsym := sym.(*obj.LSym)
  1709  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1710  }
  1711  
  1712  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1713  func symIsROZero(sym Sym) bool {
  1714  	lsym := sym.(*obj.LSym)
  1715  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1716  		return false
  1717  	}
  1718  	for _, b := range lsym.P {
  1719  		if b != 0 {
  1720  			return false
  1721  		}
  1722  	}
  1723  	return true
  1724  }
  1725  
  1726  // read8 reads one byte from the read-only global sym at offset off.
  1727  func read8(sym interface{}, off int64) uint8 {
  1728  	lsym := sym.(*obj.LSym)
  1729  	if off >= int64(len(lsym.P)) || off < 0 {
  1730  		// Invalid index into the global sym.
  1731  		// This can happen in dead code, so we don't want to panic.
  1732  		// Just return any value, it will eventually get ignored.
  1733  		// See issue 29215.
  1734  		return 0
  1735  	}
  1736  	return lsym.P[off]
  1737  }
  1738  
  1739  // read16 reads two bytes from the read-only global sym at offset off.
  1740  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  1741  	lsym := sym.(*obj.LSym)
  1742  	// lsym.P is written lazily.
  1743  	// Bytes requested after the end of lsym.P are 0.
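        	// For example, if off addresses the final byte of lsym.P, only that
        	// byte is copied below and the other byte of the buffer stays zero.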
  1744  	var src []byte
  1745  	if 0 <= off && off < int64(len(lsym.P)) {
  1746  		src = lsym.P[off:]
  1747  	}
  1748  	buf := make([]byte, 2)
  1749  	copy(buf, src)
  1750  	return byteorder.Uint16(buf)
  1751  }
  1752  
  1753  // read32 reads four bytes from the read-only global sym at offset off.
  1754  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  1755  	lsym := sym.(*obj.LSym)
  1756  	var src []byte
  1757  	if 0 <= off && off < int64(len(lsym.P)) {
  1758  		src = lsym.P[off:]
  1759  	}
  1760  	buf := make([]byte, 4)
  1761  	copy(buf, src)
  1762  	return byteorder.Uint32(buf)
  1763  }
  1764  
  1765  // read64 reads eight bytes from the read-only global sym at offset off.
  1766  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  1767  	lsym := sym.(*obj.LSym)
  1768  	var src []byte
  1769  	if 0 <= off && off < int64(len(lsym.P)) {
  1770  		src = lsym.P[off:]
  1771  	}
  1772  	buf := make([]byte, 8)
  1773  	copy(buf, src)
  1774  	return byteorder.Uint64(buf)
  1775  }
  1776  
  1777  // sequentialAddresses reports whether it can prove that x + n == y.
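        // For example, (ADDQ x y) and (LEAQ1 [4] x y) satisfy x + 4 == y,
        // matching the AMD64 ADDQ/LEAQ1 case below.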
  1778  func sequentialAddresses(x, y *Value, n int64) bool {
  1779  	if x == y && n == 0 {
  1780  		return true
  1781  	}
  1782  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  1783  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1784  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1785  		return true
  1786  	}
  1787  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  1788  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1789  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1790  		return true
  1791  	}
  1792  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  1793  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1794  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1795  		return true
  1796  	}
  1797  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  1798  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1799  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1800  		return true
  1801  	}
  1802  	return false
  1803  }
  1804  
  1805  // flagConstant represents the result of a compile-time comparison.
  1806  // The sense of these flags does not necessarily represent the hardware's notion
  1807  // of a flags register - these are just a compile-time construct.
  1808  // We happen to match the semantics to those of arm/arm64.
  1809  // Note that these semantics differ from x86: the carry flag has the opposite
  1810  // sense on a subtraction!
  1811  //
  1812  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  1813  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  1814  //	 (because it does x + ^y + C).
  1815  //
  1816  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
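        //
        // For example, for the unsigned subtraction 1 - 2 (which borrows),
        // subFlags64 below records C=0 in the arm style, whereas x86 hardware
        // would report the same borrow as CF=1.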
  1817  type flagConstant uint8
  1818  
  1819  // N reports whether the result of an operation is negative (high bit set).
  1820  func (fc flagConstant) N() bool {
  1821  	return fc&1 != 0
  1822  }
  1823  
  1824  // Z reports whether the result of an operation is 0.
  1825  func (fc flagConstant) Z() bool {
  1826  	return fc&2 != 0
  1827  }
  1828  
  1829  // C reports whether an unsigned add overflowed (carry), or an
  1830  // unsigned subtract did not underflow (borrow).
  1831  func (fc flagConstant) C() bool {
  1832  	return fc&4 != 0
  1833  }
  1834  
  1835  // V reports whether a signed operation overflowed or underflowed.
  1836  func (fc flagConstant) V() bool {
  1837  	return fc&8 != 0
  1838  }
  1839  
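        // The following predicates interpret N/Z/C/V using the arm/arm64
        // condition-code definitions (e.g. lt is N != V, ult is C clear).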
  1840  func (fc flagConstant) eq() bool {
  1841  	return fc.Z()
  1842  }
  1843  func (fc flagConstant) ne() bool {
  1844  	return !fc.Z()
  1845  }
  1846  func (fc flagConstant) lt() bool {
  1847  	return fc.N() != fc.V()
  1848  }
  1849  func (fc flagConstant) le() bool {
  1850  	return fc.Z() || fc.lt()
  1851  }
  1852  func (fc flagConstant) gt() bool {
  1853  	return !fc.Z() && fc.ge()
  1854  }
  1855  func (fc flagConstant) ge() bool {
  1856  	return fc.N() == fc.V()
  1857  }
  1858  func (fc flagConstant) ult() bool {
  1859  	return !fc.C()
  1860  }
  1861  func (fc flagConstant) ule() bool {
  1862  	return fc.Z() || fc.ult()
  1863  }
  1864  func (fc flagConstant) ugt() bool {
  1865  	return !fc.Z() && fc.uge()
  1866  }
  1867  func (fc flagConstant) uge() bool {
  1868  	return fc.C()
  1869  }
  1870  
  1871  func (fc flagConstant) ltNoov() bool {
  1872  	return fc.lt() && !fc.V()
  1873  }
  1874  func (fc flagConstant) leNoov() bool {
  1875  	return fc.le() && !fc.V()
  1876  }
  1877  func (fc flagConstant) gtNoov() bool {
  1878  	return fc.gt() && !fc.V()
  1879  }
  1880  func (fc flagConstant) geNoov() bool {
  1881  	return fc.ge() && !fc.V()
  1882  }
  1883  
  1884  func (fc flagConstant) String() string {
  1885  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  1886  }
  1887  
  1888  type flagConstantBuilder struct {
  1889  	N bool
  1890  	Z bool
  1891  	C bool
  1892  	V bool
  1893  }
  1894  
  1895  func (fcs flagConstantBuilder) encode() flagConstant {
  1896  	var fc flagConstant
  1897  	if fcs.N {
  1898  		fc |= 1
  1899  	}
  1900  	if fcs.Z {
  1901  		fc |= 2
  1902  	}
  1903  	if fcs.C {
  1904  		fc |= 4
  1905  	}
  1906  	if fcs.V {
  1907  		fc |= 8
  1908  	}
  1909  	return fc
  1910  }
  1911  
  1912  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  1913  //  - the results of the C flag are different
  1914  //  - the results of the V flag when y==minint are different
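        //
        // For example, subFlags64(x, 0) sets C (no borrow) while addFlags64(x, 0)
        // leaves it clear, and subFlags64(0, math.MinInt64) sets V while
        // addFlags64(0, math.MinInt64) does not (negating MinInt64 wraps back to itself).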
  1915  
  1916  // addFlags64 returns the flags that would be set from computing x+y.
  1917  func addFlags64(x, y int64) flagConstant {
  1918  	var fcb flagConstantBuilder
  1919  	fcb.Z = x+y == 0
  1920  	fcb.N = x+y < 0
  1921  	fcb.C = uint64(x+y) < uint64(x)
  1922  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  1923  	return fcb.encode()
  1924  }
  1925  
  1926  // subFlags64 returns the flags that would be set from computing x-y.
  1927  func subFlags64(x, y int64) flagConstant {
  1928  	var fcb flagConstantBuilder
  1929  	fcb.Z = x-y == 0
  1930  	fcb.N = x-y < 0
  1931  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  1932  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  1933  	return fcb.encode()
  1934  }
  1935  
  1936  // addFlags32 returns the flags that would be set from computing x+y.
  1937  func addFlags32(x, y int32) flagConstant {
  1938  	var fcb flagConstantBuilder
  1939  	fcb.Z = x+y == 0
  1940  	fcb.N = x+y < 0
  1941  	fcb.C = uint32(x+y) < uint32(x)
  1942  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  1943  	return fcb.encode()
  1944  }
  1945  
  1946  // subFlags32 returns the flags that would be set from computing x-y.
  1947  func subFlags32(x, y int32) flagConstant {
  1948  	var fcb flagConstantBuilder
  1949  	fcb.Z = x-y == 0
  1950  	fcb.N = x-y < 0
  1951  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  1952  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  1953  	return fcb.encode()
  1954  }
  1955  
  1956  // logicFlags64 returns flags set to the sign/zeroness of x.
  1957  // C and V are set to false.
  1958  func logicFlags64(x int64) flagConstant {
  1959  	var fcb flagConstantBuilder
  1960  	fcb.Z = x == 0
  1961  	fcb.N = x < 0
  1962  	return fcb.encode()
  1963  }
  1964  
  1965  // logicFlags32 returns flags set to the sign/zeroness of x.
  1966  // C and V are set to false.
  1967  func logicFlags32(x int32) flagConstant {
  1968  	var fcb flagConstantBuilder
  1969  	fcb.Z = x == 0
  1970  	fcb.N = x < 0
  1971  	return fcb.encode()
  1972  }
  1973  
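        // makeJumpTableSym returns a local, duplicate-OK symbol naming the
        // jump table for block b.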
  1974  func makeJumpTableSym(b *Block) *obj.LSym {
  1975  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
  1976  	s.Set(obj.AttrDuplicateOK, true)
  1977  	s.Set(obj.AttrLocal, true)
  1978  	return s
  1979  }
  1980  
  1981  // canRotate reports whether the architecture supports
  1982  // rotates of integer registers with the given number of bits.
  1983  func canRotate(c *Config, bits int64) bool {
  1984  	if bits > c.PtrSize*8 {
  1985  		// Don't rewrite to rotates bigger than the machine word.
  1986  		return false
  1987  	}
  1988  	switch c.arch {
  1989  	case "386", "amd64", "arm64":
  1990  		return true
  1991  	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
  1992  		return bits >= 32
  1993  	default:
  1994  		return false
  1995  	}
  1996  }
  1997  
  1998  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
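        // For example, 0x00FF00FF00FF00FF (a repeating 16-bit pattern of eight ones)
        // is encodable, while 0x5 is not, since its ones are not a single
        // contiguous run (even allowing rotation).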
  1999  func isARM64bitcon(x uint64) bool {
  2000  	if x == 1<<64-1 || x == 0 {
  2001  		return false
  2002  	}
  2003  	// determine the period and sign-extend a unit to 64 bits
  2004  	switch {
  2005  	case x != x>>32|x<<32:
  2006  		// period is 64
  2007  		// nothing to do
  2008  	case x != x>>16|x<<48:
  2009  		// period is 32
  2010  		x = uint64(int64(int32(x)))
  2011  	case x != x>>8|x<<56:
  2012  		// period is 16
  2013  		x = uint64(int64(int16(x)))
  2014  	case x != x>>4|x<<60:
  2015  		// period is 8
  2016  		x = uint64(int64(int8(x)))
  2017  	default:
  2018  		// period is 4 or 2, always true
  2019  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2020  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2021  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2022  		// 0101, 1010             -- 01   rotate, repeat
  2023  		return true
  2024  	}
  2025  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2026  }
  2027  
  2028  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
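        // For example, 0x0FF0 qualifies, while 0x0F0F does not.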
  2029  func sequenceOfOnes(x uint64) bool {
  2030  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2031  	y += x
  2032  	return (y-1)&y == 0
  2033  }
  2034  
  2035  // isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
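        // For example, 0xFFF and 0xFFF000 are encodable, but 0x1001 is not.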
  2036  func isARM64addcon(v int64) bool {
  2037  	// The immediate must fit in 12 bits, optionally shifted left by 12 (uimm12, or uimm12 << 12).
  2038  	if v < 0 {
  2039  		return false
  2040  	}
  2041  	if (v & 0xFFF) == 0 {
  2042  		v >>= 12
  2043  	}
  2044  	return v <= 0xFFF
  2045  }