github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"github.com/bir3/gocompiler/src/cmd/compile/internal/base"
     9  	"github.com/bir3/gocompiler/src/cmd/compile/internal/logopt"
    10  	"github.com/bir3/gocompiler/src/cmd/compile/internal/reflectdata"
    11  	"github.com/bir3/gocompiler/src/cmd/compile/internal/types"
    12  	"github.com/bir3/gocompiler/src/cmd/internal/obj"
    13  	"github.com/bir3/gocompiler/src/cmd/internal/obj/s390x"
    14  	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
    15  	"github.com/bir3/gocompiler/src/cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"github.com/bir3/gocompiler/src/internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues		deadValueChoice	= false
    31  	removeDeadValues			= true
    32  )
    33  
    34  // deadcode indicates whether rewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts	// Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	var iters int
    44  	var states map[string]bool
    45  	for {
    46  		change := false
    47  		deadChange := false
    48  		for _, b := range f.Blocks {
    49  			var b0 *Block
    50  			if debug > 1 {
    51  				b0 = new(Block)
    52  				*b0 = *b
    53  				b0.Succs = append([]Edge{}, b.Succs...)	// make a new copy, not aliasing
    54  			}
    55  			for i, c := range b.ControlValues() {
    56  				for c.Op == OpCopy {
    57  					c = c.Args[0]
    58  					b.ReplaceControl(i, c)
    59  				}
    60  			}
    61  			if rb(b) {
    62  				change = true
    63  				if debug > 1 {
    64  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    65  				}
    66  			}
    67  			for j, v := range b.Values {
    68  				var v0 *Value
    69  				if debug > 1 {
    70  					v0 = new(Value)
    71  					*v0 = *v
    72  					v0.Args = append([]*Value{}, v.Args...)	// make a new copy, not aliasing
    73  				}
    74  				if v.Uses == 0 && v.removeable() {
    75  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    76  						// Reset any values that are now unused, so that we decrement
     77  					// the use counts of all of their arguments.
    78  						// Not quite a deadcode pass, because it does not handle cycles.
    79  						// But it should help Uses==1 rules to fire.
    80  						v.reset(OpInvalid)
    81  						deadChange = true
    82  					}
    83  					// No point rewriting values which aren't used.
    84  					continue
    85  				}
    86  
    87  				vchange := phielimValue(v)
    88  				if vchange && debug > 1 {
    89  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    90  				}
    91  
    92  				// Eliminate copy inputs.
    93  				// If any copy input becomes unused, mark it
    94  				// as invalid and discard its argument. Repeat
    95  				// recursively on the discarded argument.
    96  				// This phase helps remove phantom "dead copy" uses
    97  				// of a value so that a x.Uses==1 rule condition
    98  				// fires reliably.
    99  				for i, a := range v.Args {
   100  					if a.Op != OpCopy {
   101  						continue
   102  					}
   103  					aa := copySource(a)
   104  					v.SetArg(i, aa)
   105  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   106  					// to hold it.  The first candidate is the value that will replace a (aa),
   107  					// if it shares the same block and line and is eligible.
   108  					// The second option is v, which has a as an input.  Because aa is earlier in
   109  					// the data flow, it is the better choice.
   110  					if a.Pos.IsStmt() == src.PosIsStmt {
   111  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   112  							aa.Pos = aa.Pos.WithIsStmt()
   113  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   114  							v.Pos = v.Pos.WithIsStmt()
   115  						} else {
   116  							// Record the lost line and look for a new home after all rewrites are complete.
   117  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   118  							// line to appear in more than one block, but only one block is stored, so if both end
   119  							// up here, then one will be lost.
   120  							pendingLines.set(a.Pos, int32(a.Block.ID))
   121  						}
   122  						a.Pos = a.Pos.WithNotStmt()
   123  					}
   124  					vchange = true
   125  					for a.Uses == 0 {
   126  						b := a.Args[0]
   127  						a.reset(OpInvalid)
   128  						a = b
   129  					}
   130  				}
   131  				if vchange && debug > 1 {
   132  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   133  				}
   134  
   135  				// apply rewrite function
   136  				if rv(v) {
   137  					vchange = true
   138  					// If value changed to a poor choice for a statement boundary, move the boundary
   139  					if v.Pos.IsStmt() == src.PosIsStmt {
   140  						if k := nextGoodStatementIndex(v, j, b); k != j {
   141  							v.Pos = v.Pos.WithNotStmt()
   142  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   143  						}
   144  					}
   145  				}
   146  
   147  				change = change || vchange
   148  				if vchange && debug > 1 {
   149  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   150  				}
   151  			}
   152  		}
   153  		if !change && !deadChange {
   154  			break
   155  		}
   156  		iters++
   157  		if (iters > 1000 || debug >= 2) && change {
   158  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   159  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   160  			// and the maximum value encountered during make.bash is 12.
   161  			// Start checking for cycles. (This is too expensive to do routinely.)
   162  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   163  			if states == nil {
   164  				states = make(map[string]bool)
   165  			}
   166  			h := f.rewriteHash()
   167  			if _, ok := states[h]; ok {
   168  				// We've found a cycle.
   169  				// To diagnose it, set debug to 2 and start again,
   170  				// so that we'll print all rules applied until we complete another cycle.
   171  				// If debug is already >= 2, we've already done that, so it's time to crash.
   172  				if debug < 2 {
   173  					debug = 2
   174  					states = make(map[string]bool)
   175  				} else {
   176  					f.Fatalf("rewrite cycle detected")
   177  				}
   178  			}
   179  			states[h] = true
   180  		}
   181  	}
   182  	// remove clobbered values
   183  	for _, b := range f.Blocks {
   184  		j := 0
   185  		for i, v := range b.Values {
   186  			vl := v.Pos
   187  			if v.Op == OpInvalid {
   188  				if v.Pos.IsStmt() == src.PosIsStmt {
   189  					pendingLines.set(vl, int32(b.ID))
   190  				}
   191  				f.freeValue(v)
   192  				continue
   193  			}
   194  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   195  				pendingLines.remove(vl)
   196  				v.Pos = v.Pos.WithIsStmt()
   197  			}
   198  			if i != j {
   199  				b.Values[j] = v
   200  			}
   201  			j++
   202  		}
   203  		if pendingLines.get(b.Pos) == int32(b.ID) {
   204  			b.Pos = b.Pos.WithIsStmt()
   205  			pendingLines.remove(b.Pos)
   206  		}
   207  		b.truncateValues(j)
   208  	}
   209  }
   210  
   211  // Common functions called from rewriting rules
   212  
   213  func is64BitFloat(t *types.Type) bool {
   214  	return t.Size() == 8 && t.IsFloat()
   215  }
   216  
   217  func is32BitFloat(t *types.Type) bool {
   218  	return t.Size() == 4 && t.IsFloat()
   219  }
   220  
   221  func is64BitInt(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsInteger()
   223  }
   224  
   225  func is32BitInt(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsInteger()
   227  }
   228  
   229  func is16BitInt(t *types.Type) bool {
   230  	return t.Size() == 2 && t.IsInteger()
   231  }
   232  
   233  func is8BitInt(t *types.Type) bool {
   234  	return t.Size() == 1 && t.IsInteger()
   235  }
   236  
   237  func isPtr(t *types.Type) bool {
   238  	return t.IsPtrShaped()
   239  }
   240  
   241  // mergeSym merges two symbolic offsets. There is no real merging of
   242  // offsets, we just pick the non-nil one.
   243  func mergeSym(x, y Sym) Sym {
   244  	if x == nil {
   245  		return y
   246  	}
   247  	if y == nil {
   248  		return x
   249  	}
   250  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   251  }
   252  
   253  func canMergeSym(x, y Sym) bool {
   254  	return x == nil || y == nil
   255  }
   256  
   257  // canMergeLoadClobber reports whether the load can be merged into target without
   258  // invalidating the schedule.
   259  // It also checks that the other non-load argument x is something we
   260  // are ok with clobbering.
   261  func canMergeLoadClobber(target, load, x *Value) bool {
   262  	// The register containing x is going to get clobbered.
   263  	// Don't merge if we still need the value of x.
   264  	// We don't have liveness information here, but we can
   265  	// approximate x dying with:
   266  	//  1) target is x's only use.
   267  	//  2) target is not in a deeper loop than x.
   268  	if x.Uses != 1 {
   269  		return false
   270  	}
   271  	loopnest := x.Block.Func.loopnest()
   272  	loopnest.calculateDepths()
   273  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   274  		return false
   275  	}
   276  	return canMergeLoad(target, load)
   277  }
   278  
   279  // canMergeLoad reports whether the load can be merged into target without
   280  // invalidating the schedule.
   281  func canMergeLoad(target, load *Value) bool {
   282  	if target.Block.ID != load.Block.ID {
   283  		// If the load is in a different block do not merge it.
   284  		return false
   285  	}
   286  
   287  	// We can't merge the load into the target if the load
   288  	// has more than one use.
   289  	if load.Uses != 1 {
   290  		return false
   291  	}
   292  
   293  	mem := load.MemoryArg()
   294  
   295  	// We need the load's memory arg to still be alive at target. That
   296  	// can't be the case if one of target's args depends on a memory
   297  	// state that is a successor of load's memory arg.
   298  	//
   299  	// For example, it would be invalid to merge load into target in
   300  	// the following situation because newmem has killed oldmem
   301  	// before target is reached:
   302  	//     load = read ... oldmem
   303  	//   newmem = write ... oldmem
   304  	//     arg0 = read ... newmem
   305  	//   target = add arg0 load
   306  	//
   307  	// If the argument comes from a different block then we can exclude
   308  	// it immediately because it must dominate load (which is in the
   309  	// same block as target).
   310  	var args []*Value
   311  	for _, a := range target.Args {
   312  		if a != load && a.Block.ID == target.Block.ID {
   313  			args = append(args, a)
   314  		}
   315  	}
   316  
   317  	// memPreds contains memory states known to be predecessors of load's
   318  	// memory state. It is lazily initialized.
   319  	var memPreds map[*Value]bool
   320  	for i := 0; len(args) > 0; i++ {
   321  		const limit = 100
   322  		if i >= limit {
   323  			// Give up if we have done a lot of iterations.
   324  			return false
   325  		}
   326  		v := args[len(args)-1]
   327  		args = args[:len(args)-1]
   328  		if target.Block.ID != v.Block.ID {
   329  			// Since target and load are in the same block
   330  			// we can stop searching when we leave the block.
   331  			continue
   332  		}
   333  		if v.Op == OpPhi {
   334  			// A Phi implies we have reached the top of the block.
   335  			// The memory phi, if it exists, is always
   336  			// the first logical store in the block.
   337  			continue
   338  		}
   339  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
    340  			// We could handle this situation; however, it is likely
    341  			// to be very rare.
   342  			return false
   343  		}
   344  		if v.Op.SymEffect()&SymAddr != 0 {
   345  			// This case prevents an operation that calculates the
   346  			// address of a local variable from being forced to schedule
   347  			// before its corresponding VarDef.
   348  			// See issue 28445.
   349  			//   v1 = LOAD ...
   350  			//   v2 = VARDEF
   351  			//   v3 = LEAQ
   352  			//   v4 = CMPQ v1 v3
   353  			// We don't want to combine the CMPQ with the load, because
   354  			// that would force the CMPQ to schedule before the VARDEF, which
   355  			// in turn requires the LEAQ to schedule before the VARDEF.
   356  			return false
   357  		}
   358  		if v.Type.IsMemory() {
   359  			if memPreds == nil {
   360  				// Initialise a map containing memory states
   361  				// known to be predecessors of load's memory
   362  				// state.
   363  				memPreds = make(map[*Value]bool)
   364  				m := mem
   365  				const limit = 50
   366  				for i := 0; i < limit; i++ {
   367  					if m.Op == OpPhi {
   368  						// The memory phi, if it exists, is always
   369  						// the first logical store in the block.
   370  						break
   371  					}
   372  					if m.Block.ID != target.Block.ID {
   373  						break
   374  					}
   375  					if !m.Type.IsMemory() {
   376  						break
   377  					}
   378  					memPreds[m] = true
   379  					if len(m.Args) == 0 {
   380  						break
   381  					}
   382  					m = m.MemoryArg()
   383  				}
   384  			}
   385  
   386  			// We can merge if v is a predecessor of mem.
   387  			//
   388  			// For example, we can merge load into target in the
   389  			// following scenario:
   390  			//      x = read ... v
   391  			//    mem = write ... v
   392  			//   load = read ... mem
   393  			// target = add x load
   394  			if memPreds[v] {
   395  				continue
   396  			}
   397  			return false
   398  		}
   399  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   400  			// If v takes mem as an input then we know mem
   401  			// is valid at this point.
   402  			continue
   403  		}
   404  		for _, a := range v.Args {
   405  			if target.Block.ID == a.Block.ID {
   406  				args = append(args, a)
   407  			}
   408  		}
   409  	}
   410  
   411  	return true
   412  }
   413  
   414  // isSameCall reports whether sym is the same as the given named symbol.
   415  func isSameCall(sym interface{}, name string) bool {
   416  	fn := sym.(*AuxCall).Fn
   417  	return fn != nil && fn.String() == name
   418  }
   419  
    420  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   421  func canLoadUnaligned(c *Config) bool {
   422  	return c.ctxt.Arch.Alignment == 1
   423  }
   424  
   425  // nlzX returns the number of leading zeros.
   426  func nlz64(x int64) int	{ return bits.LeadingZeros64(uint64(x)) }
   427  func nlz32(x int32) int	{ return bits.LeadingZeros32(uint32(x)) }
   428  func nlz16(x int16) int	{ return bits.LeadingZeros16(uint16(x)) }
   429  func nlz8(x int8) int	{ return bits.LeadingZeros8(uint8(x)) }
   430  
   431  // ntzX returns the number of trailing zeros.
   432  func ntz64(x int64) int	{ return bits.TrailingZeros64(uint64(x)) }
   433  func ntz32(x int32) int	{ return bits.TrailingZeros32(uint32(x)) }
   434  func ntz16(x int16) int	{ return bits.TrailingZeros16(uint16(x)) }
   435  func ntz8(x int8) int	{ return bits.TrailingZeros8(uint8(x)) }
   436  
   437  func oneBit(x int64) bool	{ return x&(x-1) == 0 && x != 0 }
   438  func oneBit8(x int8) bool	{ return x&(x-1) == 0 && x != 0 }
   439  func oneBit16(x int16) bool	{ return x&(x-1) == 0 && x != 0 }
   440  func oneBit32(x int32) bool	{ return x&(x-1) == 0 && x != 0 }
   441  func oneBit64(x int64) bool	{ return x&(x-1) == 0 && x != 0 }
   442  
   443  // nto returns the number of trailing ones.
   444  func nto(x int64) int64 {
   445  	return int64(ntz64(^x))
   446  }
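
// Illustrative sketch (not part of the original source; the helper name is
// hypothetical): nto counts trailing one bits, so nto(0x7) == 3, nto(0) == 0,
// and nto(-1) == 64 because the two's-complement representation is all ones.
func checkNtoExamples() {
	if nto(0x7) != 3 || nto(0) != 0 || nto(-1) != 64 {
		panic("nto examples failed")
	}
}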
   447  
    448  // logX returns the base-2 logarithm of n.
   449  // n must be a positive power of 2 (isPowerOfTwoX returns true).
   450  func log8(n int8) int64 {
   451  	return int64(bits.Len8(uint8(n))) - 1
   452  }
   453  func log16(n int16) int64 {
   454  	return int64(bits.Len16(uint16(n))) - 1
   455  }
   456  func log32(n int32) int64 {
   457  	return int64(bits.Len32(uint32(n))) - 1
   458  }
   459  func log64(n int64) int64 {
   460  	return int64(bits.Len64(uint64(n))) - 1
   461  }
   462  
    463  // log2uint32 returns the base-2 logarithm of uint32(n), with log2(0) = -1.
   464  // Rounds down.
   465  func log2uint32(n int64) int64 {
   466  	return int64(bits.Len32(uint32(n))) - 1
   467  }
   468  
   469  // isPowerOfTwoX functions report whether n is a power of 2.
   470  func isPowerOfTwo8(n int8) bool {
   471  	return n > 0 && n&(n-1) == 0
   472  }
   473  func isPowerOfTwo16(n int16) bool {
   474  	return n > 0 && n&(n-1) == 0
   475  }
   476  func isPowerOfTwo32(n int32) bool {
   477  	return n > 0 && n&(n-1) == 0
   478  }
   479  func isPowerOfTwo64(n int64) bool {
   480  	return n > 0 && n&(n-1) == 0
   481  }
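
// Illustrative sketch (hypothetical helper, not part of the original source):
// for every positive power of two, isPowerOfTwo64 accepts the value and
// log64 recovers the exponent.
func checkLogPowerOfTwo() {
	for sh := int64(0); sh < 63; sh++ {
		n := int64(1) << uint(sh)
		if !isPowerOfTwo64(n) || log64(n) != sh {
			panic("log64/isPowerOfTwo64 mismatch")
		}
	}
}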
   482  
   483  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   484  func isUint64PowerOfTwo(in int64) bool {
   485  	n := uint64(in)
   486  	return n > 0 && n&(n-1) == 0
   487  }
   488  
   489  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   490  func isUint32PowerOfTwo(in int64) bool {
   491  	n := uint64(uint32(in))
   492  	return n > 0 && n&(n-1) == 0
   493  }
   494  
   495  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   496  func is32Bit(n int64) bool {
   497  	return n == int64(int32(n))
   498  }
   499  
   500  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   501  func is16Bit(n int64) bool {
   502  	return n == int64(int16(n))
   503  }
   504  
   505  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   506  func is8Bit(n int64) bool {
   507  	return n == int64(int8(n))
   508  }
   509  
   510  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   511  func isU8Bit(n int64) bool {
   512  	return n == int64(uint8(n))
   513  }
   514  
   515  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   516  func isU12Bit(n int64) bool {
   517  	return 0 <= n && n < (1<<12)
   518  }
   519  
   520  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   521  func isU16Bit(n int64) bool {
   522  	return n == int64(uint16(n))
   523  }
   524  
   525  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   526  func isU32Bit(n int64) bool {
   527  	return n == int64(uint32(n))
   528  }
   529  
   530  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   531  func is20Bit(n int64) bool {
   532  	return -(1<<19) <= n && n < (1<<19)
   533  }
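
// Illustrative sketch (hypothetical helper, not part of the original source):
// the isNBit predicates are pure range checks; for example, is16Bit accepts
// exactly [-32768, 32767].
func checkIs16BitBounds() {
	if !is16Bit(math.MinInt16) || !is16Bit(math.MaxInt16) {
		panic("in-range values rejected")
	}
	if is16Bit(math.MinInt16-1) || is16Bit(math.MaxInt16+1) {
		panic("out-of-range values accepted")
	}
}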
   534  
   535  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   536  func b2i(b bool) int64 {
   537  	if b {
   538  		return 1
   539  	}
   540  	return 0
   541  }
   542  
   543  // b2i32 translates a boolean value to 0 or 1.
   544  func b2i32(b bool) int32 {
   545  	if b {
   546  		return 1
   547  	}
   548  	return 0
   549  }
   550  
   551  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   552  // A shift is bounded if it is shifting by less than the width of the shifted value.
   553  func shiftIsBounded(v *Value) bool {
   554  	return v.AuxInt != 0
   555  }
   556  
   557  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   558  // generated code as much as possible.
   559  func canonLessThan(x, y *Value) bool {
   560  	if x.Op != y.Op {
   561  		return x.Op < y.Op
   562  	}
   563  	if !x.Pos.SameFileAndLine(y.Pos) {
   564  		return x.Pos.Before(y.Pos)
   565  	}
   566  	return x.ID < y.ID
   567  }
   568  
   569  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   570  // of the mantissa. It will panic if the truncation results in lost information.
   571  func truncate64Fto32F(f float64) float32 {
   572  	if !isExactFloat32(f) {
   573  		panic("truncate64Fto32F: truncation is not exact")
   574  	}
   575  	if !math.IsNaN(f) {
   576  		return float32(f)
   577  	}
   578  	// NaN bit patterns aren't necessarily preserved across conversion
   579  	// instructions so we need to do the conversion manually.
   580  	b := math.Float64bits(f)
   581  	m := b & ((1 << 52) - 1)	// mantissa (a.k.a. significand)
   582  	//          | sign                  | exponent   | mantissa       |
   583  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   584  	return math.Float32frombits(r)
   585  }
   586  
   587  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   588  // pattern of the mantissa.
   589  func extend32Fto64F(f float32) float64 {
   590  	if !math.IsNaN(float64(f)) {
   591  		return float64(f)
   592  	}
   593  	// NaN bit patterns aren't necessarily preserved across conversion
   594  	// instructions so we need to do the conversion manually.
   595  	b := uint64(math.Float32bits(f))
   596  	//   | sign                  | exponent      | mantissa                    |
   597  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   598  	return math.Float64frombits(r)
   599  }
   600  
   601  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   602  func DivisionNeedsFixUp(v *Value) bool {
   603  	return v.AuxInt == 0
   604  }
   605  
   606  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   607  func auxFrom64F(f float64) int64 {
   608  	if f != f {
   609  		panic("can't encode a NaN in AuxInt field")
   610  	}
   611  	return int64(math.Float64bits(f))
   612  }
   613  
   614  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   615  func auxFrom32F(f float32) int64 {
   616  	if f != f {
   617  		panic("can't encode a NaN in AuxInt field")
   618  	}
   619  	return int64(math.Float64bits(extend32Fto64F(f)))
   620  }
   621  
   622  // auxTo32F decodes a float32 from the AuxInt value provided.
   623  func auxTo32F(i int64) float32 {
   624  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   625  }
   626  
   627  // auxTo64F decodes a float64 from the AuxInt value provided.
   628  func auxTo64F(i int64) float64 {
   629  	return math.Float64frombits(uint64(i))
   630  }
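
// Illustrative sketch (hypothetical helper, not part of the original source):
// non-NaN float constants round-trip losslessly through the AuxInt encoding,
// since auxFrom32F widens to float64 and auxTo32F truncates exactly.
func checkAuxFloatRoundTrip() {
	if auxTo64F(auxFrom64F(math.Pi)) != math.Pi {
		panic("float64 AuxInt round-trip failed")
	}
	if auxTo32F(auxFrom32F(1.5)) != 1.5 {
		panic("float32 AuxInt round-trip failed")
	}
}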
   631  
   632  func auxIntToBool(i int64) bool {
   633  	if i == 0 {
   634  		return false
   635  	}
   636  	return true
   637  }
   638  func auxIntToInt8(i int64) int8 {
   639  	return int8(i)
   640  }
   641  func auxIntToInt16(i int64) int16 {
   642  	return int16(i)
   643  }
   644  func auxIntToInt32(i int64) int32 {
   645  	return int32(i)
   646  }
   647  func auxIntToInt64(i int64) int64 {
   648  	return i
   649  }
   650  func auxIntToUint8(i int64) uint8 {
   651  	return uint8(i)
   652  }
   653  func auxIntToFloat32(i int64) float32 {
   654  	return float32(math.Float64frombits(uint64(i)))
   655  }
   656  func auxIntToFloat64(i int64) float64 {
   657  	return math.Float64frombits(uint64(i))
   658  }
   659  func auxIntToValAndOff(i int64) ValAndOff {
   660  	return ValAndOff(i)
   661  }
   662  func auxIntToArm64BitField(i int64) arm64BitField {
   663  	return arm64BitField(i)
   664  }
   665  func auxIntToInt128(x int64) int128 {
   666  	if x != 0 {
   667  		panic("nonzero int128 not allowed")
   668  	}
   669  	return 0
   670  }
   671  func auxIntToFlagConstant(x int64) flagConstant {
   672  	return flagConstant(x)
   673  }
   674  
   675  func auxIntToOp(cc int64) Op {
   676  	return Op(cc)
   677  }
   678  
   679  func boolToAuxInt(b bool) int64 {
   680  	if b {
   681  		return 1
   682  	}
   683  	return 0
   684  }
   685  func int8ToAuxInt(i int8) int64 {
   686  	return int64(i)
   687  }
   688  func int16ToAuxInt(i int16) int64 {
   689  	return int64(i)
   690  }
   691  func int32ToAuxInt(i int32) int64 {
   692  	return int64(i)
   693  }
   694  func int64ToAuxInt(i int64) int64 {
   695  	return int64(i)
   696  }
   697  func uint8ToAuxInt(i uint8) int64 {
   698  	return int64(int8(i))
   699  }
   700  func float32ToAuxInt(f float32) int64 {
   701  	return int64(math.Float64bits(float64(f)))
   702  }
   703  func float64ToAuxInt(f float64) int64 {
   704  	return int64(math.Float64bits(f))
   705  }
   706  func valAndOffToAuxInt(v ValAndOff) int64 {
   707  	return int64(v)
   708  }
   709  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   710  	return int64(v)
   711  }
   712  func int128ToAuxInt(x int128) int64 {
   713  	if x != 0 {
   714  		panic("nonzero int128 not allowed")
   715  	}
   716  	return 0
   717  }
   718  func flagConstantToAuxInt(x flagConstant) int64 {
   719  	return int64(x)
   720  }
   721  
   722  func opToAuxInt(o Op) int64 {
   723  	return int64(o)
   724  }
   725  
   726  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   727  type Aux interface {
   728  	CanBeAnSSAAux()
   729  }
   730  
   731  // for now only used to mark moves that need to avoid clobbering flags
   732  type auxMark bool
   733  
   734  func (auxMark) CanBeAnSSAAux()	{}
   735  
   736  var AuxMark auxMark
   737  
   738  // stringAux wraps string values for use in Aux.
   739  type stringAux string
   740  
   741  func (stringAux) CanBeAnSSAAux()	{}
   742  
   743  func auxToString(i Aux) string {
   744  	return string(i.(stringAux))
   745  }
   746  func auxToSym(i Aux) Sym {
   747  	// TODO: kind of a hack - allows nil interface through
   748  	s, _ := i.(Sym)
   749  	return s
   750  }
   751  func auxToType(i Aux) *types.Type {
   752  	return i.(*types.Type)
   753  }
   754  func auxToCall(i Aux) *AuxCall {
   755  	return i.(*AuxCall)
   756  }
   757  func auxToS390xCCMask(i Aux) s390x.CCMask {
   758  	return i.(s390x.CCMask)
   759  }
   760  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   761  	return i.(s390x.RotateParams)
   762  }
   763  
   764  func StringToAux(s string) Aux {
   765  	return stringAux(s)
   766  }
   767  func symToAux(s Sym) Aux {
   768  	return s
   769  }
   770  func callToAux(s *AuxCall) Aux {
   771  	return s
   772  }
   773  func typeToAux(t *types.Type) Aux {
   774  	return t
   775  }
   776  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   777  	return c
   778  }
   779  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   780  	return r
   781  }
   782  
   783  // uaddOvf reports whether unsigned a+b would overflow.
   784  func uaddOvf(a, b int64) bool {
   785  	return uint64(a)+uint64(b) < uint64(a)
   786  }
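
// Illustrative sketch (hypothetical helper, not part of the original source):
// uaddOvf works on the unsigned reinterpretation of its arguments, so adding
// 1 to the all-ones pattern (-1 as int64) wraps around and overflows.
func checkUaddOvf() {
	if !uaddOvf(-1, 1) {
		panic("expected unsigned overflow")
	}
	if uaddOvf(1, 2) {
		panic("unexpected overflow")
	}
}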
   787  
   788  // loadLSymOffset simulates reading a word at an offset into a
   789  // read-only symbol's runtime memory. If it would read a pointer to
   790  // another symbol, that symbol is returned. Otherwise, it returns nil.
   791  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   792  	if lsym.Type != objabi.SRODATA {
   793  		return nil
   794  	}
   795  
   796  	for _, r := range lsym.R {
   797  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   798  			return r.Sym
   799  		}
   800  	}
   801  
   802  	return nil
   803  }
   804  
   805  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   806  	v.Op = OpStaticLECall
   807  	auxcall := v.Aux.(*AuxCall)
   808  	auxcall.Fn = sym
   809  	// Remove first arg
   810  	v.Args[0].Uses--
   811  	copy(v.Args[0:], v.Args[1:])
   812  	v.Args[len(v.Args)-1] = nil	// aid GC
   813  	v.Args = v.Args[:len(v.Args)-1]
   814  	if f := v.Block.Func; f.pass.debug > 0 {
   815  		f.Warnl(v.Pos, "de-virtualizing call")
   816  	}
   817  	return v
   818  }
   819  
   820  // isSamePtr reports whether p1 and p2 point to the same address.
   821  func isSamePtr(p1, p2 *Value) bool {
   822  	if p1 == p2 {
   823  		return true
   824  	}
   825  	if p1.Op != p2.Op {
   826  		return false
   827  	}
   828  	switch p1.Op {
   829  	case OpOffPtr:
   830  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   831  	case OpAddr, OpLocalAddr:
   832  		return p1.Aux == p2.Aux
   833  	case OpAddPtr:
   834  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   835  	}
   836  	return false
   837  }
   838  
   839  func isStackPtr(v *Value) bool {
   840  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   841  		v = v.Args[0]
   842  	}
   843  	return v.Op == OpSP || v.Op == OpLocalAddr
   844  }
   845  
   846  // disjoint reports whether the memory region specified by [p1:p1+n1)
   847  // does not overlap with [p2:p2+n2).
   848  // A return value of false does not imply the regions overlap.
   849  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   850  	if n1 == 0 || n2 == 0 {
   851  		return true
   852  	}
   853  	if p1 == p2 {
   854  		return false
   855  	}
   856  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   857  		base, offset = ptr, 0
   858  		for base.Op == OpOffPtr {
   859  			offset += base.AuxInt
   860  			base = base.Args[0]
   861  		}
   862  		if opcodeTable[base.Op].nilCheck {
   863  			base = base.Args[0]
   864  		}
   865  		return base, offset
   866  	}
   867  	p1, off1 := baseAndOffset(p1)
   868  	p2, off2 := baseAndOffset(p2)
   869  	if isSamePtr(p1, p2) {
   870  		return !overlap(off1, n1, off2, n2)
   871  	}
   872  	// p1 and p2 are not the same, so if they are both OpAddrs then
   873  	// they point to different variables.
   874  	// If one pointer is on the stack and the other is an argument
   875  	// then they can't overlap.
   876  	switch p1.Op {
   877  	case OpAddr, OpLocalAddr:
   878  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   879  			return true
   880  		}
   881  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   882  	case OpArg, OpArgIntReg:
   883  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   884  			return true
   885  		}
   886  	case OpSP:
   887  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   888  	}
   889  	return false
   890  }
   891  
   892  // moveSize returns the number of bytes an aligned MOV instruction moves.
   893  func moveSize(align int64, c *Config) int64 {
   894  	switch {
   895  	case align%8 == 0 && c.PtrSize == 8:
   896  		return 8
   897  	case align%4 == 0:
   898  		return 4
   899  	case align%2 == 0:
   900  		return 2
   901  	}
   902  	return 1
   903  }
   904  
   905  // mergePoint finds a block among a's blocks which dominates b and is itself
   906  // dominated by all of a's blocks. Returns nil if it can't find one.
   907  // Might return nil even if one does exist.
   908  func mergePoint(b *Block, a ...*Value) *Block {
   909  	// Walk backward from b looking for one of the a's blocks.
   910  
   911  	// Max distance
   912  	d := 100
   913  
   914  	for d > 0 {
   915  		for _, x := range a {
   916  			if b == x.Block {
   917  				goto found
   918  			}
   919  		}
   920  		if len(b.Preds) > 1 {
   921  			// Don't know which way to go back. Abort.
   922  			return nil
   923  		}
   924  		b = b.Preds[0].b
   925  		d--
   926  	}
   927  	return nil	// too far away
   928  found:
    929  	// At this point, b is the block of the first value in a found by walking backwards.
    930  	// If we return anything, r (set to this b) will be it.
   931  	r := b
   932  
   933  	// Keep going, counting the other a's that we find. They must all dominate r.
   934  	na := 0
   935  	for d > 0 {
   936  		for _, x := range a {
   937  			if b == x.Block {
   938  				na++
   939  			}
   940  		}
   941  		if na == len(a) {
   942  			// Found all of a in a backwards walk. We can return r.
   943  			return r
   944  		}
   945  		if len(b.Preds) > 1 {
   946  			return nil
   947  		}
   948  		b = b.Preds[0].b
   949  		d--
   950  
   951  	}
   952  	return nil	// too far away
   953  }
   954  
   955  // clobber invalidates values. Returns true.
   956  // clobber is used by rewrite rules to:
   957  //
   958  //	A) make sure the values are really dead and never used again.
   959  //	B) decrement use counts of the values' args.
   960  func clobber(vv ...*Value) bool {
   961  	for _, v := range vv {
   962  		v.reset(OpInvalid)
   963  		// Note: leave v.Block intact.  The Block field is used after clobber.
   964  	}
   965  	return true
   966  }
   967  
   968  // clobberIfDead resets v when use count is 1. Returns true.
   969  // clobberIfDead is used by rewrite rules to decrement
   970  // use counts of v's args when v is dead and never used.
   971  func clobberIfDead(v *Value) bool {
   972  	if v.Uses == 1 {
   973  		v.reset(OpInvalid)
   974  	}
   975  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   976  	return true
   977  }
   978  
   979  // noteRule is an easy way to track if a rule is matched when writing
   980  // new ones.  Make the rule of interest also conditional on
   981  //
   982  //	noteRule("note to self: rule of interest matched")
   983  //
   984  // and that message will print when the rule matches.
   985  func noteRule(s string) bool {
   986  	fmt.Println(s)
   987  	return true
   988  }
   989  
   990  // countRule increments Func.ruleMatches[key].
   991  // If Func.ruleMatches is non-nil at the end
   992  // of compilation, it will be printed to stdout.
    993  // This is intended to make it easier to find functions
    994  // that contain many rule matches when developing new rules.
   995  func countRule(v *Value, key string) bool {
   996  	f := v.Block.Func
   997  	if f.ruleMatches == nil {
   998  		f.ruleMatches = make(map[string]int)
   999  	}
  1000  	f.ruleMatches[key]++
  1001  	return true
  1002  }
  1003  
  1004  // warnRule generates compiler debug output with string s when
  1005  // v is not in autogenerated code, cond is true and the rule has fired.
  1006  func warnRule(cond bool, v *Value, s string) bool {
  1007  	if pos := v.Pos; pos.Line() > 1 && cond {
  1008  		v.Block.Func.Warnl(pos, s)
  1009  	}
  1010  	return true
  1011  }
  1012  
  1013  // for a pseudo-op like (LessThan x), extract x.
  1014  func flagArg(v *Value) *Value {
  1015  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1016  		return nil
  1017  	}
  1018  	return v.Args[0]
  1019  }
  1020  
  1021  // arm64Negate finds the complement to an ARM64 condition code,
  1022  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1023  //
  1024  // For floating point, it's more subtle because NaN is unordered. We do
  1025  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1026  func arm64Negate(op Op) Op {
  1027  	switch op {
  1028  	case OpARM64LessThan:
  1029  		return OpARM64GreaterEqual
  1030  	case OpARM64LessThanU:
  1031  		return OpARM64GreaterEqualU
  1032  	case OpARM64GreaterThan:
  1033  		return OpARM64LessEqual
  1034  	case OpARM64GreaterThanU:
  1035  		return OpARM64LessEqualU
  1036  	case OpARM64LessEqual:
  1037  		return OpARM64GreaterThan
  1038  	case OpARM64LessEqualU:
  1039  		return OpARM64GreaterThanU
  1040  	case OpARM64GreaterEqual:
  1041  		return OpARM64LessThan
  1042  	case OpARM64GreaterEqualU:
  1043  		return OpARM64LessThanU
  1044  	case OpARM64Equal:
  1045  		return OpARM64NotEqual
  1046  	case OpARM64NotEqual:
  1047  		return OpARM64Equal
  1048  	case OpARM64LessThanF:
  1049  		return OpARM64NotLessThanF
  1050  	case OpARM64NotLessThanF:
  1051  		return OpARM64LessThanF
  1052  	case OpARM64LessEqualF:
  1053  		return OpARM64NotLessEqualF
  1054  	case OpARM64NotLessEqualF:
  1055  		return OpARM64LessEqualF
  1056  	case OpARM64GreaterThanF:
  1057  		return OpARM64NotGreaterThanF
  1058  	case OpARM64NotGreaterThanF:
  1059  		return OpARM64GreaterThanF
  1060  	case OpARM64GreaterEqualF:
  1061  		return OpARM64NotGreaterEqualF
  1062  	case OpARM64NotGreaterEqualF:
  1063  		return OpARM64GreaterEqualF
  1064  	default:
  1065  		panic("unreachable")
  1066  	}
  1067  }
  1068  
  1069  // arm64Invert evaluates (InvertFlags op), which
  1070  // is the same as altering the condition codes such
  1071  // that the same result would be produced if the arguments
  1072  // to the flag-generating instruction were reversed, e.g.
  1073  // (InvertFlags (CMP x y)) -> (CMP y x)
  1074  func arm64Invert(op Op) Op {
  1075  	switch op {
  1076  	case OpARM64LessThan:
  1077  		return OpARM64GreaterThan
  1078  	case OpARM64LessThanU:
  1079  		return OpARM64GreaterThanU
  1080  	case OpARM64GreaterThan:
  1081  		return OpARM64LessThan
  1082  	case OpARM64GreaterThanU:
  1083  		return OpARM64LessThanU
  1084  	case OpARM64LessEqual:
  1085  		return OpARM64GreaterEqual
  1086  	case OpARM64LessEqualU:
  1087  		return OpARM64GreaterEqualU
  1088  	case OpARM64GreaterEqual:
  1089  		return OpARM64LessEqual
  1090  	case OpARM64GreaterEqualU:
  1091  		return OpARM64LessEqualU
  1092  	case OpARM64Equal, OpARM64NotEqual:
  1093  		return op
  1094  	case OpARM64LessThanF:
  1095  		return OpARM64GreaterThanF
  1096  	case OpARM64GreaterThanF:
  1097  		return OpARM64LessThanF
  1098  	case OpARM64LessEqualF:
  1099  		return OpARM64GreaterEqualF
  1100  	case OpARM64GreaterEqualF:
  1101  		return OpARM64LessEqualF
  1102  	case OpARM64NotLessThanF:
  1103  		return OpARM64NotGreaterThanF
  1104  	case OpARM64NotGreaterThanF:
  1105  		return OpARM64NotLessThanF
  1106  	case OpARM64NotLessEqualF:
  1107  		return OpARM64NotGreaterEqualF
  1108  	case OpARM64NotGreaterEqualF:
  1109  		return OpARM64NotLessEqualF
  1110  	default:
  1111  		panic("unreachable")
  1112  	}
  1113  }
  1114  
  1115  // evaluate an ARM64 op against a flags value
  1116  // that is potentially constant; return 1 for true,
  1117  // -1 for false, and 0 for not constant.
  1118  func ccARM64Eval(op Op, flags *Value) int {
  1119  	fop := flags.Op
  1120  	if fop == OpARM64InvertFlags {
  1121  		return -ccARM64Eval(op, flags.Args[0])
  1122  	}
  1123  	if fop != OpARM64FlagConstant {
  1124  		return 0
  1125  	}
  1126  	fc := flagConstant(flags.AuxInt)
  1127  	b2i := func(b bool) int {
  1128  		if b {
  1129  			return 1
  1130  		}
  1131  		return -1
  1132  	}
  1133  	switch op {
  1134  	case OpARM64Equal:
  1135  		return b2i(fc.eq())
  1136  	case OpARM64NotEqual:
  1137  		return b2i(fc.ne())
  1138  	case OpARM64LessThan:
  1139  		return b2i(fc.lt())
  1140  	case OpARM64LessThanU:
  1141  		return b2i(fc.ult())
  1142  	case OpARM64GreaterThan:
  1143  		return b2i(fc.gt())
  1144  	case OpARM64GreaterThanU:
  1145  		return b2i(fc.ugt())
  1146  	case OpARM64LessEqual:
  1147  		return b2i(fc.le())
  1148  	case OpARM64LessEqualU:
  1149  		return b2i(fc.ule())
  1150  	case OpARM64GreaterEqual:
  1151  		return b2i(fc.ge())
  1152  	case OpARM64GreaterEqualU:
  1153  		return b2i(fc.uge())
  1154  	}
  1155  	return 0
  1156  }
  1157  
  1158  // logRule logs the use of the rule s. This will only be enabled if
  1159  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1160  func logRule(s string) {
  1161  	if ruleFile == nil {
  1162  		// Open a log file to write log to. We open in append
  1163  		// mode because all.bash runs the compiler lots of times,
  1164  		// and we want the concatenation of all of those logs.
  1165  		// This means, of course, that users need to rm the old log
  1166  		// to get fresh data.
  1167  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1168  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1169  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1170  		if err != nil {
  1171  			panic(err)
  1172  		}
  1173  		ruleFile = w
  1174  	}
  1175  	_, err := fmt.Fprintln(ruleFile, s)
  1176  	if err != nil {
  1177  		panic(err)
  1178  	}
  1179  }
  1180  
  1181  var ruleFile io.Writer
  1182  
  1183  func min(x, y int64) int64 {
  1184  	if x < y {
  1185  		return x
  1186  	}
  1187  	return y
  1188  }
  1189  func max(x, y int64) int64 {
  1190  	if x > y {
  1191  		return x
  1192  	}
  1193  	return y
  1194  }
  1195  
  1196  func isConstZero(v *Value) bool {
  1197  	switch v.Op {
  1198  	case OpConstNil:
  1199  		return true
  1200  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1201  		return v.AuxInt == 0
  1202  	}
  1203  	return false
  1204  }
  1205  
  1206  // reciprocalExact64 reports whether 1/c is exactly representable.
  1207  func reciprocalExact64(c float64) bool {
  1208  	b := math.Float64bits(c)
  1209  	man := b & (1<<52 - 1)
  1210  	if man != 0 {
  1211  		return false	// not a power of 2, denormal, or NaN
  1212  	}
  1213  	exp := b >> 52 & (1<<11 - 1)
  1214  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1215  	// changes the exponent to 0x7fe-exp.
  1216  	switch exp {
  1217  	case 0:
  1218  		return false	// ±0
  1219  	case 0x7ff:
  1220  		return false	// ±inf
  1221  	case 0x7fe:
  1222  		return false	// exponent is not representable
  1223  	default:
  1224  		return true
  1225  	}
  1226  }
  1227  
  1228  // reciprocalExact32 reports whether 1/c is exactly representable.
  1229  func reciprocalExact32(c float32) bool {
  1230  	b := math.Float32bits(c)
  1231  	man := b & (1<<23 - 1)
  1232  	if man != 0 {
  1233  		return false	// not a power of 2, denormal, or NaN
  1234  	}
  1235  	exp := b >> 23 & (1<<8 - 1)
  1236  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1237  	// changes the exponent to 0xfe-exp.
  1238  	switch exp {
  1239  	case 0:
  1240  		return false	// ±0
  1241  	case 0xff:
  1242  		return false	// ±inf
  1243  	case 0xfe:
  1244  		return false	// exponent is not representable
  1245  	default:
  1246  		return true
  1247  	}
  1248  }
  1249  
   1250  // check if an immediate can be directly encoded into an ARM instruction.
  1251  func isARMImmRot(v uint32) bool {
  1252  	for i := 0; i < 16; i++ {
  1253  		if v&^0xff == 0 {
  1254  			return true
  1255  		}
  1256  		v = v<<2 | v>>30
  1257  	}
  1258  
  1259  	return false
  1260  }
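
// Illustrative sketch (hypothetical helper, not part of the original source):
// an ARM immediate is an 8-bit value rotated by an even amount, so 0xff and
// 0xff000000 are encodable, while 0x101 (two bits 8 apart) never fits.
func checkARMImmRot() {
	if !isARMImmRot(0xff) || !isARMImmRot(0xff000000) {
		panic("encodable immediate rejected")
	}
	if isARMImmRot(0x101) {
		panic("unencodable immediate accepted")
	}
}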
  1261  
   1262  // overlap reports whether the ranges specified by the given offset and
   1263  // size pairs overlap.
  1264  func overlap(offset1, size1, offset2, size2 int64) bool {
  1265  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1266  		return true
  1267  	}
  1268  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1269  		return true
  1270  	}
  1271  	return false
  1272  }
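
// Illustrative sketch (hypothetical helper, not part of the original source):
// overlap treats ranges as half-open intervals [off, off+size), so ranges
// that merely touch do not overlap.
func checkOverlapExamples() {
	if overlap(0, 8, 8, 8) { // [0,8) and [8,16) are adjacent, not overlapping
		panic("adjacent ranges must not overlap")
	}
	if !overlap(0, 8, 4, 8) { // [0,8) and [4,12) share [4,8)
		panic("intersecting ranges must overlap")
	}
}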
  1273  
  1274  func areAdjacentOffsets(off1, off2, size int64) bool {
  1275  	return off1+size == off2 || off1 == off2+size
  1276  }
  1277  
   1278  // check if the value zeroes out the upper 32 bits of a 64-bit register.
   1279  // depth limits recursion depth. In AMD64.rules 3 is used as the limit,
   1280  // because it catches the same number of cases as 4.
  1281  func zeroUpper32Bits(x *Value, depth int) bool {
  1282  	switch x.Op {
  1283  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1284  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1285  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1286  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1287  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1288  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1289  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1290  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1291  		OpAMD64SHLL, OpAMD64SHLLconst:
  1292  		return true
  1293  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1294  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1295  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1296  		return true
  1297  	case OpArg:
  1298  		return x.Type.Size() == 4
  1299  	case OpPhi, OpSelect0, OpSelect1:
   1300  		// Phis can use each other as arguments; instead of tracking visited values,
   1301  		// just limit the recursion depth.
  1302  		if depth <= 0 {
  1303  			return false
  1304  		}
  1305  		for i := range x.Args {
  1306  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1307  				return false
  1308  			}
  1309  		}
  1310  		return true
  1311  
  1312  	}
  1313  	return false
  1314  }
  1315  
  1316  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1317  func zeroUpper48Bits(x *Value, depth int) bool {
  1318  	switch x.Op {
  1319  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1320  		return true
  1321  	case OpArg:
  1322  		return x.Type.Size() == 2
  1323  	case OpPhi, OpSelect0, OpSelect1:
   1324  		// Phis can use each other as arguments; instead of tracking visited values,
   1325  		// just limit the recursion depth.
  1326  		if depth <= 0 {
  1327  			return false
  1328  		}
  1329  		for i := range x.Args {
  1330  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1331  				return false
  1332  			}
  1333  		}
  1334  		return true
  1335  
  1336  	}
  1337  	return false
  1338  }
  1339  
  1340  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1341  func zeroUpper56Bits(x *Value, depth int) bool {
  1342  	switch x.Op {
  1343  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1344  		return true
  1345  	case OpArg:
  1346  		return x.Type.Size() == 1
  1347  	case OpPhi, OpSelect0, OpSelect1:
   1348  		// Phis can use each other as arguments; instead of tracking visited values,
   1349  		// just limit the recursion depth.
  1350  		if depth <= 0 {
  1351  			return false
  1352  		}
  1353  		for i := range x.Args {
  1354  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1355  				return false
  1356  			}
  1357  		}
  1358  		return true
  1359  
  1360  	}
  1361  	return false
  1362  }
  1363  
  1364  func isInlinableMemclr(c *Config, sz int64) bool {
  1365  	if sz < 0 {
  1366  		return false
  1367  	}
  1368  	// TODO: expand this check to allow other architectures
  1369  	// see CL 454255 and issue 56997
  1370  	switch c.arch {
  1371  	case "amd64", "arm64":
  1372  		return true
  1373  	case "ppc64le", "ppc64":
  1374  		return sz < 512
  1375  	}
  1376  	return false
  1377  }
  1378  
  1379  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1380  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1381  // safe, either because Move will do all of its loads before any of its stores, or
  1382  // because the arguments are known to be disjoint.
  1383  // This is used as a check for replacing memmove with Move ops.
  1384  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1385  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1386  	// Move ops may or may not be faster for large sizes depending on how the platform
  1387  	// lowers them, so we only perform this optimization on platforms that we know to
  1388  	// have fast Move ops.
  1389  	switch c.arch {
  1390  	case "amd64":
  1391  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1392  	case "386", "arm64":
  1393  		return sz <= 8
  1394  	case "s390x", "ppc64", "ppc64le":
  1395  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1396  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1397  		return sz <= 4
  1398  	}
  1399  	return false
  1400  }
  1401  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1402  	return isInlinableMemmove(dst, src, sz, c)
  1403  }
  1404  
  1405  // logLargeCopy logs the occurrence of a large copy.
  1406  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1407  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1408  func logLargeCopy(v *Value, s int64) bool {
  1409  	if s < 128 {
  1410  		return true
  1411  	}
  1412  	if logopt.Enabled() {
  1413  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1414  	}
  1415  	return true
  1416  }
  1417  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1418  	if s < 128 {
  1419  		return
  1420  	}
  1421  	if logopt.Enabled() {
  1422  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1423  	}
  1424  }
  1425  
  1426  // hasSmallRotate reports whether the architecture has rotate instructions
  1427  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1428  func hasSmallRotate(c *Config) bool {
  1429  	switch c.arch {
  1430  	case "amd64", "386":
  1431  		return true
  1432  	default:
  1433  		return false
  1434  	}
  1435  }
  1436  
  1437  func supportsPPC64PCRel() bool {
  1438  	// PCRel is currently supported for >= power10, linux only
  1439  	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
  1440  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1441  }
  1442  
  1443  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1444  	if sh < 0 || sh >= sz {
  1445  		panic("PPC64 shift arg sh out of range")
  1446  	}
  1447  	if mb < 0 || mb >= sz {
  1448  		panic("PPC64 shift arg mb out of range")
  1449  	}
  1450  	if me < 0 || me >= sz {
  1451  		panic("PPC64 shift arg me out of range")
  1452  	}
  1453  	return int32(sh<<16 | mb<<8 | me)
  1454  }
  1455  
  1456  func GetPPC64Shiftsh(auxint int64) int64 {
  1457  	return int64(int8(auxint >> 16))
  1458  }
  1459  
  1460  func GetPPC64Shiftmb(auxint int64) int64 {
  1461  	return int64(int8(auxint >> 8))
  1462  }
  1463  
  1464  func GetPPC64Shiftme(auxint int64) int64 {
  1465  	return int64(int8(auxint))
  1466  }
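
// Illustrative sketch (hypothetical helper, not part of the original source):
// the sh/mb/me fields packed by newPPC64ShiftAuxInt are recovered by the
// GetPPC64Shift* accessors.
func checkPPC64ShiftAuxIntRoundTrip() {
	aux := int64(newPPC64ShiftAuxInt(3, 8, 24, 32))
	if GetPPC64Shiftsh(aux) != 3 || GetPPC64Shiftmb(aux) != 8 || GetPPC64Shiftme(aux) != 24 {
		panic("PPC64 shift auxInt round-trip failed")
	}
}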
  1467  
   1468  // Test if this value can be encoded as a mask for an rlwinm-like
   1469  // operation.  Masks may also extend from the msb and wrap to
   1470  // the lsb.  That is, the valid masks are 32 bit strings
   1471  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1472  func isPPC64WordRotateMask(v64 int64) bool {
  1473  	// Isolate rightmost 1 (if none 0) and add.
  1474  	v := uint32(v64)
  1475  	vp := (v & -v) + v
  1476  	// Likewise, for the wrapping case.
  1477  	vn := ^v
  1478  	vpn := (vn & -vn) + vn
  1479  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1480  }
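
// Illustrative sketch (hypothetical helper, not part of the original source):
// both contiguous and wrapping 32-bit masks are accepted; a mask with two
// separate runs of ones is rejected.
func checkPPC64WordRotateMask() {
	if !isPPC64WordRotateMask(0x00FF0000) { // 0..01..10..0
		panic("contiguous mask rejected")
	}
	if !isPPC64WordRotateMask(0xFF0000FF) { // 1..10..01..1 (wraps)
		panic("wrapping mask rejected")
	}
	if isPPC64WordRotateMask(0x00F000F0) { // two separate runs
		panic("split mask accepted")
	}
}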
  1481  
   1482  // Compress mask and shift into a single value of the form
  1483  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1484  // be used to regenerate the input mask.
  1485  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1486  	var mb, me, mbn, men int
  1487  
  1488  	// Determine boundaries and then decode them
  1489  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1490  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1491  	} else if nbits == 32 {
  1492  		mb = bits.LeadingZeros32(uint32(mask))
  1493  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1494  		mbn = bits.LeadingZeros32(^uint32(mask))
  1495  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1496  	} else {
  1497  		mb = bits.LeadingZeros64(uint64(mask))
  1498  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1499  		mbn = bits.LeadingZeros64(^uint64(mask))
  1500  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1501  	}
   1502  	// Check for a wrapping mask (e.g. bits at 0 and 63)
  1503  	if mb == 0 && me == int(nbits) {
  1504  		// swap the inverted values
  1505  		mb, me = men, mbn
  1506  	}
  1507  
  1508  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1509  }
  1510  
  1511  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1512  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1513  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
   1514  // operations can be combined. This function assumes the two opcodes can
  1515  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1516  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1517  	mb := s
  1518  	r := 64 - s
  1519  	// A larger mb is a smaller mask.
  1520  	if (encoded>>8)&0xFF < mb {
  1521  		encoded = (encoded &^ 0xFF00) | mb<<8
  1522  	}
  1523  	// The rotate is expected to be 0.
  1524  	if (encoded & 0xFF0000) != 0 {
  1525  		panic("non-zero rotate")
  1526  	}
  1527  	return encoded | r<<16
  1528  }
  1529  
  1530  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1531  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1532  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1533  	auxint := uint64(sauxint)
  1534  	rotate = int64((auxint >> 16) & 0xFF)
  1535  	mb = int64((auxint >> 8) & 0xFF)
  1536  	me = int64((auxint >> 0) & 0xFF)
  1537  	nbits := int64((auxint >> 24) & 0xFF)
  1538  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1539  	if mb > me {
  1540  		mask = ^mask
  1541  	}
  1542  	if nbits == 32 {
  1543  		mask = uint64(uint32(mask))
  1544  	}
  1545  
  1546  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1547  	// is inclusive.
  1548  	me = (me - 1) & (nbits - 1)
  1549  	return
  1550  }
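
// Illustrative sketch (hypothetical helper, not part of the original source):
// a contiguous 32-bit mask survives an encodePPC64RotateMask /
// DecodePPC64RotateMask round trip.
func checkPPC64RotateMaskRoundTrip() {
	enc := encodePPC64RotateMask(0, 0xF0, 32)
	rotate, _, _, mask := DecodePPC64RotateMask(enc)
	if rotate != 0 || mask != 0xF0 {
		panic("PPC64 rotate mask round-trip failed")
	}
}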
  1551  
  1552  // This verifies that the mask is a set of
  1553  // consecutive bits including the least
  1554  // significant bit.
  1555  func isPPC64ValidShiftMask(v int64) bool {
  1556  	if (v != 0) && ((v+1)&v) == 0 {
  1557  		return true
  1558  	}
  1559  	return false
  1560  }
  1561  
  1562  func getPPC64ShiftMaskLength(v int64) int64 {
  1563  	return int64(bits.Len64(uint64(v)))
  1564  }
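
// Illustrative sketch (hypothetical helper, not part of the original source):
// a valid shift mask is a run of ones anchored at the least significant bit,
// and its length is the number of ones.
func checkPPC64ShiftMask() {
	if !isPPC64ValidShiftMask(0x7F) || getPPC64ShiftMaskLength(0x7F) != 7 {
		panic("0x7f should be a valid 7-bit shift mask")
	}
	if isPPC64ValidShiftMask(0x7E) { // bit 0 is clear
		panic("0x7e is not anchored at the lsb")
	}
}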
  1565  
  1566  // Decompose a shift right into an equivalent rotate/mask,
  1567  // and return mask & m.
  1568  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1569  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1570  	return m & int64(smask)
  1571  }
  1572  
  1573  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1574  func mergePPC64AndSrwi(m, s int64) int64 {
  1575  	mask := mergePPC64RShiftMask(m, s, 32)
  1576  	if !isPPC64WordRotateMask(mask) {
  1577  		return 0
  1578  	}
  1579  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1580  }
  1581  
  1582  // Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1583  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1584  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1585  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
   1586  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1587  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1588  
  1589  	// Rewrite mask to apply after the final left shift.
  1590  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1591  
  1592  	r_1 := 32 - srw
  1593  	r_2 := GetPPC64Shiftsh(sld)
  1594  	r_3 := (r_1 + r_2) & 31	// This can wrap.
  1595  
  1596  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1597  		return 0
  1598  	}
  1599  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1600  }
  1601  
  1602  // mergePPC64ClrlsldiRlwinm tests whether an RLWINM feeding into a CLRLSLDI can be merged
  1603  // into a single RLWINM. It returns the encoded RLWINM constant, or 0 if they cannot be merged.
  1604  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1605  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1606  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1607  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1608  
  1609  	// combine the masks, and adjust for the final left shift.
  1610  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1611  	r_2 := GetPPC64Shiftsh(int64(sld))
  1612  	r_3 := (r_1 + r_2) & 31	// This can wrap.
  1613  
  1614  	// Verify the result is still a valid bitmask of <= 32 bits.
  1615  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1616  		return 0
  1617  	}
  1618  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1619  }
  1620  
  1621  // mergePPC64SldiSrw computes the encoded RLWINM constant from combining
  1622  // (SLDconst [sld] (SRWconst [srw] x)), or returns 0 if they cannot be combined.
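        // For example (illustrative arithmetic): sld=2, srw=4 yields
        // rotate (32-4+2)&31 = 30 with mask 0x3FFFFFFC.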
  1623  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1624  	if sld > srw || srw >= 32 {
  1625  		return 0
  1626  	}
  1627  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1628  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1629  	mask := (mask_r & mask_l) << uint(sld)
  1630  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1631  }
  1632  
  1633  // convertPPC64OpToOpCC converts a PPC64 opcode from the Op to OpCC form. This
  1634  // rewrites (op x y) to (Select0 (opCC x y)) without having to explicitly fix up
  1635  // every user of op.
  1636  //
  1637  // E.g. consider the case:
  1638  // a = (ADD x y)
  1639  // b = (CMPconst [0] a)
  1640  // c = (OR a z)
  1641  //
  1642  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1643  // would produce:
  1644  // a  = (ADD x y)
  1645  // a' = (ADDCC x y)
  1646  // a'' = (Select0 a')
  1647  // b  = (CMPconst [0] a'')
  1648  // c  = (OR a z)
  1649  //
  1650  // which makes it impossible to rewrite the second user. Instead the result
  1651  // of this conversion is:
  1652  // a' = (ADDCC x y)
  1653  // a  = (Select0 a')
  1654  // b  = (CMPconst [0] a)
  1655  // c  = (OR a z)
  1656  //
  1657  // This makes it trivial to rewrite b using a lowering rule.
  1658  func convertPPC64OpToOpCC(op *Value) *Value {
  1659  	ccOpMap := map[Op]Op{
  1660  		OpPPC64ADD:		OpPPC64ADDCC,
  1661  		OpPPC64ADDconst:	OpPPC64ADDCCconst,
  1662  		OpPPC64AND:		OpPPC64ANDCC,
  1663  		OpPPC64ANDN:		OpPPC64ANDNCC,
  1664  		OpPPC64CNTLZD:		OpPPC64CNTLZDCC,
  1665  		OpPPC64OR:		OpPPC64ORCC,
  1666  		OpPPC64SUB:		OpPPC64SUBCC,
  1667  		OpPPC64NEG:		OpPPC64NEGCC,
  1668  		OpPPC64NOR:		OpPPC64NORCC,
  1669  		OpPPC64XOR:		OpPPC64XORCC,
  1670  	}
  1671  	b := op.Block
  1672  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1673  	opCC.AddArgs(op.Args...)
  1674  	op.reset(OpSelect0)
  1675  	op.AddArgs(opCC)
  1676  	return op
  1677  }
  1678  
  1679  // rotateLeft32 rotates a 32 bit constant value left by another constant.
  1680  func rotateLeft32(v, rotate int64) int64 {
  1681  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1682  }
  1683  
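        // rotateRight64 rotates a 64 bit constant value right by another
        // constant, implemented as a left rotation by the negated amount.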
  1684  func rotateRight64(v, rotate int64) int64 {
  1685  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1686  }
  1687  
  1688  // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
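        // For example, armBFAuxInt(8, 8) encodes to 0x808: the width in the
        // low byte, the lsb in the byte above it.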
  1689  func armBFAuxInt(lsb, width int64) arm64BitField {
  1690  	if lsb < 0 || lsb > 63 {
  1691  		panic("ARM(64) bit field lsb constant out of range")
  1692  	}
  1693  	if width < 1 || lsb+width > 64 {
  1694  		panic("ARM(64) bit field width constant out of range")
  1695  	}
  1696  	return arm64BitField(width | lsb<<8)
  1697  }
  1698  
  1699  // getARM64BFlsb returns the lsb part of the auxInt field of arm64 bitfield ops.
  1700  func (bfc arm64BitField) getARM64BFlsb() int64 {
  1701  	return int64(uint64(bfc) >> 8)
  1702  }
  1703  
  1704  // getARM64BFwidth returns the width part of the auxInt field of arm64 bitfield ops.
  1705  func (bfc arm64BitField) getARM64BFwidth() int64 {
  1706  	return int64(bfc) & 0xff
  1707  }
  1708  
  1709  // isARM64BFMask reports whether mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
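        // For example, mask 0x0FF0 with rshift 4 gives 0xFF, a contiguous
        // mask, which is valid for any lsb < 56.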
  1710  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1711  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1712  	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1713  }
  1714  
  1715  // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1716  func arm64BFWidth(mask, rshift int64) int64 {
  1717  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1718  	if shiftedMask == 0 {
  1719  		panic("ARM64 BF mask is zero")
  1720  	}
  1721  	return nto(shiftedMask)
  1722  }
  1723  
  1724  // sizeof returns the size of t in bytes.
  1725  // It will panic if t is not a *types.Type.
  1726  func sizeof(t interface{}) int64 {
  1727  	return t.(*types.Type).Size()
  1728  }
  1729  
  1730  // registerizable reports whether t is a primitive type that fits in
  1731  // a register. It assumes float64 values will always fit into registers
  1732  // even if that isn't strictly true.
  1733  func registerizable(b *Block, typ *types.Type) bool {
  1734  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1735  		return true
  1736  	}
  1737  	if typ.IsInteger() {
  1738  		return typ.Size() <= b.Func.Config.RegSize
  1739  	}
  1740  	return false
  1741  }
  1742  
  1743  // needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
  1744  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1745  	f := v.Block.Func
  1746  	if !f.Config.Race {
  1747  		return false
  1748  	}
  1749  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1750  		return false
  1751  	}
  1752  	for _, b := range f.Blocks {
  1753  		for _, v := range b.Values {
  1754  			switch v.Op {
  1755  			case OpStaticCall, OpStaticLECall:
  1756  				// A check for racefuncenter will encounter racefuncexit and vice versa.
  1757  				// Allow calls to panic*.
  1758  				s := v.Aux.(*AuxCall).Fn.String()
  1759  				switch s {
  1760  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1761  					"runtime.panicdivide", "runtime.panicwrap",
  1762  					"runtime.panicshift":
  1763  					continue
  1764  				}
  1765  				// If we encountered any call, we need to keep racefunc*,
  1766  				// for accurate stacktraces.
  1767  				return false
  1768  			case OpPanicBounds, OpPanicExtend:
  1769  				// Note: these are panic generators that are ok (like the static calls above).
  1770  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1771  				// We must keep the race functions if there are any other call types.
  1772  				return false
  1773  			}
  1774  		}
  1775  	}
  1776  	if isSameCall(sym, "runtime.racefuncenter") {
  1777  		// TODO REGISTER ABI this needs to be cleaned up.
  1778  		// If we're removing racefuncenter, remove its argument as well.
  1779  		if v.Args[0].Op != OpStore {
  1780  			if v.Op == OpStaticLECall {
  1781  				// there is no store, yet.
  1782  				return true
  1783  			}
  1784  			return false
  1785  		}
  1786  		mem := v.Args[0].Args[2]
  1787  		v.Args[0].reset(OpCopy)
  1788  		v.Args[0].AddArg(mem)
  1789  	}
  1790  	return true
  1791  }
  1792  
  1793  // symIsRO reports whether sym is a read-only global.
  1794  func symIsRO(sym interface{}) bool {
  1795  	lsym := sym.(*obj.LSym)
  1796  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1797  }
  1798  
  1799  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1800  func symIsROZero(sym Sym) bool {
  1801  	lsym := sym.(*obj.LSym)
  1802  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1803  		return false
  1804  	}
  1805  	for _, b := range lsym.P {
  1806  		if b != 0 {
  1807  			return false
  1808  		}
  1809  	}
  1810  	return true
  1811  }
  1812  
  1813  // isFixed32 returns true if the int32 at offset off in symbol sym
  1814  // is known and constant.
  1815  func isFixed32(c *Config, sym Sym, off int64) bool {
  1816  	return isFixed(c, sym, off, 4)
  1817  }
  1818  
  1819  // isFixed returns true if the range [off, off+size) of the symbol sym
  1820  // is known and constant.
  1821  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1822  	lsym := sym.(*obj.LSym)
  1823  	if lsym.Extra == nil {
  1824  		return false
  1825  	}
  1826  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1827  		if off == 2*c.PtrSize && size == 4 {
  1828  			return true	// type hash field
  1829  		}
  1830  	}
  1831  	return false
  1832  }
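        // fixed32 returns the int32 at offset off in symbol sym, which must be
        // known constant data (see isFixed); otherwise it fails fatally.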
  1833  func fixed32(c *Config, sym Sym, off int64) int32 {
  1834  	lsym := sym.(*obj.LSym)
  1835  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1836  		if off == 2*c.PtrSize {
  1837  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  1838  		}
  1839  	}
  1840  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  1841  	return 0
  1842  }
  1843  
  1844  // isFixedSym returns true if the contents of sym at the given offset
  1845  // is known and is the constant address of another symbol.
  1846  func isFixedSym(sym Sym, off int64) bool {
  1847  	lsym := sym.(*obj.LSym)
  1848  	switch {
  1849  	case lsym.Type == objabi.SRODATA:
  1850  		// itabs, dictionaries
  1851  	default:
  1852  		return false
  1853  	}
  1854  	for _, r := range lsym.R {
  1855  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  1856  			return true
  1857  		}
  1858  	}
  1859  	return false
  1860  }
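        // fixedSym returns the symbol whose constant address is stored at
        // offset off of sym (see isFixedSym); otherwise it fails fatally.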
  1861  func fixedSym(f *Func, sym Sym, off int64) Sym {
  1862  	lsym := sym.(*obj.LSym)
  1863  	for _, r := range lsym.R {
  1864  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  1865  			if strings.HasPrefix(r.Sym.Name, "type:") {
  1866  				// In case we're loading a type out of a dictionary, we need to record
  1867  				// that the containing function might put that type in an interface.
  1868  				// That information is currently recorded in relocations in the dictionary,
  1869  				// but if we perform this load at compile time then the dictionary
  1870  				// might be dead.
  1871  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1872  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  1873  				// Same, but if we're using an itab we need to record that the
  1874  				// itab._type might be put in an interface.
  1875  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1876  			}
  1877  			return r.Sym
  1878  		}
  1879  	}
  1880  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  1881  	return nil
  1882  }
  1883  
  1884  // read8 reads one byte from the read-only global sym at offset off.
  1885  func read8(sym interface{}, off int64) uint8 {
  1886  	lsym := sym.(*obj.LSym)
  1887  	if off >= int64(len(lsym.P)) || off < 0 {
  1888  		// Invalid index into the global sym.
  1889  		// This can happen in dead code, so we don't want to panic.
  1890  		// Just return any value, it will eventually get ignored.
  1891  		// See issue 29215.
  1892  		return 0
  1893  	}
  1894  	return lsym.P[off]
  1895  }
  1896  
  1897  // read16 reads two bytes from the read-only global sym at offset off.
  1898  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  1899  	lsym := sym.(*obj.LSym)
  1900  	// lsym.P is written lazily.
  1901  	// Bytes requested after the end of lsym.P are 0.
  1902  	var src []byte
  1903  	if 0 <= off && off < int64(len(lsym.P)) {
  1904  		src = lsym.P[off:]
  1905  	}
  1906  	buf := make([]byte, 2)
  1907  	copy(buf, src)
  1908  	return byteorder.Uint16(buf)
  1909  }
  1910  
  1911  // read32 reads four bytes from the read-only global sym at offset off.
  1912  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  1913  	lsym := sym.(*obj.LSym)
  1914  	var src []byte
  1915  	if 0 <= off && off < int64(len(lsym.P)) {
  1916  		src = lsym.P[off:]
  1917  	}
  1918  	buf := make([]byte, 4)
  1919  	copy(buf, src)
  1920  	return byteorder.Uint32(buf)
  1921  }
  1922  
  1923  // read64 reads eight bytes from the read-only global sym at offset off.
  1924  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  1925  	lsym := sym.(*obj.LSym)
  1926  	var src []byte
  1927  	if 0 <= off && off < int64(len(lsym.P)) {
  1928  		src = lsym.P[off:]
  1929  	}
  1930  	buf := make([]byte, 8)
  1931  	copy(buf, src)
  1932  	return byteorder.Uint64(buf)
  1933  }
  1934  
  1935  // sequentialAddresses reports whether it can prove that x + n == y.
  1936  func sequentialAddresses(x, y *Value, n int64) bool {
  1937  	if x == y && n == 0 {
  1938  		return true
  1939  	}
  1940  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  1941  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1942  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1943  		return true
  1944  	}
  1945  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  1946  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1947  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1948  		return true
  1949  	}
  1950  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  1951  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1952  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1953  		return true
  1954  	}
  1955  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  1956  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  1957  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  1958  		return true
  1959  	}
  1960  	return false
  1961  }
  1962  
  1963  // flagConstant represents the result of a compile-time comparison.
  1964  // The sense of these flags does not necessarily represent the hardware's notion
  1965  // of a flags register - these are just a compile-time construct.
  1966  // We happen to match the semantics to those of arm/arm64.
  1967  // Note that these semantics differ from x86: the carry flag has the opposite
  1968  // sense on a subtraction!
  1969  //
  1970  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  1971  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  1972  //	 (because it does x + ^y + C).
  1973  //
  1974  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  1975  type flagConstant uint8
  1976  
  1977  // N reports whether the result of an operation is negative (high bit set).
  1978  func (fc flagConstant) N() bool {
  1979  	return fc&1 != 0
  1980  }
  1981  
  1982  // Z reports whether the result of an operation is 0.
  1983  func (fc flagConstant) Z() bool {
  1984  	return fc&2 != 0
  1985  }
  1986  
  1987  // C reports whether an unsigned add overflowed (carry), or an
  1988  // unsigned subtract did not underflow (borrow).
  1989  func (fc flagConstant) C() bool {
  1990  	return fc&4 != 0
  1991  }
  1992  
  1993  // V reports whether a signed operation overflowed or underflowed.
  1994  func (fc flagConstant) V() bool {
  1995  	return fc&8 != 0
  1996  }
  1997  
  1998  func (fc flagConstant) eq() bool {
  1999  	return fc.Z()
  2000  }
  2001  func (fc flagConstant) ne() bool {
  2002  	return !fc.Z()
  2003  }
  2004  func (fc flagConstant) lt() bool {
  2005  	return fc.N() != fc.V()
  2006  }
  2007  func (fc flagConstant) le() bool {
  2008  	return fc.Z() || fc.lt()
  2009  }
  2010  func (fc flagConstant) gt() bool {
  2011  	return !fc.Z() && fc.ge()
  2012  }
  2013  func (fc flagConstant) ge() bool {
  2014  	return fc.N() == fc.V()
  2015  }
  2016  func (fc flagConstant) ult() bool {
  2017  	return !fc.C()
  2018  }
  2019  func (fc flagConstant) ule() bool {
  2020  	return fc.Z() || fc.ult()
  2021  }
  2022  func (fc flagConstant) ugt() bool {
  2023  	return !fc.Z() && fc.uge()
  2024  }
  2025  func (fc flagConstant) uge() bool {
  2026  	return fc.C()
  2027  }
  2028  
  2029  func (fc flagConstant) ltNoov() bool {
  2030  	return fc.lt() && !fc.V()
  2031  }
  2032  func (fc flagConstant) leNoov() bool {
  2033  	return fc.le() && !fc.V()
  2034  }
  2035  func (fc flagConstant) gtNoov() bool {
  2036  	return fc.gt() && !fc.V()
  2037  }
  2038  func (fc flagConstant) geNoov() bool {
  2039  	return fc.ge() && !fc.V()
  2040  }
  2041  
  2042  func (fc flagConstant) String() string {
  2043  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2044  }
  2045  
  2046  type flagConstantBuilder struct {
  2047  	N	bool
  2048  	Z	bool
  2049  	C	bool
  2050  	V	bool
  2051  }
  2052  
  2053  func (fcs flagConstantBuilder) encode() flagConstant {
  2054  	var fc flagConstant
  2055  	if fcs.N {
  2056  		fc |= 1
  2057  	}
  2058  	if fcs.Z {
  2059  		fc |= 2
  2060  	}
  2061  	if fcs.C {
  2062  		fc |= 4
  2063  	}
  2064  	if fcs.V {
  2065  		fc |= 8
  2066  	}
  2067  	return fc
  2068  }
  2069  
  2070  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2071  //  - the results of the C flag are different
  2072  //  - the results of the V flag when y==minint are different
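        //
        // For example, with y == math.MinInt64, subFlags64(0, y) sets V because
        // 0-y overflows, while addFlags64(0, -y) leaves V clear, since -y == y
        // in two's complement and 0+y does not overflow.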
  2073  
  2074  // addFlags64 returns the flags that would be set from computing x+y.
  2075  func addFlags64(x, y int64) flagConstant {
  2076  	var fcb flagConstantBuilder
  2077  	fcb.Z = x+y == 0
  2078  	fcb.N = x+y < 0
  2079  	fcb.C = uint64(x+y) < uint64(x)
  2080  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2081  	return fcb.encode()
  2082  }
  2083  
  2084  // subFlags64 returns the flags that would be set from computing x-y.
  2085  func subFlags64(x, y int64) flagConstant {
  2086  	var fcb flagConstantBuilder
  2087  	fcb.Z = x-y == 0
  2088  	fcb.N = x-y < 0
  2089  	fcb.C = uint64(y) <= uint64(x)	// This code follows the arm carry flag model.
  2090  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2091  	return fcb.encode()
  2092  }
  2093  
  2094  // addFlags32 returns the flags that would be set from computing x+y.
  2095  func addFlags32(x, y int32) flagConstant {
  2096  	var fcb flagConstantBuilder
  2097  	fcb.Z = x+y == 0
  2098  	fcb.N = x+y < 0
  2099  	fcb.C = uint32(x+y) < uint32(x)
  2100  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2101  	return fcb.encode()
  2102  }
  2103  
  2104  // subFlags32 returns the flags that would be set from computing x-y.
  2105  func subFlags32(x, y int32) flagConstant {
  2106  	var fcb flagConstantBuilder
  2107  	fcb.Z = x-y == 0
  2108  	fcb.N = x-y < 0
  2109  	fcb.C = uint32(y) <= uint32(x)	// This code follows the arm carry flag model.
  2110  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2111  	return fcb.encode()
  2112  }
  2113  
  2114  // logicFlags64 returns flags set to the sign/zeroness of x.
  2115  // C and V are set to false.
  2116  func logicFlags64(x int64) flagConstant {
  2117  	var fcb flagConstantBuilder
  2118  	fcb.Z = x == 0
  2119  	fcb.N = x < 0
  2120  	return fcb.encode()
  2121  }
  2122  
  2123  // logicFlags32 returns flags set to the sign/zeroness of x.
  2124  // C and V are set to false.
  2125  func logicFlags32(x int32) flagConstant {
  2126  	var fcb flagConstantBuilder
  2127  	fcb.Z = x == 0
  2128  	fcb.N = x < 0
  2129  	return fcb.encode()
  2130  }
  2131  
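        // makeJumpTableSym returns a local, duplicate-OK symbol to hold the
        // jump table for block b.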
  2132  func makeJumpTableSym(b *Block) *obj.LSym {
  2133  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2134  	s.Set(obj.AttrDuplicateOK, true)
  2135  	s.Set(obj.AttrLocal, true)
  2136  	return s
  2137  }
  2138  
  2139  // canRotate reports whether the architecture supports
  2140  // rotates of integer registers with the given number of bits.
  2141  func canRotate(c *Config, bits int64) bool {
  2142  	if bits > c.PtrSize*8 {
  2143  		// Don't rewrite to rotates bigger than the machine word.
  2144  		return false
  2145  	}
  2146  	switch c.arch {
  2147  	case "386", "amd64", "arm64":
  2148  		return true
  2149  	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
  2150  		return bits >= 32
  2151  	default:
  2152  		return false
  2153  	}
  2154  }
  2155  
  2156  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
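        // For example, 0x00FF00FF00FF00FF (a repeating pattern with period 16)
        // sign-extends to 0xFF, a sequence of ones, so it is encodable;
        // 0 and all-ones are rejected up front.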
  2157  func isARM64bitcon(x uint64) bool {
  2158  	if x == 1<<64-1 || x == 0 {
  2159  		return false
  2160  	}
  2161  	// determine the period and sign-extend a unit to 64 bits
  2162  	switch {
  2163  	case x != x>>32|x<<32:
  2164  		// period is 64
  2165  		// nothing to do
  2166  	case x != x>>16|x<<48:
  2167  		// period is 32
  2168  		x = uint64(int64(int32(x)))
  2169  	case x != x>>8|x<<56:
  2170  		// period is 16
  2171  		x = uint64(int64(int16(x)))
  2172  	case x != x>>4|x<<60:
  2173  		// period is 8
  2174  		x = uint64(int64(int8(x)))
  2175  	default:
  2176  		// period is 4 or 2, always true
  2177  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2178  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2179  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2180  		// 0101, 1010             -- 01   rotate, repeat
  2181  		return true
  2182  	}
  2183  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2184  }
  2185  
  2186  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
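        // For example, 0b0110 qualifies (0b0110 + 0b0010 == 0b1000, a power of
        // two), while 0b0101 does not.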
  2187  func sequenceOfOnes(x uint64) bool {
  2188  	y := x & -x	// lowest set bit of x. x is good iff x+y is a power of 2
  2189  	y += x
  2190  	return (y-1)&y == 0
  2191  }
  2192  
  2193  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
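        // For example, 0xFFF and 0xABC000 are encodable (the latter as a
        // 12-bit immediate shifted left by 12), while 0x1001 is not.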
  2194  func isARM64addcon(v int64) bool {
  2195  	// uimm12, or uimm12 shifted left by 12?
  2196  	if v < 0 {
  2197  		return false
  2198  	}
  2199  	if (v & 0xFFF) == 0 {
  2200  		v >>= 12
  2201  	}
  2202  	return v <= 0xFFF
  2203  }
  2204  
  2205  // setPos sets the position of v to pos, then returns true.
  2206  // Useful for setting the result of a rewrite's position to
  2207  // something other than the default.
  2208  func setPos(v *Value, pos src.XPos) bool {
  2209  	v.Pos = pos
  2210  	return true
  2211  }