github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/obj"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"os"
    14  	"path/filepath"
    15  )
    16  
    17  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
    18  	// repeat rewrites until we find no more rewrites
    19  	for {
    20  		change := false
    21  		for _, b := range f.Blocks {
    22  			if b.Control != nil && b.Control.Op == OpCopy {
    23  				for b.Control.Op == OpCopy {
    24  					b.SetControl(b.Control.Args[0])
    25  				}
    26  			}
    27  			if rb(b) {
    28  				change = true
    29  			}
    30  			for _, v := range b.Values {
    31  				change = phielimValue(v) || change
    32  
    33  				// Eliminate copy inputs.
    34  				// If any copy input becomes unused, mark it
    35  				// as invalid and discard its argument. Repeat
    36  				// recursively on the discarded argument.
    37  				// This phase helps remove phantom "dead copy" uses
     38  				// of a value so that an x.Uses==1 rule condition
    39  				// fires reliably.
    40  				for i, a := range v.Args {
    41  					if a.Op != OpCopy {
    42  						continue
    43  					}
    44  					v.SetArg(i, copySource(a))
    45  					change = true
    46  					for a.Uses == 0 {
    47  						b := a.Args[0]
    48  						a.reset(OpInvalid)
    49  						a = b
    50  					}
    51  				}
    52  
    53  				// apply rewrite function
    54  				if rv(v) {
    55  					change = true
    56  				}
    57  			}
    58  		}
    59  		if !change {
    60  			break
    61  		}
    62  	}
    63  	// remove clobbered values
    64  	for _, b := range f.Blocks {
    65  		j := 0
    66  		for i, v := range b.Values {
    67  			if v.Op == OpInvalid {
    68  				f.freeValue(v)
    69  				continue
    70  			}
    71  			if i != j {
    72  				b.Values[j] = v
    73  			}
    74  			j++
    75  		}
    76  		if j != len(b.Values) {
    77  			tail := b.Values[j:]
    78  			for j := range tail {
    79  				tail[j] = nil
    80  			}
    81  			b.Values = b.Values[:j]
    82  		}
    83  	}
    84  }
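
// Illustrative sketch (added for exposition, not part of the original file): the rb and rv
// arguments are the machine-generated rewriters (e.g. rewriteBlockAMD64/rewriteValueAMD64);
// as the call sites above imply, their only contract is "mutate in place and report whether
// anything changed", which is what drives the fixed-point loop. A hypothetical do-nothing
// value rewriter showing that contract:
func exampleNoopValueRewriter(v *Value) bool {
	// A real generated rewriter switches on v.Op, pattern-matches v's arguments,
	// rewrites v in place, and returns true when a rule fires.
	return false
}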
    85  
    86  // Common functions called from rewriting rules
    87  
    88  func is64BitFloat(t *types.Type) bool {
    89  	return t.Size() == 8 && t.IsFloat()
    90  }
    91  
    92  func is32BitFloat(t *types.Type) bool {
    93  	return t.Size() == 4 && t.IsFloat()
    94  }
    95  
    96  func is64BitInt(t *types.Type) bool {
    97  	return t.Size() == 8 && t.IsInteger()
    98  }
    99  
   100  func is32BitInt(t *types.Type) bool {
   101  	return t.Size() == 4 && t.IsInteger()
   102  }
   103  
   104  func is16BitInt(t *types.Type) bool {
   105  	return t.Size() == 2 && t.IsInteger()
   106  }
   107  
   108  func is8BitInt(t *types.Type) bool {
   109  	return t.Size() == 1 && t.IsInteger()
   110  }
   111  
   112  func isPtr(t *types.Type) bool {
   113  	return t.IsPtrShaped()
   114  }
   115  
   116  func isSigned(t *types.Type) bool {
   117  	return t.IsSigned()
   118  }
   119  
    120  // mergeSym merges two symbolic offsets. There is no real merging of
    121  // offsets; we just pick the non-nil one.
   122  func mergeSym(x, y interface{}) interface{} {
   123  	if x == nil {
   124  		return y
   125  	}
   126  	if y == nil {
   127  		return x
   128  	}
   129  	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
   130  }
   131  func canMergeSym(x, y interface{}) bool {
   132  	return x == nil || y == nil
   133  }
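
// Illustrative sketch (added for exposition, not part of the original file): rewrite rules
// guard mergeSym with canMergeSym so the panic above is never reached; the "merge" simply
// keeps whichever symbol is non-nil.
func exampleMergeSym() {
	var a interface{} = "sym"
	var b interface{} // nil
	if canMergeSym(a, b) {
		fmt.Println(mergeSym(a, b)) // prints "sym": the non-nil side wins
	}
	// canMergeSym("a", "b") would be false; mergeSym on two non-nil syms panics.
}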
   134  
   135  // canMergeLoad reports whether the load can be merged into target without
   136  // invalidating the schedule.
   137  // It also checks that the other non-load argument x is something we
   138  // are ok with clobbering (all our current load+op instructions clobber
   139  // their input register).
   140  func canMergeLoad(target, load, x *Value) bool {
   141  	if target.Block.ID != load.Block.ID {
   142  		// If the load is in a different block do not merge it.
   143  		return false
   144  	}
   145  
   146  	// We can't merge the load into the target if the load
   147  	// has more than one use.
   148  	if load.Uses != 1 {
   149  		return false
   150  	}
   151  
   152  	// The register containing x is going to get clobbered.
   153  	// Don't merge if we still need the value of x.
   154  	// We don't have liveness information here, but we can
   155  	// approximate x dying with:
   156  	//  1) target is x's only use.
   157  	//  2) target is not in a deeper loop than x.
   158  	if x.Uses != 1 {
   159  		return false
   160  	}
   161  	loopnest := x.Block.Func.loopnest()
   162  	loopnest.calculateDepths()
   163  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   164  		return false
   165  	}
   166  
   167  	mem := load.MemoryArg()
   168  
   169  	// We need the load's memory arg to still be alive at target. That
   170  	// can't be the case if one of target's args depends on a memory
   171  	// state that is a successor of load's memory arg.
   172  	//
   173  	// For example, it would be invalid to merge load into target in
   174  	// the following situation because newmem has killed oldmem
   175  	// before target is reached:
   176  	//     load = read ... oldmem
   177  	//   newmem = write ... oldmem
   178  	//     arg0 = read ... newmem
   179  	//   target = add arg0 load
   180  	//
   181  	// If the argument comes from a different block then we can exclude
   182  	// it immediately because it must dominate load (which is in the
   183  	// same block as target).
   184  	var args []*Value
   185  	for _, a := range target.Args {
   186  		if a != load && a.Block.ID == target.Block.ID {
   187  			args = append(args, a)
   188  		}
   189  	}
   190  
   191  	// memPreds contains memory states known to be predecessors of load's
   192  	// memory state. It is lazily initialized.
   193  	var memPreds map[*Value]bool
   194  search:
   195  	for i := 0; len(args) > 0; i++ {
   196  		const limit = 100
   197  		if i >= limit {
   198  			// Give up if we have done a lot of iterations.
   199  			return false
   200  		}
   201  		v := args[len(args)-1]
   202  		args = args[:len(args)-1]
   203  		if target.Block.ID != v.Block.ID {
   204  			// Since target and load are in the same block
   205  			// we can stop searching when we leave the block.
   206  			continue search
   207  		}
   208  		if v.Op == OpPhi {
   209  			// A Phi implies we have reached the top of the block.
   210  			// The memory phi, if it exists, is always
   211  			// the first logical store in the block.
   212  			continue search
   213  		}
   214  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   215  			// We could handle this situation however it is likely
   216  			// to be very rare.
   217  			return false
   218  		}
   219  		if v.Type.IsMemory() {
   220  			if memPreds == nil {
   221  				// Initialise a map containing memory states
   222  				// known to be predecessors of load's memory
   223  				// state.
   224  				memPreds = make(map[*Value]bool)
   225  				m := mem
   226  				const limit = 50
   227  				for i := 0; i < limit; i++ {
   228  					if m.Op == OpPhi {
   229  						// The memory phi, if it exists, is always
   230  						// the first logical store in the block.
   231  						break
   232  					}
   233  					if m.Block.ID != target.Block.ID {
   234  						break
   235  					}
   236  					if !m.Type.IsMemory() {
   237  						break
   238  					}
   239  					memPreds[m] = true
   240  					if len(m.Args) == 0 {
   241  						break
   242  					}
   243  					m = m.MemoryArg()
   244  				}
   245  			}
   246  
   247  			// We can merge if v is a predecessor of mem.
   248  			//
   249  			// For example, we can merge load into target in the
   250  			// following scenario:
   251  			//      x = read ... v
   252  			//    mem = write ... v
   253  			//   load = read ... mem
   254  			// target = add x load
   255  			if memPreds[v] {
   256  				continue search
   257  			}
   258  			return false
   259  		}
   260  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   261  			// If v takes mem as an input then we know mem
   262  			// is valid at this point.
   263  			continue search
   264  		}
   265  		for _, a := range v.Args {
   266  			if target.Block.ID == a.Block.ID {
   267  				args = append(args, a)
   268  			}
   269  		}
   270  	}
   271  
   272  	return true
   273  }
   274  
    275  // isSameSym reports whether sym is the same as the given named symbol.
   276  func isSameSym(sym interface{}, name string) bool {
   277  	s, ok := sym.(fmt.Stringer)
   278  	return ok && s.String() == name
   279  }
   280  
   281  // nlz returns the number of leading zeros.
   282  func nlz(x int64) int64 {
    283  	// log2(0) == -1, so nlz(0) == 64
   284  	return 63 - log2(x)
   285  }
   286  
   287  // ntz returns the number of trailing zeros.
   288  func ntz(x int64) int64 {
   289  	return 64 - nlz(^x&(x-1))
   290  }
   291  
   292  func oneBit(x int64) bool {
   293  	return nlz(x)+ntz(x) == 63
   294  }
   295  
   296  // nlo returns the number of leading ones.
   297  func nlo(x int64) int64 {
   298  	return nlz(^x)
   299  }
   300  
   301  // nto returns the number of trailing ones.
   302  func nto(x int64) int64 {
   303  	return ntz(^x)
   304  }
   305  
   306  // log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
   307  // Rounds down.
   308  func log2(n int64) (l int64) {
   309  	l = -1
   310  	x := uint64(n)
   311  	for ; x >= 0x8000; x >>= 16 {
   312  		l += 16
   313  	}
   314  	if x >= 0x80 {
   315  		x >>= 8
   316  		l += 8
   317  	}
   318  	if x >= 0x8 {
   319  		x >>= 4
   320  		l += 4
   321  	}
   322  	if x >= 0x2 {
   323  		x >>= 2
   324  		l += 2
   325  	}
   326  	if x >= 0x1 {
   327  		l++
   328  	}
   329  	return
   330  }
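
// Illustrative sketch (added for exposition, not part of the original file): a few concrete
// values for the bit-counting helpers above, which rules use to reason about constant masks
// and shift amounts.
func exampleBitHelpers() {
	fmt.Println(log2(0), log2(1), log2(10)) // -1 0 3 (rounds down)
	fmt.Println(nlz(1), ntz(8))             // 63 3
	fmt.Println(oneBit(8), oneBit(10))      // true false
	fmt.Println(nlo(-1), nto(7))            // 64 3
}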
   331  
   332  // isPowerOfTwo reports whether n is a power of 2.
   333  func isPowerOfTwo(n int64) bool {
   334  	return n > 0 && n&(n-1) == 0
   335  }
   336  
   337  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   338  func is32Bit(n int64) bool {
   339  	return n == int64(int32(n))
   340  }
   341  
   342  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   343  func is16Bit(n int64) bool {
   344  	return n == int64(int16(n))
   345  }
   346  
   347  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   348  func isU12Bit(n int64) bool {
   349  	return 0 <= n && n < (1<<12)
   350  }
   351  
   352  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   353  func isU16Bit(n int64) bool {
   354  	return n == int64(uint16(n))
   355  }
   356  
   357  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   358  func isU32Bit(n int64) bool {
   359  	return n == int64(uint32(n))
   360  }
   361  
   362  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   363  func is20Bit(n int64) bool {
   364  	return -(1<<19) <= n && n < (1<<19)
   365  }
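
// Illustrative sketch (added for exposition, not part of the original file): the width
// predicates above decide whether a constant fits an instruction's immediate field; a few
// boundary cases:
func exampleWidthPredicates() {
	fmt.Println(is32Bit(1<<31-1), is32Bit(1<<31)) // true false
	fmt.Println(is16Bit(-1<<15), is16Bit(1<<15))  // true false
	fmt.Println(isU12Bit(4095), isU12Bit(-1))     // true false
	fmt.Println(is20Bit(-1<<19), is20Bit(1<<19))  // true false
}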
   366  
   367  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   368  func b2i(b bool) int64 {
   369  	if b {
   370  		return 1
   371  	}
   372  	return 0
   373  }
   374  
   375  // i2f is used in rules for converting from an AuxInt to a float.
   376  func i2f(i int64) float64 {
   377  	return math.Float64frombits(uint64(i))
   378  }
   379  
   380  // i2f32 is used in rules for converting from an AuxInt to a float32.
   381  func i2f32(i int64) float32 {
   382  	return float32(math.Float64frombits(uint64(i)))
   383  }
   384  
   385  // f2i is used in the rules for storing a float in AuxInt.
   386  func f2i(f float64) int64 {
   387  	return int64(math.Float64bits(f))
   388  }
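
// Illustrative sketch (added for exposition, not part of the original file): float constants
// are stored in the 64-bit AuxInt field as their IEEE-754 bit pattern, so f2i and i2f are
// exact inverses; i2f32 is meant for AuxInts known to hold a float32-representable constant.
func exampleAuxIntFloatRoundTrip() {
	aux := f2i(1.5)                // bit pattern of 1.5, as stored in an AuxInt
	fmt.Println(i2f(aux) == 1.5)   // true: lossless round trip
	fmt.Println(i2f32(aux) == 1.5) // true: 1.5 is exactly representable as a float32
}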
   389  
   390  // uaddOvf returns true if unsigned a+b would overflow.
   391  func uaddOvf(a, b int64) bool {
   392  	return uint64(a)+uint64(b) < uint64(a)
   393  }
   394  
   395  // de-virtualize an InterCall
   396  // 'sym' is the symbol for the itab
   397  func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
   398  	f := v.Block.Func
   399  	n, ok := sym.(*obj.LSym)
   400  	if !ok {
   401  		return nil
   402  	}
   403  	lsym := f.fe.DerefItab(n, offset)
   404  	if f.pass.debug > 0 {
   405  		if lsym != nil {
   406  			f.Warnl(v.Pos, "de-virtualizing call")
   407  		} else {
   408  			f.Warnl(v.Pos, "couldn't de-virtualize call")
   409  		}
   410  	}
   411  	return lsym
   412  }
   413  
   414  // isSamePtr reports whether p1 and p2 point to the same address.
   415  func isSamePtr(p1, p2 *Value) bool {
   416  	if p1 == p2 {
   417  		return true
   418  	}
   419  	if p1.Op != p2.Op {
   420  		return false
   421  	}
   422  	switch p1.Op {
   423  	case OpOffPtr:
   424  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   425  	case OpAddr:
   426  		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
   427  		// Checking for value equality only works after [z]cse has run.
   428  		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
   429  	case OpAddPtr:
   430  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   431  	}
   432  	return false
   433  }
   434  
   435  // moveSize returns the number of bytes an aligned MOV instruction moves
   436  func moveSize(align int64, c *Config) int64 {
   437  	switch {
   438  	case align%8 == 0 && c.PtrSize == 8:
   439  		return 8
   440  	case align%4 == 0:
   441  		return 4
   442  	case align%2 == 0:
   443  		return 2
   444  	}
   445  	return 1
   446  }
   447  
   448  // mergePoint finds a block among a's blocks which dominates b and is itself
   449  // dominated by all of a's blocks. Returns nil if it can't find one.
   450  // Might return nil even if one does exist.
   451  func mergePoint(b *Block, a ...*Value) *Block {
   452  	// Walk backward from b looking for one of the a's blocks.
   453  
   454  	// Max distance
   455  	d := 100
   456  
   457  	for d > 0 {
   458  		for _, x := range a {
   459  			if b == x.Block {
   460  				goto found
   461  			}
   462  		}
   463  		if len(b.Preds) > 1 {
   464  			// Don't know which way to go back. Abort.
   465  			return nil
   466  		}
   467  		b = b.Preds[0].b
   468  		d--
   469  	}
   470  	return nil // too far away
   471  found:
    472  	// At this point, b is the block of the first value in a found by walking backward.
    473  	// If we return anything, r (set to this block) will be it.
   474  	r := b
   475  
   476  	// Keep going, counting the other a's that we find. They must all dominate r.
   477  	na := 0
   478  	for d > 0 {
   479  		for _, x := range a {
   480  			if b == x.Block {
   481  				na++
   482  			}
   483  		}
   484  		if na == len(a) {
   485  			// Found all of a in a backwards walk. We can return r.
   486  			return r
   487  		}
   488  		if len(b.Preds) > 1 {
   489  			return nil
   490  		}
   491  		b = b.Preds[0].b
   492  		d--
   493  
   494  	}
   495  	return nil // too far away
   496  }
   497  
   498  // clobber invalidates v.  Returns true.
   499  // clobber is used by rewrite rules to:
   500  //   A) make sure v is really dead and never used again.
   501  //   B) decrement use counts of v's args.
   502  func clobber(v *Value) bool {
   503  	v.reset(OpInvalid)
   504  	// Note: leave v.Block intact.  The Block field is used after clobber.
   505  	return true
   506  }
   507  
   508  // clobberIfDead resets v when use count is 1. Returns true.
   509  // clobberIfDead is used by rewrite rules to decrement
   510  // use counts of v's args when v is dead and never used.
   511  func clobberIfDead(v *Value) bool {
   512  	if v.Uses == 1 {
   513  		v.reset(OpInvalid)
   514  	}
   515  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   516  	return true
   517  }
   518  
   519  // noteRule is an easy way to track if a rule is matched when writing
   520  // new ones.  Make the rule of interest also conditional on
   521  //     noteRule("note to self: rule of interest matched")
   522  // and that message will print when the rule matches.
   523  func noteRule(s string) bool {
   524  	fmt.Println(s)
   525  	return true
   526  }
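
// Illustrative sketch (added for exposition, hypothetical rule): while developing a rule in a
// gen/*.rules file, it can be made conditional on noteRule and the condition dropped once the
// rule is confirmed to fire, e.g.
//
//	(NEGL (SUBL x y)) && noteRule("NEGL/SUBL rule matched") -> (SUBL y x)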
   527  
   528  // warnRule generates a compiler debug output with string s when
   529  // cond is true and the rule is fired.
   530  func warnRule(cond bool, v *Value, s string) bool {
   531  	if cond {
   532  		v.Block.Func.Warnl(v.Pos, s)
   533  	}
   534  	return true
   535  }
   536  
   537  // for a pseudo-op like (LessThan x), extract x
   538  func flagArg(v *Value) *Value {
   539  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
   540  		return nil
   541  	}
   542  	return v.Args[0]
   543  }
   544  
   545  // arm64Negate finds the complement to an ARM64 condition code,
   546  // for example Equal -> NotEqual or LessThan -> GreaterEqual
   547  //
   548  // TODO: add floating-point conditions
   549  func arm64Negate(op Op) Op {
   550  	switch op {
   551  	case OpARM64LessThan:
   552  		return OpARM64GreaterEqual
   553  	case OpARM64LessThanU:
   554  		return OpARM64GreaterEqualU
   555  	case OpARM64GreaterThan:
   556  		return OpARM64LessEqual
   557  	case OpARM64GreaterThanU:
   558  		return OpARM64LessEqualU
   559  	case OpARM64LessEqual:
   560  		return OpARM64GreaterThan
   561  	case OpARM64LessEqualU:
   562  		return OpARM64GreaterThanU
   563  	case OpARM64GreaterEqual:
   564  		return OpARM64LessThan
   565  	case OpARM64GreaterEqualU:
   566  		return OpARM64LessThanU
   567  	case OpARM64Equal:
   568  		return OpARM64NotEqual
   569  	case OpARM64NotEqual:
   570  		return OpARM64Equal
   571  	default:
   572  		panic("unreachable")
   573  	}
   574  }
   575  
   576  // arm64Invert evaluates (InvertFlags op), which
   577  // is the same as altering the condition codes such
   578  // that the same result would be produced if the arguments
   579  // to the flag-generating instruction were reversed, e.g.
   580  // (InvertFlags (CMP x y)) -> (CMP y x)
   581  //
   582  // TODO: add floating-point conditions
   583  func arm64Invert(op Op) Op {
   584  	switch op {
   585  	case OpARM64LessThan:
   586  		return OpARM64GreaterThan
   587  	case OpARM64LessThanU:
   588  		return OpARM64GreaterThanU
   589  	case OpARM64GreaterThan:
   590  		return OpARM64LessThan
   591  	case OpARM64GreaterThanU:
   592  		return OpARM64LessThanU
   593  	case OpARM64LessEqual:
   594  		return OpARM64GreaterEqual
   595  	case OpARM64LessEqualU:
   596  		return OpARM64GreaterEqualU
   597  	case OpARM64GreaterEqual:
   598  		return OpARM64LessEqual
   599  	case OpARM64GreaterEqualU:
   600  		return OpARM64LessEqualU
   601  	case OpARM64Equal, OpARM64NotEqual:
   602  		return op
   603  	default:
   604  		panic("unreachable")
   605  	}
   606  }
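
// Illustrative sketch (added for exposition, not part of the original file): both tables are
// involutions on the integer condition codes they cover; negation pairs a condition with its
// complement, while inversion pairs it with the condition for swapped operands.
func exampleARM64CondTables() {
	fmt.Println(arm64Negate(OpARM64LessThan) == OpARM64GreaterEqual)            // true
	fmt.Println(arm64Negate(arm64Negate(OpARM64LessThanU)) == OpARM64LessThanU) // true
	fmt.Println(arm64Invert(OpARM64LessEqual) == OpARM64GreaterEqual)           // true
	fmt.Println(arm64Invert(OpARM64Equal) == OpARM64Equal)                      // true: Equal is symmetric
}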
   607  
   608  // evaluate an ARM64 op against a flags value
   609  // that is potentially constant; return 1 for true,
   610  // -1 for false, and 0 for not constant.
   611  func ccARM64Eval(cc interface{}, flags *Value) int {
   612  	op := cc.(Op)
   613  	fop := flags.Op
   614  	switch fop {
   615  	case OpARM64InvertFlags:
   616  		return -ccARM64Eval(op, flags.Args[0])
   617  	case OpARM64FlagEQ:
   618  		switch op {
   619  		case OpARM64Equal, OpARM64GreaterEqual, OpARM64LessEqual,
   620  			OpARM64GreaterEqualU, OpARM64LessEqualU:
   621  			return 1
   622  		default:
   623  			return -1
   624  		}
   625  	case OpARM64FlagLT_ULT:
   626  		switch op {
   627  		case OpARM64LessThan, OpARM64LessThanU,
   628  			OpARM64LessEqual, OpARM64LessEqualU:
   629  			return 1
   630  		default:
   631  			return -1
   632  		}
   633  	case OpARM64FlagLT_UGT:
   634  		switch op {
   635  		case OpARM64LessThan, OpARM64GreaterThanU,
   636  			OpARM64LessEqual, OpARM64GreaterEqualU:
   637  			return 1
   638  		default:
   639  			return -1
   640  		}
   641  	case OpARM64FlagGT_ULT:
   642  		switch op {
   643  		case OpARM64GreaterThan, OpARM64LessThanU,
   644  			OpARM64GreaterEqual, OpARM64LessEqualU:
   645  			return 1
   646  		default:
   647  			return -1
   648  		}
   649  	case OpARM64FlagGT_UGT:
   650  		switch op {
   651  		case OpARM64GreaterThan, OpARM64GreaterThanU,
   652  			OpARM64GreaterEqual, OpARM64GreaterEqualU:
   653  			return 1
   654  		default:
   655  			return -1
   656  		}
   657  	default:
   658  		return 0
   659  	}
   660  }
   661  
   662  // logRule logs the use of the rule s. This will only be enabled if
   663  // rewrite rules were generated with the -log option, see gen/rulegen.go.
   664  func logRule(s string) {
   665  	if ruleFile == nil {
   666  		// Open a log file to write log to. We open in append
   667  		// mode because all.bash runs the compiler lots of times,
   668  		// and we want the concatenation of all of those logs.
   669  		// This means, of course, that users need to rm the old log
   670  		// to get fresh data.
   671  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
   672  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
   673  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
   674  		if err != nil {
   675  			panic(err)
   676  		}
   677  		ruleFile = w
   678  	}
   679  	_, err := fmt.Fprintf(ruleFile, "rewrite %s\n", s)
   680  	if err != nil {
   681  		panic(err)
   682  	}
   683  }
   684  
   685  var ruleFile io.Writer
   686  
   687  func min(x, y int64) int64 {
   688  	if x < y {
   689  		return x
   690  	}
   691  	return y
   692  }
   693  
   694  func isConstZero(v *Value) bool {
   695  	switch v.Op {
   696  	case OpConstNil:
   697  		return true
   698  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
   699  		return v.AuxInt == 0
   700  	}
   701  	return false
   702  }
   703  
   704  // reciprocalExact64 reports whether 1/c is exactly representable.
   705  func reciprocalExact64(c float64) bool {
   706  	b := math.Float64bits(c)
   707  	man := b & (1<<52 - 1)
   708  	if man != 0 {
   709  		return false // not a power of 2, denormal, or NaN
   710  	}
   711  	exp := b >> 52 & (1<<11 - 1)
   712  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
   713  	// changes the exponent to 0x7fe-exp.
   714  	switch exp {
   715  	case 0:
   716  		return false // ±0
   717  	case 0x7ff:
   718  		return false // ±inf
   719  	case 0x7fe:
   720  		return false // exponent is not representable
   721  	default:
   722  		return true
   723  	}
   724  }
   725  
   726  // reciprocalExact32 reports whether 1/c is exactly representable.
   727  func reciprocalExact32(c float32) bool {
   728  	b := math.Float32bits(c)
   729  	man := b & (1<<23 - 1)
   730  	if man != 0 {
   731  		return false // not a power of 2, denormal, or NaN
   732  	}
   733  	exp := b >> 23 & (1<<8 - 1)
   734  	// exponent bias is 0x7f.  So taking the reciprocal of a number
   735  	// changes the exponent to 0xfe-exp.
   736  	switch exp {
   737  	case 0:
   738  		return false // ±0
   739  	case 0xff:
   740  		return false // ±inf
   741  	case 0xfe:
   742  		return false // exponent is not representable
   743  	default:
   744  		return true
   745  	}
   746  }
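
// Illustrative sketch (added for exposition, not part of the original file): these predicates
// are used when rules replace a division by a constant with a multiplication by its
// reciprocal, which is only safe when that reciprocal is exact.
func exampleReciprocalExact() {
	fmt.Println(reciprocalExact64(4.0)) // true: 1/4 == 0.25 exactly
	fmt.Println(reciprocalExact64(3.0)) // false: 1/3 is not exactly representable
	fmt.Println(reciprocalExact32(0.5)) // true: 1/0.5 == 2 exactly
	fmt.Println(reciprocalExact64(0))   // false: 1/0 would be an infinity
}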
   747  
   748  // check if an immediate can be directly encoded into an ARM's instruction
   749  func isARMImmRot(v uint32) bool {
   750  	for i := 0; i < 16; i++ {
   751  		if v&^0xff == 0 {
   752  			return true
   753  		}
   754  		v = v<<2 | v>>30
   755  	}
   756  
   757  	return false
   758  }
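
// Illustrative sketch (added for exposition, not part of the original file): an ARM
// data-processing immediate is an 8-bit value rotated right by an even amount, which is
// exactly what the rotation loop above searches for.
func exampleARMImmRot() {
	fmt.Println(isARMImmRot(0xff))       // true: already fits in 8 bits
	fmt.Println(isARMImmRot(0xff0))      // true: 0xff rotated into place
	fmt.Println(isARMImmRot(0xff000000)) // true: reachable by rotation
	fmt.Println(isARMImmRot(0x101))      // false: set bits span more than 8 bits
}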
   759  
    760  // overlap reports whether the memory ranges described by the two
    761  // offset/size pairs overlap.
   762  func overlap(offset1, size1, offset2, size2 int64) bool {
   763  	if offset1 >= offset2 && offset2+size2 > offset1 {
   764  		return true
   765  	}
   766  	if offset2 >= offset1 && offset1+size1 > offset2 {
   767  		return true
   768  	}
   769  	return false
   770  }
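
// Illustrative sketch (added for exposition, not part of the original file): overlap treats
// each range as half-open, [offset, offset+size), so ranges that merely touch do not overlap.
func exampleOverlap() {
	fmt.Println(overlap(0, 8, 4, 4)) // true: [0,8) and [4,8) share [4,8)
	fmt.Println(overlap(0, 4, 4, 4)) // false: [0,4) and [4,8) only touch at offset 4
}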
   771  
    772  // zeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
    773  // depth limits the recursion. In AMD64.rules a depth of 3 is used as the limit,
    774  // because it catches the same number of cases as 4.
   775  func zeroUpper32Bits(x *Value, depth int) bool {
   776  	switch x.Op {
   777  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
   778  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
   779  		OpAMD64MOVLloadidx4, OpAMD64ADDLmem, OpAMD64SUBLmem, OpAMD64ANDLmem,
   780  		OpAMD64ORLmem, OpAMD64XORLmem, OpAMD64CVTTSD2SL,
   781  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
   782  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
   783  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL:
   784  		return true
   785  	case OpArg:
   786  		return x.Type.Width == 4
   787  	case OpSelect0, OpSelect1:
   788  		// Disabled for now. See issue 23305.
   789  		// TODO: we could look into the arg of the Select to decide.
   790  		return false
   791  	case OpPhi:
    792  		// Phis can use each other as arguments; instead of tracking
    793  		// visited values, just limit the recursion depth.
   794  		if depth <= 0 {
   795  			return false
   796  		}
   797  		for i := range x.Args {
   798  			if !zeroUpper32Bits(x.Args[i], depth-1) {
   799  				return false
   800  			}
   801  		}
   802  		return true
   803  
   804  	}
   805  	return false
   806  }
   807  
    808  // isInlinableMemmoveSize reports whether the given arch performs OpMove of the given size
   809  // faster than memmove and in a safe way when src and dst overlap.
   810  // This is used as a check for replacing memmove with OpMove.
   811  func isInlinableMemmoveSize(sz int64, c *Config) bool {
   812  	switch c.arch {
   813  	case "amd64", "amd64p32":
   814  		return sz <= 16
   815  	case "386", "ppc64", "s390x", "ppc64le":
   816  		return sz <= 8
   817  	case "arm", "mips", "mips64", "mipsle", "mips64le":
   818  		return sz <= 4
   819  	}
   820  	return false
   821  }