github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/cmd/compile/internal/ssa/magic.go

github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/cmd/compile/internal/ssa/magic.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import "math/big"
     8  
     9  // So you want to compute x / c for some constant c?
    10  // Machine division instructions are slow, so we try to
    11  // compute this division with a multiplication + a few
    12  // other cheap instructions instead.
    13  // (We assume here that c != 0, +/- 1, or +/- 2^i.  Those
    14  // cases are easy to handle in different ways).
    15  
    16  // Technique from https://gmplib.org/~tege/divcnst-pldi94.pdf
    17  
    18  // First consider unsigned division.
    19  // Our strategy is to precompute 1/c then do
    20  //   ⎣x / c⎦ = ⎣x * (1/c)⎦.
    21  // 1/c is less than 1, so we can't compute it directly in
    22  // integer arithmetic.  Let's instead compute 2^e/c
    23  // for a value of e TBD (^ = exponentiation).  Then
    24  //   ⎣x / c⎦ = ⎣x * (2^e/c) / 2^e⎦.
    25  // Dividing by 2^e is easy.  2^e/c isn't an integer, unfortunately.
    26  // So we must approximate it.  Let's call its approximation m.
    27  // We'll then compute
    28  //   ⎣x * m / 2^e⎦
    29  // Which we want to be equal to ⎣x / c⎦ for 0 <= x < 2^n-1
    30  // where n is the word size.
    31  // Setting x = c gives us c * m >= 2^e.
    32  // We'll chose m = ⎡2^e/c⎤ to satisfy that equation.
    33  // What remains is to choose e.
    34  // Let m = 2^e/c + delta, 0 <= delta < 1
    35  //   ⎣x * (2^e/c + delta) / 2^e⎦
    36  //   ⎣x / c + x * delta / 2^e⎦
    37  // We must have x * delta / 2^e < 1/c so that this
    38  // additional term never rounds differently than ⎣x / c⎦ does.
    39  // Rearranging,
    40  //   2^e > x * delta * c
    41  // x can be at most 2^n-1 and delta can be at most 1.
    42  // So it is sufficient to have 2^e >= 2^n*c.
    43  // So we'll choose e = n + s, with s = ⎡log2(c)⎤.
    44  //
    45  // An additional complication arises because m has n+1 bits in it.
    46  // Hardware restricts us to n bit by n bit multiplies.
    47  // We divide into 3 cases:
    48  //
    49  // Case 1: m is even.
    50  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
    51  //   ⎣x / c⎦ = ⎣x * (m/2) / 2^(n+s-1)⎦
    52  //   ⎣x / c⎦ = ⎣x * (m/2) / 2^n / 2^(s-1)⎦
    53  //   ⎣x / c⎦ = ⎣⎣x * (m/2) / 2^n⎦ / 2^(s-1)⎦
    54  //   multiply + shift
    55  //
    56  // Case 2: c is even.
    57  //   ⎣x / c⎦ = ⎣(x/2) / (c/2)⎦
    58  //   ⎣x / c⎦ = ⎣⎣x/2⎦ / (c/2)⎦
    59  //     This is just the original problem, with x' = ⎣x/2⎦, c' = c/2, n' = n-1.
    60  //       s' = s-1
    61  //       m' = ⎡2^(n'+s')/c'⎤
    62  //          = ⎡2^(n+s-1)/c⎤
    63  //          = ⎡m/2⎤
    64  //   ⎣x / c⎦ = ⎣x' * m' / 2^(n'+s')⎦
    65  //   ⎣x / c⎦ = ⎣⎣x/2⎦ * ⎡m/2⎤ / 2^(n+s-2)⎦
    66  //   ⎣x / c⎦ = ⎣⎣⎣x/2⎦ * ⎡m/2⎤ / 2^n⎦ / 2^(s-2)⎦
    67  //   shift + multiply + shift
    68  //
    69  // Case 3: everything else
    70  //   let k = m - 2^n. k fits in n bits.
    71  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
    72  //   ⎣x / c⎦ = ⎣x * (2^n + k) / 2^(n+s)⎦
    73  //   ⎣x / c⎦ = ⎣(x + x * k / 2^n) / 2^s⎦
    74  //   ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
    75  //   ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
    76  //   ⎣x / c⎦ = ⎣⎣(x + ⎣x * k / 2^n⎦) / 2⎦ / 2^(s-1)⎦
    77  //   multiply + avg + shift
    78  //
    79  // These can be implemented in hardware using:
    80  //  ⎣a * b / 2^n⎦ - aka high n bits of an n-bit by n-bit multiply.
    81  //  ⎣(a+b) / 2⎦   - aka "average" of two n-bit numbers.
    82  //                  (Not just a regular add & shift because the intermediate result
    83  //                   a+b has n+1 bits in it.  Nevertheless, can be done
    84  //                   in 2 instructions on x86.)
    85  
    86  // umagicOK returns whether we should strength reduce a n-bit divide by c.
    87  func umagicOK(n uint, c int64) bool {
    88  	// Convert from ConstX auxint values to the real uint64 constant they represent.
    89  	d := uint64(c) << (64 - n) >> (64 - n)
    90  
    91  	// Doesn't work for 0.
    92  	// Don't use for powers of 2.
    93  	return d&(d-1) != 0
    94  }
    95  
    96  type umagicData struct {
    97  	s int64  // ⎡log2(c)⎤
    98  	m uint64 // ⎡2^(n+s)/c⎤ - 2^n
    99  }
   100  
   101  // umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c).
   102  // The return values satisfy for all 0 <= x < 2^n
   103  //  floor(x / uint64(c)) = x * (m + 2^n) >> (n+s)
   104  func umagic(n uint, c int64) umagicData {
   105  	// Convert from ConstX auxint values to the real uint64 constant they represent.
   106  	d := uint64(c) << (64 - n) >> (64 - n)
   107  
   108  	C := new(big.Int).SetUint64(d)
   109  	s := C.BitLen()
   110  	M := big.NewInt(1)
   111  	M.Lsh(M, n+uint(s))     // 2^(n+s)
   112  	M.Add(M, C)             // 2^(n+s)+c
   113  	M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
   114  	M.Div(M, C)             // ⎡2^(n+s)/c⎤
   115  	if M.Bit(int(n)) != 1 {
   116  		panic("n+1st bit isn't set")
   117  	}
   118  	M.SetBit(M, int(n), 0)
   119  	m := M.Uint64()
   120  	return umagicData{s: int64(s), m: m}
   121  }
   122  
   123  // For signed division, we use a similar strategy.
   124  // First, we enforce a positive c.
   125  //   x / c = -(x / (-c))
   126  // This will require an additional Neg op for c<0.
   127  //
   128  // If x is positive we're in a very similar state
   129  // to the unsigned case above.  We define:
   130  //   s = ⎡log2(c)⎤-1
   131  //   m = ⎡2^(n+s)/c⎤
   132  // Then
   133  //   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
   134  // If x is negative we have
   135  //   ⎡x / c⎤ = ⎣x * m / 2^(n+s)⎦ + 1
   136  // (TODO: derivation?)
   137  //
   138  // The multiply is a bit odd, as it is a signed n-bit value
   139  // times an unsigned n-bit value.  For n smaller than the
   140  // word size, we can extend x and m appropriately and use the
   141  // signed multiply instruction.  For n == word size,
   142  // we must use the signed multiply high and correct
   143  // the result by adding x*2^n.
   144  //
   145  // Adding 1 if x<0 is done by subtracting x>>(n-1).
   146  
   147  func smagicOK(n uint, c int64) bool {
   148  	if c < 0 {
   149  		// Doesn't work for negative c.
   150  		return false
   151  	}
   152  	// Doesn't work for 0.
   153  	// Don't use it for powers of 2.
   154  	return c&(c-1) != 0
   155  }
   156  
   157  type smagicData struct {
   158  	s int64  // ⎡log2(c)⎤-1
   159  	m uint64 // ⎡2^(n+s)/c⎤
   160  }
   161  
   162  // magic computes the constants needed to strength reduce signed n-bit divides by the constant c.
   163  // Must have c>0.
   164  // The return values satisfy for all -2^(n-1) <= x < 2^(n-1)
   165  //  trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0)
   166  func smagic(n uint, c int64) smagicData {
   167  	C := new(big.Int).SetInt64(c)
   168  	s := C.BitLen() - 1
   169  	M := big.NewInt(1)
   170  	M.Lsh(M, n+uint(s))     // 2^(n+s)
   171  	M.Add(M, C)             // 2^(n+s)+c
   172  	M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
   173  	M.Div(M, C)             // ⎡2^(n+s)/c⎤
   174  	if M.Bit(int(n)) != 0 {
   175  		panic("n+1st bit is set")
   176  	}
   177  	if M.Bit(int(n-1)) == 0 {
   178  		panic("nth bit is not set")
   179  	}
   180  	m := M.Uint64()
   181  	return smagicData{s: int64(s), m: m}
   182  }