github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/ssa/magic.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package ssa 6 7 import ( 8 "math/big" 9 "math/bits" 10 ) 11 12 // So you want to compute x / c for some constant c? 13 // Machine division instructions are slow, so we try to 14 // compute this division with a multiplication + a few 15 // other cheap instructions instead. 16 // (We assume here that c != 0, +/- 1, or +/- 2^i. Those 17 // cases are easy to handle in different ways). 18 19 // Technique from https://gmplib.org/~tege/divcnst-pldi94.pdf 20 21 // First consider unsigned division. 22 // Our strategy is to precompute 1/c then do 23 // ⎣x / c⎦ = ⎣x * (1/c)⎦. 24 // 1/c is less than 1, so we can't compute it directly in 25 // integer arithmetic. Let's instead compute 2^e/c 26 // for a value of e TBD (^ = exponentiation). Then 27 // ⎣x / c⎦ = ⎣x * (2^e/c) / 2^e⎦. 28 // Dividing by 2^e is easy. 2^e/c isn't an integer, unfortunately. 29 // So we must approximate it. Let's call its approximation m. 30 // We'll then compute 31 // ⎣x * m / 2^e⎦ 32 // Which we want to be equal to ⎣x / c⎦ for 0 <= x < 2^n-1 33 // where n is the word size. 34 // Setting x = c gives us c * m >= 2^e. 35 // We'll chose m = ⎡2^e/c⎤ to satisfy that equation. 36 // What remains is to choose e. 37 // Let m = 2^e/c + delta, 0 <= delta < 1 38 // ⎣x * (2^e/c + delta) / 2^e⎦ 39 // ⎣x / c + x * delta / 2^e⎦ 40 // We must have x * delta / 2^e < 1/c so that this 41 // additional term never rounds differently than ⎣x / c⎦ does. 42 // Rearranging, 43 // 2^e > x * delta * c 44 // x can be at most 2^n-1 and delta can be at most 1. 45 // So it is sufficient to have 2^e >= 2^n*c. 46 // So we'll choose e = n + s, with s = ⎡log2(c)⎤. 47 // 48 // An additional complication arises because m has n+1 bits in it. 49 // Hardware restricts us to n bit by n bit multiplies. 50 // We divide into 3 cases: 51 // 52 // Case 1: m is even. 53 // ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦ 54 // ⎣x / c⎦ = ⎣x * (m/2) / 2^(n+s-1)⎦ 55 // ⎣x / c⎦ = ⎣x * (m/2) / 2^n / 2^(s-1)⎦ 56 // ⎣x / c⎦ = ⎣⎣x * (m/2) / 2^n⎦ / 2^(s-1)⎦ 57 // multiply + shift 58 // 59 // Case 2: c is even. 60 // ⎣x / c⎦ = ⎣(x/2) / (c/2)⎦ 61 // ⎣x / c⎦ = ⎣⎣x/2⎦ / (c/2)⎦ 62 // This is just the original problem, with x' = ⎣x/2⎦, c' = c/2, n' = n-1. 63 // s' = s-1 64 // m' = ⎡2^(n'+s')/c'⎤ 65 // = ⎡2^(n+s-1)/c⎤ 66 // = ⎡m/2⎤ 67 // ⎣x / c⎦ = ⎣x' * m' / 2^(n'+s')⎦ 68 // ⎣x / c⎦ = ⎣⎣x/2⎦ * ⎡m/2⎤ / 2^(n+s-2)⎦ 69 // ⎣x / c⎦ = ⎣⎣⎣x/2⎦ * ⎡m/2⎤ / 2^n⎦ / 2^(s-2)⎦ 70 // shift + multiply + shift 71 // 72 // Case 3: everything else 73 // let k = m - 2^n. k fits in n bits. 74 // ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦ 75 // ⎣x / c⎦ = ⎣x * (2^n + k) / 2^(n+s)⎦ 76 // ⎣x / c⎦ = ⎣(x + x * k / 2^n) / 2^s⎦ 77 // ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦ 78 // ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦ 79 // ⎣x / c⎦ = ⎣⎣(x + ⎣x * k / 2^n⎦) / 2⎦ / 2^(s-1)⎦ 80 // multiply + avg + shift 81 // 82 // These can be implemented in hardware using: 83 // ⎣a * b / 2^n⎦ - aka high n bits of an n-bit by n-bit multiply. 84 // ⎣(a+b) / 2⎦ - aka "average" of two n-bit numbers. 85 // (Not just a regular add & shift because the intermediate result 86 // a+b has n+1 bits in it. Nevertheless, can be done 87 // in 2 instructions on x86.) 88 89 // umagicOK reports whether we should strength reduce a n-bit divide by c. 90 func umagicOK(n uint, c int64) bool { 91 // Convert from ConstX auxint values to the real uint64 constant they represent. 92 d := uint64(c) << (64 - n) >> (64 - n) 93 94 // Doesn't work for 0. 95 // Don't use for powers of 2. 96 return d&(d-1) != 0 97 } 98 99 type umagicData struct { 100 s int64 // ⎡log2(c)⎤ 101 m uint64 // ⎡2^(n+s)/c⎤ - 2^n 102 } 103 104 // umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c). 105 // The return values satisfy for all 0 <= x < 2^n 106 // floor(x / uint64(c)) = x * (m + 2^n) >> (n+s) 107 func umagic(n uint, c int64) umagicData { 108 // Convert from ConstX auxint values to the real uint64 constant they represent. 109 d := uint64(c) << (64 - n) >> (64 - n) 110 111 C := new(big.Int).SetUint64(d) 112 s := C.BitLen() 113 M := big.NewInt(1) 114 M.Lsh(M, n+uint(s)) // 2^(n+s) 115 M.Add(M, C) // 2^(n+s)+c 116 M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1 117 M.Div(M, C) // ⎡2^(n+s)/c⎤ 118 if M.Bit(int(n)) != 1 { 119 panic("n+1st bit isn't set") 120 } 121 M.SetBit(M, int(n), 0) 122 m := M.Uint64() 123 return umagicData{s: int64(s), m: m} 124 } 125 126 // For signed division, we use a similar strategy. 127 // First, we enforce a positive c. 128 // x / c = -(x / (-c)) 129 // This will require an additional Neg op for c<0. 130 // 131 // If x is positive we're in a very similar state 132 // to the unsigned case above. We define: 133 // s = ⎡log2(c)⎤-1 134 // m = ⎡2^(n+s)/c⎤ 135 // Then 136 // ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦ 137 // If x is negative we have 138 // ⎡x / c⎤ = ⎣x * m / 2^(n+s)⎦ + 1 139 // (TODO: derivation?) 140 // 141 // The multiply is a bit odd, as it is a signed n-bit value 142 // times an unsigned n-bit value. For n smaller than the 143 // word size, we can extend x and m appropriately and use the 144 // signed multiply instruction. For n == word size, 145 // we must use the signed multiply high and correct 146 // the result by adding x*2^n. 147 // 148 // Adding 1 if x<0 is done by subtracting x>>(n-1). 149 150 func smagicOK(n uint, c int64) bool { 151 if c < 0 { 152 // Doesn't work for negative c. 153 return false 154 } 155 // Doesn't work for 0. 156 // Don't use it for powers of 2. 157 return c&(c-1) != 0 158 } 159 160 type smagicData struct { 161 s int64 // ⎡log2(c)⎤-1 162 m uint64 // ⎡2^(n+s)/c⎤ 163 } 164 165 // magic computes the constants needed to strength reduce signed n-bit divides by the constant c. 166 // Must have c>0. 167 // The return values satisfy for all -2^(n-1) <= x < 2^(n-1) 168 // trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0) 169 func smagic(n uint, c int64) smagicData { 170 C := new(big.Int).SetInt64(c) 171 s := C.BitLen() - 1 172 M := big.NewInt(1) 173 M.Lsh(M, n+uint(s)) // 2^(n+s) 174 M.Add(M, C) // 2^(n+s)+c 175 M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1 176 M.Div(M, C) // ⎡2^(n+s)/c⎤ 177 if M.Bit(int(n)) != 0 { 178 panic("n+1st bit is set") 179 } 180 if M.Bit(int(n-1)) == 0 { 181 panic("nth bit is not set") 182 } 183 m := M.Uint64() 184 return smagicData{s: int64(s), m: m} 185 } 186 187 // Divisibility x%c == 0 can be checked more efficiently than directly computing 188 // the modulus x%c and comparing against 0. 189 // 190 // The same "Division by invariant integers using multiplication" paper 191 // by Granlund and Montgomery referenced above briefly mentions this method 192 // and it is further elaborated in "Hacker's Delight" by Warren Section 10-17 193 // 194 // The first thing to note is that for odd integers, exact division can be computed 195 // by using the modular inverse with respect to the word size 2^n. 196 // 197 // Given c, compute m such that (c * m) mod 2^n == 1 198 // Then if c divides x (x%c ==0), the quotient is given by q = x/c == x*m mod 2^n 199 // 200 // x can range from 0, c, 2c, 3c, ... ⎣(2^n - 1)/c⎦ * c the maximum multiple 201 // Thus, x*m mod 2^n is 0, 1, 2, 3, ... ⎣(2^n - 1)/c⎦ 202 // i.e. the quotient takes all values from zero up to max = ⎣(2^n - 1)/c⎦ 203 // 204 // If x is not divisible by c, then x*m mod 2^n must take some larger value than max. 205 // 206 // This gives x*m mod 2^n <= ⎣(2^n - 1)/c⎦ as a test for divisibility 207 // involving one multiplication and compare. 208 // 209 // To extend this to even integers, consider c = d0 * 2^k where d0 is odd. 210 // We can test whether x is divisible by both d0 and 2^k. 211 // For d0, the test is the same as above. Let m be such that m*d0 mod 2^n == 1 212 // Then x*m mod 2^n <= ⎣(2^n - 1)/d0⎦ is the first test. 213 // The test for divisibility by 2^k is a check for k trailing zeroes. 214 // Note that since d0 is odd, m is odd and thus x*m will have the same number of 215 // trailing zeroes as x. So the two tests are, 216 // 217 // x*m mod 2^n <= ⎣(2^n - 1)/d0⎦ 218 // and x*m ends in k zero bits 219 // 220 // These can be combined into a single comparison by the following 221 // (theorem ZRU in Hacker's Delight) for unsigned integers. 222 // 223 // x <= a and x ends in k zero bits if and only if RotRight(x ,k) <= ⎣a/(2^k)⎦ 224 // Where RotRight(x ,k) is right rotation of x by k bits. 225 // 226 // To prove the first direction, x <= a -> ⎣x/(2^k)⎦ <= ⎣a/(2^k)⎦ 227 // But since x ends in k zeroes all the rotated bits would be zero too. 228 // So RotRight(x, k) == ⎣x/(2^k)⎦ <= ⎣a/(2^k)⎦ 229 // 230 // If x does not end in k zero bits, then RotRight(x, k) 231 // has some non-zero bits in the k highest bits. 232 // ⎣x/(2^k)⎦ has all zeroes in the k highest bits, 233 // so RotRight(x, k) > ⎣x/(2^k)⎦ 234 // 235 // Finally, if x > a and has k trailing zero bits, then RotRight(x, k) == ⎣x/(2^k)⎦ 236 // and ⎣x/(2^k)⎦ must be greater than ⎣a/(2^k)⎦, that is the top n-k bits of x must 237 // be greater than the top n-k bits of a because the rest of x bits are zero. 238 // 239 // So the two conditions about can be replaced with the single test 240 // 241 // RotRight(x*m mod 2^n, k) <= ⎣(2^n - 1)/c⎦ 242 // 243 // Where d0*2^k was replaced by c on the right hand side. 244 245 // uivisibleOK reports whether we should strength reduce a n-bit dividisibilty check by c. 246 func udivisibleOK(n uint, c int64) bool { 247 // Convert from ConstX auxint values to the real uint64 constant they represent. 248 d := uint64(c) << (64 - n) >> (64 - n) 249 250 // Doesn't work for 0. 251 // Don't use for powers of 2. 252 return d&(d-1) != 0 253 } 254 255 type udivisibleData struct { 256 k int64 // trailingZeros(c) 257 m uint64 // m * (c>>k) mod 2^n == 1 multiplicative inverse of odd portion modulo 2^n 258 max uint64 // ⎣(2^n - 1)/ c⎦ max value to for divisibility 259 } 260 261 func udivisible(n uint, c int64) udivisibleData { 262 // Convert from ConstX auxint values to the real uint64 constant they represent. 263 d := uint64(c) << (64 - n) >> (64 - n) 264 265 k := bits.TrailingZeros64(d) 266 d0 := d >> uint(k) // the odd portion of the divisor 267 268 mask := ^uint64(0) >> (64 - n) 269 270 // Calculate the multiplicative inverse via Newton's method. 271 // Quadratic convergence doubles the number of correct bits per iteration. 272 m := d0 // initial guess correct to 3-bits d0*d0 mod 8 == 1 273 m = m * (2 - m*d0) // 6-bits 274 m = m * (2 - m*d0) // 12-bits 275 m = m * (2 - m*d0) // 24-bits 276 m = m * (2 - m*d0) // 48-bits 277 m = m * (2 - m*d0) // 96-bits >= 64-bits 278 m = m & mask 279 280 max := mask / d 281 282 return udivisibleData{ 283 k: int64(k), 284 m: m, 285 max: max, 286 } 287 } 288 289 // For signed integers, a similar method follows. 290 // 291 // Given c > 1 and odd, compute m such that (c * m) mod 2^n == 1 292 // Then if c divides x (x%c ==0), the quotient is given by q = x/c == x*m mod 2^n 293 // 294 // x can range from ⎡-2^(n-1)/c⎤ * c, ... -c, 0, c, ... ⎣(2^(n-1) - 1)/c⎦ * c 295 // Thus, x*m mod 2^n is ⎡-2^(n-1)/c⎤, ... -2, -1, 0, 1, 2, ... ⎣(2^(n-1) - 1)/c⎦ 296 // 297 // So, x is a multiple of c if and only if: 298 // ⎡-2^(n-1)/c⎤ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦ 299 // 300 // Since c > 1 and odd, this can be simplified by 301 // ⎡-2^(n-1)/c⎤ == ⎡(-2^(n-1) + 1)/c⎤ == -⎣(2^(n-1) - 1)/c⎦ 302 // 303 // -⎣(2^(n-1) - 1)/c⎦ <= x*m mod 2^n <= ⎣(2^(n-1) - 1)/c⎦ 304 // 305 // To extend this to even integers, consider c = d0 * 2^k where d0 is odd. 306 // We can test whether x is divisible by both d0 and 2^k. 307 // 308 // Let m be such that (d0 * m) mod 2^n == 1. 309 // Let q = x*m mod 2^n. Then c divides x if: 310 // 311 // -⎣(2^(n-1) - 1)/d0⎦ <= q <= ⎣(2^(n-1) - 1)/d0⎦ and q ends in at least k 0-bits 312 // 313 // To transform this to a single comparison, we use the following theorem (ZRS in Hacker's Delight). 314 // 315 // For a >= 0 the following conditions are equivalent: 316 // 1) -a <= x <= a and x ends in at least k 0-bits 317 // 2) RotRight(x+a', k) <= ⎣2a'/2^k⎦ 318 // 319 // Where a' = a & -2^k (a with its right k bits set to zero) 320 // 321 // To see that 1 & 2 are equivalent, note that -a <= x <= a is equivalent to 322 // -a' <= x <= a' if and only if x ends in at least k 0-bits. Adding -a' to each side gives, 323 // 0 <= x + a' <= 2a' and x + a' ends in at least k 0-bits if and only if x does since a' has 324 // k 0-bits by definition. We can use theorem ZRU above with x -> x + a' and a -> 2a' giving 1) == 2). 325 // 326 // Let m be such that (d0 * m) mod 2^n == 1. 327 // Let q = x*m mod 2^n. 328 // Let a' = ⎣(2^(n-1) - 1)/d0⎦ & -2^k 329 // 330 // Then the divisibility test is: 331 // 332 // RotRight(q+a', k) <= ⎣2a'/2^k⎦ 333 // 334 // Note that the calculation is performed using unsigned integers. 335 // Since a' can have n-1 bits, 2a' may have n bits and there is no risk of overflow. 336 337 // sdivisibleOK reports whether we should strength reduce a n-bit dividisibilty check by c. 338 func sdivisibleOK(n uint, c int64) bool { 339 if c < 0 { 340 // Doesn't work for negative c. 341 return false 342 } 343 // Doesn't work for 0. 344 // Don't use it for powers of 2. 345 return c&(c-1) != 0 346 } 347 348 type sdivisibleData struct { 349 k int64 // trailingZeros(c) 350 m uint64 // m * (c>>k) mod 2^n == 1 multiplicative inverse of odd portion modulo 2^n 351 a uint64 // ⎣(2^(n-1) - 1)/ (c>>k)⎦ & -(1<<k) additive constant 352 max uint64 // ⎣(2 a) / (1<<k)⎦ max value to for divisibility 353 } 354 355 func sdivisible(n uint, c int64) sdivisibleData { 356 d := uint64(c) 357 k := bits.TrailingZeros64(d) 358 d0 := d >> uint(k) // the odd portion of the divisor 359 360 mask := ^uint64(0) >> (64 - n) 361 362 // Calculate the multiplicative inverse via Newton's method. 363 // Quadratic convergence doubles the number of correct bits per iteration. 364 m := d0 // initial guess correct to 3-bits d0*d0 mod 8 == 1 365 m = m * (2 - m*d0) // 6-bits 366 m = m * (2 - m*d0) // 12-bits 367 m = m * (2 - m*d0) // 24-bits 368 m = m * (2 - m*d0) // 48-bits 369 m = m * (2 - m*d0) // 96-bits >= 64-bits 370 m = m & mask 371 372 a := ((mask >> 1) / d0) & -(1 << uint(k)) 373 max := (2 * a) >> uint(k) 374 375 return sdivisibleData{ 376 k: int64(k), 377 m: m, 378 a: a, 379 max: max, 380 } 381 }