github.com/ericlagergren/ctb@v0.0.0-20220810041818-96749d9c394d/xbits/arith.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xbits 6 7 // Many of the loops in this file are of the form 8 // for i := 0; i < len(z) && i < len(x) && i < len(y); i++ 9 // i < len(z) is the real condition. 10 // However, checking i < len(x) && i < len(y) as well is faster than 11 // having the compiler do a bounds check in the body of the loop; 12 // remarkably it is even faster than hoisting the bounds check 13 // out of the loop, by doing something like 14 // _, _ = x[len(z)-1], y[len(z)-1] 15 // There are other ways to hoist the bounds check out of the loop, 16 // but the compiler's BCE isn't powerful enough for them (yet?). 17 // See the discussion in CL 164966. 18 19 import "math/bits" 20 21 func mulAddVWW(z, x []uint64, y, r uint64) (c uint64) { 22 c = r 23 // The comment near the top of this file discusses this for loop condition. 24 for i := 0; i < len(z) && i < len(x); i++ { 25 c, z[i] = mul128(x[i], y, c) 26 } 27 return 28 } 29 30 // The resulting carry c is either 0 or 1. 31 func addVV(z, x, y []uint64) (c uint64) { 32 // The comment near the top of this file discusses this for loop condition. 33 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 34 zi, cc := bits.Add64(x[i], y[i], c) 35 z[i] = zi 36 c = cc 37 } 38 return 39 } 40 41 // The resulting carry c is either 0 or 1. 42 func subVV(z, x, y []uint64) (c uint64) { 43 // The comment near the top of this file discusses this for loop condition. 44 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 45 zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) 46 z[i] = uint64(zi) 47 c = uint64(cc) 48 } 49 return 50 } 51 52 // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y. 53 // An approximate reciprocal with a reference to "Improved Division by Invariant Integers 54 // (IEEE Transactions on Computers, 11 Jun. 2010)" 55 func divWW(x1, x0, y, m uint64) (q, r uint64) { 56 s := bits.LeadingZeros64(y) 57 if s != 0 { 58 x1 = x1<<s | x0>>(64-s) 59 x0 <<= s 60 y <<= s 61 } 62 d := uint(y) 63 // We know that 64 // m = ⎣(B^2-1)/d⎦-B 65 // ⎣(B^2-1)/d⎦ = m+B 66 // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d 67 // B^2/d = m+B+delta2 0 <= delta2 <= 1 68 // The quotient we're trying to compute is 69 // quotient = ⎣(x1*B+x0)/d⎦ 70 // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦ 71 // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦ 72 // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦ 73 // The latter two terms of this three-term sum are between 0 and 1. 74 // So we can compute just the first term, and we will be low by at most 2. 75 t1, t0 := bits.Mul(uint(m), uint(x1)) 76 _, c := bits.Add(t0, uint(x0), 0) 77 t1, _ = bits.Add(t1, uint(x1), c) 78 // The quotient is either t1, t1+1, or t1+2. 79 // We'll try t1 and adjust if needed. 80 qq := t1 81 // compute remainder r=x-d*q. 82 dq1, dq0 := bits.Mul(d, qq) 83 r0, b := bits.Sub(uint(x0), dq0, 0) 84 r1, _ := bits.Sub(uint(x1), dq1, b) 85 // The remainder we just computed is bounded above by B+d: 86 // r = x1*B + x0 - d*q. 87 // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦ 88 // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1 89 // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha 90 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha 91 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha 92 // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1 93 // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha 94 // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha 95 // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha 96 // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1 97 // = B - d + d + d 98 // = B+d 99 // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small. 100 // Add 1 to q and subtract d from r. That guarantees that r is <B, so 101 // we no longer need to keep track of r1. 102 if r1 != 0 { 103 qq++ 104 r0 -= d 105 } 106 // If the remainder is still too large, increment q one more time. 107 if r0 >= d { 108 qq++ 109 r0 -= d 110 } 111 return uint64(qq), uint64(r0 >> s) 112 } 113 114 // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) 115 func greaterThan(x1, x2, y1, y2 uint64) bool { 116 return x1 > y1 || x1 == y1 && x2 > y2 117 } 118 119 func reciprocal(d1 uint64) uint64 { 120 const mask = 1<<64 - 1 121 u := d1 << bits.LeadingZeros64(d1) 122 // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U 123 rec, _ := bits.Div64(^u, mask, u) 124 return rec 125 }