github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/math/big/arith.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file provides Go implementations of elementary multi-precision 6 // arithmetic operations on word vectors. These have the suffix _g. 7 // These are needed for platforms without assembly implementations of these routines. 8 // This file also contains elementary operations that can be implemented 9 // sufficiently efficiently in Go. 10 11 package big 12 13 import "math/bits" 14 15 // A Word represents a single digit of a multi-precision unsigned integer. 16 type Word uint 17 18 const ( 19 _S = _W / 8 // word size in bytes 20 21 _W = bits.UintSize // word size in bits 22 _B = 1 << _W // digit base 23 _M = _B - 1 // digit mask 24 ) 25 26 // Many of the loops in this file are of the form 27 // for i := 0; i < len(z) && i < len(x) && i < len(y); i++ 28 // i < len(z) is the real condition. 29 // However, checking i < len(x) && i < len(y) as well is faster than 30 // having the compiler do a bounds check in the body of the loop; 31 // remarkably it is even faster than hoisting the bounds check 32 // out of the loop, by doing something like 33 // _, _ = x[len(z)-1], y[len(z)-1] 34 // There are other ways to hoist the bounds check out of the loop, 35 // but the compiler's BCE isn't powerful enough for them (yet?). 36 // See the discussion in CL 164966. 37 38 // ---------------------------------------------------------------------------- 39 // Elementary operations on words 40 // 41 // These operations are used by the vector operations below. 42 43 // z1<<_W + z0 = x*y 44 func mulWW_g(x, y Word) (z1, z0 Word) { 45 hi, lo := bits.Mul(uint(x), uint(y)) 46 return Word(hi), Word(lo) 47 } 48 49 // z1<<_W + z0 = x*y + c 50 func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { 51 hi, lo := bits.Mul(uint(x), uint(y)) 52 var cc uint 53 lo, cc = bits.Add(lo, uint(c), 0) 54 return Word(hi + cc), Word(lo) 55 } 56 57 // nlz returns the number of leading zeros in x. 58 // Wraps bits.LeadingZeros call for convenience. 59 func nlz(x Word) uint { 60 return uint(bits.LeadingZeros(uint(x))) 61 } 62 63 // The resulting carry c is either 0 or 1. 64 func addVV_g(z, x, y []Word) (c Word) { 65 // The comment near the top of this file discusses this for loop condition. 66 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 67 zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c)) 68 z[i] = Word(zi) 69 c = Word(cc) 70 } 71 return 72 } 73 74 // The resulting carry c is either 0 or 1. 75 func subVV_g(z, x, y []Word) (c Word) { 76 // The comment near the top of this file discusses this for loop condition. 77 for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { 78 zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c)) 79 z[i] = Word(zi) 80 c = Word(cc) 81 } 82 return 83 } 84 85 // The resulting carry c is either 0 or 1. 86 func addVW_g(z, x []Word, y Word) (c Word) { 87 c = y 88 // The comment near the top of this file discusses this for loop condition. 89 for i := 0; i < len(z) && i < len(x); i++ { 90 zi, cc := bits.Add(uint(x[i]), uint(c), 0) 91 z[i] = Word(zi) 92 c = Word(cc) 93 } 94 return 95 } 96 97 // addVWlarge is addVW, but intended for large z. 98 // The only difference is that we check on every iteration 99 // whether we are done with carries, 100 // and if so, switch to a much faster copy instead. 101 // This is only a good idea for large z, 102 // because the overhead of the check and the function call 103 // outweigh the benefits when z is small. 104 func addVWlarge(z, x []Word, y Word) (c Word) { 105 c = y 106 // The comment near the top of this file discusses this for loop condition. 107 for i := 0; i < len(z) && i < len(x); i++ { 108 if c == 0 { 109 copy(z[i:], x[i:]) 110 return 111 } 112 zi, cc := bits.Add(uint(x[i]), uint(c), 0) 113 z[i] = Word(zi) 114 c = Word(cc) 115 } 116 return 117 } 118 119 func subVW_g(z, x []Word, y Word) (c Word) { 120 c = y 121 // The comment near the top of this file discusses this for loop condition. 122 for i := 0; i < len(z) && i < len(x); i++ { 123 zi, cc := bits.Sub(uint(x[i]), uint(c), 0) 124 z[i] = Word(zi) 125 c = Word(cc) 126 } 127 return 128 } 129 130 // subVWlarge is to subVW as addVWlarge is to addVW. 131 func subVWlarge(z, x []Word, y Word) (c Word) { 132 c = y 133 // The comment near the top of this file discusses this for loop condition. 134 for i := 0; i < len(z) && i < len(x); i++ { 135 if c == 0 { 136 copy(z[i:], x[i:]) 137 return 138 } 139 zi, cc := bits.Sub(uint(x[i]), uint(c), 0) 140 z[i] = Word(zi) 141 c = Word(cc) 142 } 143 return 144 } 145 146 func shlVU_g(z, x []Word, s uint) (c Word) { 147 if s == 0 { 148 copy(z, x) 149 return 150 } 151 if len(z) == 0 { 152 return 153 } 154 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code 155 ŝ := _W - s 156 ŝ &= _W - 1 // ditto 157 c = x[len(z)-1] >> ŝ 158 for i := len(z) - 1; i > 0; i-- { 159 z[i] = x[i]<<s | x[i-1]>>ŝ 160 } 161 z[0] = x[0] << s 162 return 163 } 164 165 func shrVU_g(z, x []Word, s uint) (c Word) { 166 if s == 0 { 167 copy(z, x) 168 return 169 } 170 if len(z) == 0 { 171 return 172 } 173 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code 174 ŝ := _W - s 175 ŝ &= _W - 1 // ditto 176 c = x[0] << ŝ 177 for i := 0; i < len(z)-1; i++ { 178 z[i] = x[i]>>s | x[i+1]<<ŝ 179 } 180 z[len(z)-1] = x[len(z)-1] >> s 181 return 182 } 183 184 func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { 185 c = r 186 // The comment near the top of this file discusses this for loop condition. 187 for i := 0; i < len(z) && i < len(x); i++ { 188 c, z[i] = mulAddWWW_g(x[i], y, c) 189 } 190 return 191 } 192 193 func addMulVVW_g(z, x []Word, y Word) (c Word) { 194 // The comment near the top of this file discusses this for loop condition. 195 for i := 0; i < len(z) && i < len(x); i++ { 196 z1, z0 := mulAddWWW_g(x[i], y, z[i]) 197 lo, cc := bits.Add(uint(z0), uint(c), 0) 198 c, z[i] = Word(cc), Word(lo) 199 c += z1 200 } 201 return 202 } 203 204 // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y. 205 // An approximate reciprocal with a reference to "Improved Division by Invariant Integers 206 // (IEEE Transactions on Computers, 11 Jun. 2010)" 207 func divWW(x1, x0, y, m Word) (q, r Word) { 208 s := nlz(y) 209 if s != 0 { 210 x1 = x1<<s | x0>>(_W-s) 211 x0 <<= s 212 y <<= s 213 } 214 d := uint(y) 215 // We know that 216 // m = ⎣(B^2-1)/d⎦-B 217 // ⎣(B^2-1)/d⎦ = m+B 218 // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d 219 // B^2/d = m+B+delta2 0 <= delta2 <= 1 220 // The quotient we're trying to compute is 221 // quotient = ⎣(x1*B+x0)/d⎦ 222 // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦ 223 // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦ 224 // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦ 225 // The latter two terms of this three-term sum are between 0 and 1. 226 // So we can compute just the first term, and we will be low by at most 2. 227 t1, t0 := bits.Mul(uint(m), uint(x1)) 228 _, c := bits.Add(t0, uint(x0), 0) 229 t1, _ = bits.Add(t1, uint(x1), c) 230 // The quotient is either t1, t1+1, or t1+2. 231 // We'll try t1 and adjust if needed. 232 qq := t1 233 // compute remainder r=x-d*q. 234 dq1, dq0 := bits.Mul(d, qq) 235 r0, b := bits.Sub(uint(x0), dq0, 0) 236 r1, _ := bits.Sub(uint(x1), dq1, b) 237 // The remainder we just computed is bounded above by B+d: 238 // r = x1*B + x0 - d*q. 239 // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦ 240 // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1 241 // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha 242 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha 243 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha 244 // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1 245 // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha 246 // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha 247 // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha 248 // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1 249 // = B - d + d + d 250 // = B+d 251 // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small. 252 // Add 1 to q and subtract d from r. That guarantees that r is <B, so 253 // we no longer need to keep track of r1. 254 if r1 != 0 { 255 qq++ 256 r0 -= d 257 } 258 // If the remainder is still too large, increment q one more time. 259 if r0 >= d { 260 qq++ 261 r0 -= d 262 } 263 return Word(qq), Word(r0 >> s) 264 } 265 266 func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { 267 r = xn 268 if len(x) == 1 { 269 qq, rr := bits.Div(uint(r), uint(x[0]), uint(y)) 270 z[0] = Word(qq) 271 return Word(rr) 272 } 273 rec := reciprocalWord(y) 274 for i := len(z) - 1; i >= 0; i-- { 275 z[i], r = divWW(r, x[i], y, rec) 276 } 277 return r 278 } 279 280 // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1). 281 func reciprocalWord(d1 Word) Word { 282 u := uint(d1 << nlz(d1)) 283 x1 := ^u 284 x0 := uint(_M) 285 rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U 286 return Word(rec) 287 }