github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/math/big/nat.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements unsigned multi-precision integers (natural 6 // numbers). They are the building blocks for the implementation 7 // of signed integers, rationals, and floating-point numbers. 8 9 package big 10 11 import ( 12 "math/bits" 13 "math/rand" 14 "sync" 15 ) 16 17 // An unsigned integer x of the form 18 // 19 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] 20 // 21 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, 22 // with the digits x[i] as the slice elements. 23 // 24 // A number is normalized if the slice contains no leading 0 digits. 25 // During arithmetic operations, denormalized values may occur but are 26 // always normalized before returning the final result. The normalized 27 // representation of 0 is the empty or nil slice (length = 0). 28 // 29 type nat []Word 30 31 var ( 32 natOne = nat{1} 33 natTwo = nat{2} 34 natTen = nat{10} 35 ) 36 37 func (z nat) clear() { 38 for i := range z { 39 z[i] = 0 40 } 41 } 42 43 func (z nat) norm() nat { 44 i := len(z) 45 for i > 0 && z[i-1] == 0 { 46 i-- 47 } 48 return z[0:i] 49 } 50 51 func (z nat) make(n int) nat { 52 if n <= cap(z) { 53 return z[:n] // reuse z 54 } 55 // Choosing a good value for e has significant performance impact 56 // because it increases the chance that a value can be reused. 57 const e = 4 // extra capacity 58 return make(nat, n, n+e) 59 } 60 61 func (z nat) setWord(x Word) nat { 62 if x == 0 { 63 return z[:0] 64 } 65 z = z.make(1) 66 z[0] = x 67 return z 68 } 69 70 func (z nat) setUint64(x uint64) nat { 71 // single-word value 72 if w := Word(x); uint64(w) == x { 73 return z.setWord(w) 74 } 75 // 2-word value 76 z = z.make(2) 77 z[1] = Word(x >> 32) 78 z[0] = Word(x) 79 return z 80 } 81 82 func (z nat) set(x nat) nat { 83 z = z.make(len(x)) 84 copy(z, x) 85 return z 86 } 87 88 func (z nat) add(x, y nat) nat { 89 m := len(x) 90 n := len(y) 91 92 switch { 93 case m < n: 94 return z.add(y, x) 95 case m == 0: 96 // n == 0 because m >= n; result is 0 97 return z[:0] 98 case n == 0: 99 // result is x 100 return z.set(x) 101 } 102 // m > 0 103 104 z = z.make(m + 1) 105 c := addVV(z[0:n], x, y) 106 if m > n { 107 c = addVW(z[n:m], x[n:], c) 108 } 109 z[m] = c 110 111 return z.norm() 112 } 113 114 func (z nat) sub(x, y nat) nat { 115 m := len(x) 116 n := len(y) 117 118 switch { 119 case m < n: 120 panic("underflow") 121 case m == 0: 122 // n == 0 because m >= n; result is 0 123 return z[:0] 124 case n == 0: 125 // result is x 126 return z.set(x) 127 } 128 // m > 0 129 130 z = z.make(m) 131 c := subVV(z[0:n], x, y) 132 if m > n { 133 c = subVW(z[n:], x[n:], c) 134 } 135 if c != 0 { 136 panic("underflow") 137 } 138 139 return z.norm() 140 } 141 142 func (x nat) cmp(y nat) (r int) { 143 m := len(x) 144 n := len(y) 145 if m != n || m == 0 { 146 switch { 147 case m < n: 148 r = -1 149 case m > n: 150 r = 1 151 } 152 return 153 } 154 155 i := m - 1 156 for i > 0 && x[i] == y[i] { 157 i-- 158 } 159 160 switch { 161 case x[i] < y[i]: 162 r = -1 163 case x[i] > y[i]: 164 r = 1 165 } 166 return 167 } 168 169 func (z nat) mulAddWW(x nat, y, r Word) nat { 170 m := len(x) 171 if m == 0 || y == 0 { 172 return z.setWord(r) // result is r 173 } 174 // m > 0 175 176 z = z.make(m + 1) 177 z[m] = mulAddVWW(z[0:m], x, y, r) 178 179 return z.norm() 180 } 181 182 // basicMul multiplies x and y and leaves the result in z. 183 // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 184 func basicMul(z, x, y nat) { 185 z[0 : len(x)+len(y)].clear() // initialize z 186 for i, d := range y { 187 if d != 0 { 188 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) 189 } 190 } 191 } 192 193 // montgomery computes z mod m = x*y*2**(-n*_W) mod m, 194 // assuming k = -1/m mod 2**_W. 195 // z is used for storing the result which is returned; 196 // z must not alias x, y or m. 197 // See Gueron, "Efficient Software Implementations of Modular Exponentiation". 198 // https://eprint.iacr.org/2011/239.pdf 199 // In the terminology of that paper, this is an "Almost Montgomery Multiplication": 200 // x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result 201 // z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. 202 func (z nat) montgomery(x, y, m nat, k Word, n int) nat { 203 // This code assumes x, y, m are all the same length, n. 204 // (required by addMulVVW and the for loop). 205 // It also assumes that x, y are already reduced mod m, 206 // or else the result will not be properly reduced. 207 if len(x) != n || len(y) != n || len(m) != n { 208 panic("math/big: mismatched montgomery number lengths") 209 } 210 z = z.make(n) 211 z.clear() 212 var c Word 213 for i := 0; i < n; i++ { 214 d := y[i] 215 c2 := addMulVVW(z, x, d) 216 t := z[0] * k 217 c3 := addMulVVW(z, m, t) 218 copy(z, z[1:]) 219 cx := c + c2 220 cy := cx + c3 221 z[n-1] = cy 222 if cx < c2 || cy < c3 { 223 c = 1 224 } else { 225 c = 0 226 } 227 } 228 if c != 0 { 229 subVV(z, z, m) 230 } 231 return z 232 } 233 234 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. 235 // Factored out for readability - do not use outside karatsuba. 236 func karatsubaAdd(z, x nat, n int) { 237 if c := addVV(z[0:n], z, x); c != 0 { 238 addVW(z[n:n+n>>1], z[n:], c) 239 } 240 } 241 242 // Like karatsubaAdd, but does subtract. 243 func karatsubaSub(z, x nat, n int) { 244 if c := subVV(z[0:n], z, x); c != 0 { 245 subVW(z[n:n+n>>1], z[n:], c) 246 } 247 } 248 249 // Operands that are shorter than karatsubaThreshold are multiplied using 250 // "grade school" multiplication; for longer operands the Karatsuba algorithm 251 // is used. 252 var karatsubaThreshold = 40 // computed by calibrate_test.go 253 254 // karatsuba multiplies x and y and leaves the result in z. 255 // Both x and y must have the same length n and n must be a 256 // power of 2. The result vector z must have len(z) >= 6*n. 257 // The (non-normalized) result is placed in z[0 : 2*n]. 258 func karatsuba(z, x, y nat) { 259 n := len(y) 260 261 // Switch to basic multiplication if numbers are odd or small. 262 // (n is always even if karatsubaThreshold is even, but be 263 // conservative) 264 if n&1 != 0 || n < karatsubaThreshold || n < 2 { 265 basicMul(z, x, y) 266 return 267 } 268 // n&1 == 0 && n >= karatsubaThreshold && n >= 2 269 270 // Karatsuba multiplication is based on the observation that 271 // for two numbers x and y with: 272 // 273 // x = x1*b + x0 274 // y = y1*b + y0 275 // 276 // the product x*y can be obtained with 3 products z2, z1, z0 277 // instead of 4: 278 // 279 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 280 // = z2*b*b + z1*b + z0 281 // 282 // with: 283 // 284 // xd = x1 - x0 285 // yd = y0 - y1 286 // 287 // z1 = xd*yd + z2 + z0 288 // = (x1-x0)*(y0 - y1) + z2 + z0 289 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 290 // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 291 // = x1*y0 + x0*y1 292 293 // split x, y into "digits" 294 n2 := n >> 1 // n2 >= 1 295 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 296 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 297 298 // z is used for the result and temporary storage: 299 // 300 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n 301 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] 302 // 303 // For each recursive call of karatsuba, an unused slice of 304 // z is passed in that has (at least) half the length of the 305 // caller's z. 306 307 // compute z0 and z2 with the result "in place" in z 308 karatsuba(z, x0, y0) // z0 = x0*y0 309 karatsuba(z[n:], x1, y1) // z2 = x1*y1 310 311 // compute xd (or the negative value if underflow occurs) 312 s := 1 // sign of product xd*yd 313 xd := z[2*n : 2*n+n2] 314 if subVV(xd, x1, x0) != 0 { // x1-x0 315 s = -s 316 subVV(xd, x0, x1) // x0-x1 317 } 318 319 // compute yd (or the negative value if underflow occurs) 320 yd := z[2*n+n2 : 3*n] 321 if subVV(yd, y0, y1) != 0 { // y0-y1 322 s = -s 323 subVV(yd, y1, y0) // y1-y0 324 } 325 326 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 327 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 328 p := z[n*3:] 329 karatsuba(p, xd, yd) 330 331 // save original z2:z0 332 // (ok to use upper half of z since we're done recursing) 333 r := z[n*4:] 334 copy(r, z[:n*2]) 335 336 // add up all partial products 337 // 338 // 2*n n 0 339 // z = [ z2 | z0 ] 340 // + [ z0 ] 341 // + [ z2 ] 342 // + [ p ] 343 // 344 karatsubaAdd(z[n2:], r, n) 345 karatsubaAdd(z[n2:], r[n:], n) 346 if s > 0 { 347 karatsubaAdd(z[n2:], p, n) 348 } else { 349 karatsubaSub(z[n2:], p, n) 350 } 351 } 352 353 // alias reports whether x and y share the same base array. 354 func alias(x, y nat) bool { 355 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] 356 } 357 358 // addAt implements z += x<<(_W*i); z must be long enough. 359 // (we don't use nat.add because we need z to stay the same 360 // slice, and we don't need to normalize z after each addition) 361 func addAt(z, x nat, i int) { 362 if n := len(x); n > 0 { 363 if c := addVV(z[i:i+n], z[i:], x); c != 0 { 364 j := i + n 365 if j < len(z) { 366 addVW(z[j:], z[j:], c) 367 } 368 } 369 } 370 } 371 372 func max(x, y int) int { 373 if x > y { 374 return x 375 } 376 return y 377 } 378 379 // karatsubaLen computes an approximation to the maximum k <= n such that 380 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the 381 // result is the largest number that can be divided repeatedly by 2 before 382 // becoming about the value of karatsubaThreshold. 383 func karatsubaLen(n int) int { 384 i := uint(0) 385 for n > karatsubaThreshold { 386 n >>= 1 387 i++ 388 } 389 return n << i 390 } 391 392 func (z nat) mul(x, y nat) nat { 393 m := len(x) 394 n := len(y) 395 396 switch { 397 case m < n: 398 return z.mul(y, x) 399 case m == 0 || n == 0: 400 return z[:0] 401 case n == 1: 402 return z.mulAddWW(x, y[0], 0) 403 } 404 // m >= n > 1 405 406 // determine if z can be reused 407 if alias(z, x) || alias(z, y) { 408 z = nil // z is an alias for x or y - cannot reuse 409 } 410 411 // use basic multiplication if the numbers are small 412 if n < karatsubaThreshold { 413 z = z.make(m + n) 414 basicMul(z, x, y) 415 return z.norm() 416 } 417 // m >= n && n >= karatsubaThreshold && n >= 2 418 419 // determine Karatsuba length k such that 420 // 421 // x = xh*b + x0 (0 <= x0 < b) 422 // y = yh*b + y0 (0 <= y0 < b) 423 // b = 1<<(_W*k) ("base" of digits xi, yi) 424 // 425 k := karatsubaLen(n) 426 // k <= n 427 428 // multiply x0 and y0 via Karatsuba 429 x0 := x[0:k] // x0 is not normalized 430 y0 := y[0:k] // y0 is not normalized 431 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y 432 karatsuba(z, x0, y0) 433 z = z[0 : m+n] // z has final length but may be incomplete 434 z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) 435 436 // If xh != 0 or yh != 0, add the missing terms to z. For 437 // 438 // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) 439 // yh = y1*b (0 <= y1 < b) 440 // 441 // the missing terms are 442 // 443 // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 444 // 445 // since all the yi for i > 1 are 0 by choice of k: If any of them 446 // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would 447 // be a larger valid threshold contradicting the assumption about k. 448 // 449 if k < n || m != n { 450 var t nat 451 452 // add x0*y1*b 453 x0 := x0.norm() 454 y1 := y[k:] // y1 is normalized because y is 455 t = t.mul(x0, y1) // update t so we don't lose t's underlying array 456 addAt(z, t, k) 457 458 // add xi*y0<<i, xi*y1*b<<(i+k) 459 y0 := y0.norm() 460 for i := k; i < len(x); i += k { 461 xi := x[i:] 462 if len(xi) > k { 463 xi = xi[:k] 464 } 465 xi = xi.norm() 466 t = t.mul(xi, y0) 467 addAt(z, t, i) 468 t = t.mul(xi, y1) 469 addAt(z, t, i+k) 470 } 471 } 472 473 return z.norm() 474 } 475 476 // basicSqr sets z = x*x and is asymptotically faster than basicMul 477 // by about a factor of 2, but slower for small arguments due to overhead. 478 // Requirements: len(x) > 0, len(z) >= 2*len(x) 479 // The (non-normalized) result is placed in z[0 : 2 * len(x)]. 480 func basicSqr(z, x nat) { 481 n := len(x) 482 t := make(nat, 2*n) // temporary variable to hold the products 483 z[1], z[0] = mulWW(x[0], x[0]) // the initial square 484 for i := 1; i < n; i++ { 485 d := x[i] 486 // z collects the squares x[i] * x[i] 487 z[2*i+1], z[2*i] = mulWW(d, d) 488 // t collects the products x[i] * x[j] where j < i 489 t[2*i] = addMulVVW(t[i:2*i], x[0:i], d) 490 } 491 t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products 492 addVV(z, z, t) // combine the result 493 } 494 495 // Operands that are shorter than basicSqrThreshold are squared using 496 // "grade school" multiplication; for operands longer than karatsubaSqrThreshold 497 // the Karatsuba algorithm is used. 498 var basicSqrThreshold = 20 // computed by calibrate_test.go 499 var karatsubaSqrThreshold = 400 // computed by calibrate_test.go 500 501 // z = x*x 502 func (z nat) sqr(x nat) nat { 503 n := len(x) 504 switch { 505 case n == 0: 506 return z[:0] 507 case n == 1: 508 d := x[0] 509 z = z.make(2) 510 z[1], z[0] = mulWW(d, d) 511 return z.norm() 512 } 513 514 if alias(z, x) { 515 z = nil // z is an alias for x - cannot reuse 516 } 517 z = z.make(2 * n) 518 519 if n < basicSqrThreshold { 520 basicMul(z, x, x) 521 return z.norm() 522 } 523 if n < karatsubaSqrThreshold { 524 basicSqr(z, x) 525 return z.norm() 526 } 527 528 return z.mul(x, x) 529 } 530 531 // mulRange computes the product of all the unsigned integers in the 532 // range [a, b] inclusively. If a > b (empty range), the result is 1. 533 func (z nat) mulRange(a, b uint64) nat { 534 switch { 535 case a == 0: 536 // cut long ranges short (optimization) 537 return z.setUint64(0) 538 case a > b: 539 return z.setUint64(1) 540 case a == b: 541 return z.setUint64(a) 542 case a+1 == b: 543 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) 544 } 545 m := (a + b) / 2 546 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) 547 } 548 549 // q = (x-r)/y, with 0 <= r < y 550 func (z nat) divW(x nat, y Word) (q nat, r Word) { 551 m := len(x) 552 switch { 553 case y == 0: 554 panic("division by zero") 555 case y == 1: 556 q = z.set(x) // result is x 557 return 558 case m == 0: 559 q = z[:0] // result is 0 560 return 561 } 562 // m > 0 563 z = z.make(m) 564 r = divWVW(z, 0, x, y) 565 q = z.norm() 566 return 567 } 568 569 func (z nat) div(z2, u, v nat) (q, r nat) { 570 if len(v) == 0 { 571 panic("division by zero") 572 } 573 574 if u.cmp(v) < 0 { 575 q = z[:0] 576 r = z2.set(u) 577 return 578 } 579 580 if len(v) == 1 { 581 var r2 Word 582 q, r2 = z.divW(u, v[0]) 583 r = z2.setWord(r2) 584 return 585 } 586 587 q, r = z.divLarge(z2, u, v) 588 return 589 } 590 591 // getNat returns a *nat of len n. The contents may not be zero. 592 // The pool holds *nat to avoid allocation when converting to interface{}. 593 func getNat(n int) *nat { 594 var z *nat 595 if v := natPool.Get(); v != nil { 596 z = v.(*nat) 597 } 598 if z == nil { 599 z = new(nat) 600 } 601 *z = z.make(n) 602 return z 603 } 604 605 func putNat(x *nat) { 606 natPool.Put(x) 607 } 608 609 var natPool sync.Pool 610 611 // q = (uIn-r)/v, with 0 <= r < y 612 // Uses z as storage for q, and u as storage for r if possible. 613 // See Knuth, Volume 2, section 4.3.1, Algorithm D. 614 // Preconditions: 615 // len(v) >= 2 616 // len(uIn) >= len(v) 617 func (z nat) divLarge(u, uIn, v nat) (q, r nat) { 618 n := len(v) 619 m := len(uIn) - n 620 621 // determine if z can be reused 622 // TODO(gri) should find a better solution - this if statement 623 // is very costly (see e.g. time pidigits -s -n 10000) 624 if alias(z, uIn) || alias(z, v) { 625 z = nil // z is an alias for uIn or v - cannot reuse 626 } 627 q = z.make(m + 1) 628 629 qhatvp := getNat(n + 1) 630 qhatv := *qhatvp 631 if alias(u, uIn) || alias(u, v) { 632 u = nil // u is an alias for uIn or v - cannot reuse 633 } 634 u = u.make(len(uIn) + 1) 635 u.clear() // TODO(gri) no need to clear if we allocated a new u 636 637 // D1. 638 var v1p *nat 639 shift := nlz(v[n-1]) 640 if shift > 0 { 641 // do not modify v, it may be used by another goroutine simultaneously 642 v1p = getNat(n) 643 v1 := *v1p 644 shlVU(v1, v, shift) 645 v = v1 646 } 647 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) 648 649 // D2. 650 vn1 := v[n-1] 651 for j := m; j >= 0; j-- { 652 // D3. 653 qhat := Word(_M) 654 if ujn := u[j+n]; ujn != vn1 { 655 var rhat Word 656 qhat, rhat = divWW(ujn, u[j+n-1], vn1) 657 658 // x1 | x2 = q̂v_{n-2} 659 vn2 := v[n-2] 660 x1, x2 := mulWW(qhat, vn2) 661 // test if q̂v_{n-2} > br̂ + u_{j+n-2} 662 ujn2 := u[j+n-2] 663 for greaterThan(x1, x2, rhat, ujn2) { 664 qhat-- 665 prevRhat := rhat 666 rhat += vn1 667 // v[n-1] >= 0, so this tests for overflow. 668 if rhat < prevRhat { 669 break 670 } 671 x1, x2 = mulWW(qhat, vn2) 672 } 673 } 674 675 // D4. 676 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) 677 678 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv) 679 if c != 0 { 680 c := addVV(u[j:j+n], u[j:], v) 681 u[j+n] += c 682 qhat-- 683 } 684 685 q[j] = qhat 686 } 687 if v1p != nil { 688 putNat(v1p) 689 } 690 putNat(qhatvp) 691 692 q = q.norm() 693 shrVU(u, u, shift) 694 r = u.norm() 695 696 return q, r 697 } 698 699 // Length of x in bits. x must be normalized. 700 func (x nat) bitLen() int { 701 if i := len(x) - 1; i >= 0 { 702 return i*_W + bits.Len(uint(x[i])) 703 } 704 return 0 705 } 706 707 // trailingZeroBits returns the number of consecutive least significant zero 708 // bits of x. 709 func (x nat) trailingZeroBits() uint { 710 if len(x) == 0 { 711 return 0 712 } 713 var i uint 714 for x[i] == 0 { 715 i++ 716 } 717 // x[i] != 0 718 return i*_W + uint(bits.TrailingZeros(uint(x[i]))) 719 } 720 721 // z = x << s 722 func (z nat) shl(x nat, s uint) nat { 723 m := len(x) 724 if m == 0 { 725 return z[:0] 726 } 727 // m > 0 728 729 n := m + int(s/_W) 730 z = z.make(n + 1) 731 z[n] = shlVU(z[n-m:n], x, s%_W) 732 z[0 : n-m].clear() 733 734 return z.norm() 735 } 736 737 // z = x >> s 738 func (z nat) shr(x nat, s uint) nat { 739 m := len(x) 740 n := m - int(s/_W) 741 if n <= 0 { 742 return z[:0] 743 } 744 // n > 0 745 746 z = z.make(n) 747 shrVU(z, x[m-n:], s%_W) 748 749 return z.norm() 750 } 751 752 func (z nat) setBit(x nat, i uint, b uint) nat { 753 j := int(i / _W) 754 m := Word(1) << (i % _W) 755 n := len(x) 756 switch b { 757 case 0: 758 z = z.make(n) 759 copy(z, x) 760 if j >= n { 761 // no need to grow 762 return z 763 } 764 z[j] &^= m 765 return z.norm() 766 case 1: 767 if j >= n { 768 z = z.make(j + 1) 769 z[n:].clear() 770 } else { 771 z = z.make(n) 772 } 773 copy(z, x) 774 z[j] |= m 775 // no need to normalize 776 return z 777 } 778 panic("set bit is not 0 or 1") 779 } 780 781 // bit returns the value of the i'th bit, with lsb == bit 0. 782 func (x nat) bit(i uint) uint { 783 j := i / _W 784 if j >= uint(len(x)) { 785 return 0 786 } 787 // 0 <= j < len(x) 788 return uint(x[j] >> (i % _W) & 1) 789 } 790 791 // sticky returns 1 if there's a 1 bit within the 792 // i least significant bits, otherwise it returns 0. 793 func (x nat) sticky(i uint) uint { 794 j := i / _W 795 if j >= uint(len(x)) { 796 if len(x) == 0 { 797 return 0 798 } 799 return 1 800 } 801 // 0 <= j < len(x) 802 for _, x := range x[:j] { 803 if x != 0 { 804 return 1 805 } 806 } 807 if x[j]<<(_W-i%_W) != 0 { 808 return 1 809 } 810 return 0 811 } 812 813 func (z nat) and(x, y nat) nat { 814 m := len(x) 815 n := len(y) 816 if m > n { 817 m = n 818 } 819 // m <= n 820 821 z = z.make(m) 822 for i := 0; i < m; i++ { 823 z[i] = x[i] & y[i] 824 } 825 826 return z.norm() 827 } 828 829 func (z nat) andNot(x, y nat) nat { 830 m := len(x) 831 n := len(y) 832 if n > m { 833 n = m 834 } 835 // m >= n 836 837 z = z.make(m) 838 for i := 0; i < n; i++ { 839 z[i] = x[i] &^ y[i] 840 } 841 copy(z[n:m], x[n:m]) 842 843 return z.norm() 844 } 845 846 func (z nat) or(x, y nat) nat { 847 m := len(x) 848 n := len(y) 849 s := x 850 if m < n { 851 n, m = m, n 852 s = y 853 } 854 // m >= n 855 856 z = z.make(m) 857 for i := 0; i < n; i++ { 858 z[i] = x[i] | y[i] 859 } 860 copy(z[n:m], s[n:m]) 861 862 return z.norm() 863 } 864 865 func (z nat) xor(x, y nat) nat { 866 m := len(x) 867 n := len(y) 868 s := x 869 if m < n { 870 n, m = m, n 871 s = y 872 } 873 // m >= n 874 875 z = z.make(m) 876 for i := 0; i < n; i++ { 877 z[i] = x[i] ^ y[i] 878 } 879 copy(z[n:m], s[n:m]) 880 881 return z.norm() 882 } 883 884 // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) 885 func greaterThan(x1, x2, y1, y2 Word) bool { 886 return x1 > y1 || x1 == y1 && x2 > y2 887 } 888 889 // modW returns x % d. 890 func (x nat) modW(d Word) (r Word) { 891 // TODO(agl): we don't actually need to store the q value. 892 var q nat 893 q = q.make(len(x)) 894 return divWVW(q, 0, x, d) 895 } 896 897 // random creates a random integer in [0..limit), using the space in z if 898 // possible. n is the bit length of limit. 899 func (z nat) random(rand *rand.Rand, limit nat, n int) nat { 900 if alias(z, limit) { 901 z = nil // z is an alias for limit - cannot reuse 902 } 903 z = z.make(len(limit)) 904 905 bitLengthOfMSW := uint(n % _W) 906 if bitLengthOfMSW == 0 { 907 bitLengthOfMSW = _W 908 } 909 mask := Word((1 << bitLengthOfMSW) - 1) 910 911 for { 912 switch _W { 913 case 32: 914 for i := range z { 915 z[i] = Word(rand.Uint32()) 916 } 917 case 64: 918 for i := range z { 919 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 920 } 921 default: 922 panic("unknown word size") 923 } 924 z[len(limit)-1] &= mask 925 if z.cmp(limit) < 0 { 926 break 927 } 928 } 929 930 return z.norm() 931 } 932 933 // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; 934 // otherwise it sets z to x**y. The result is the value of z. 935 func (z nat) expNN(x, y, m nat) nat { 936 if alias(z, x) || alias(z, y) { 937 // We cannot allow in-place modification of x or y. 938 z = nil 939 } 940 941 // x**y mod 1 == 0 942 if len(m) == 1 && m[0] == 1 { 943 return z.setWord(0) 944 } 945 // m == 0 || m > 1 946 947 // x**0 == 1 948 if len(y) == 0 { 949 return z.setWord(1) 950 } 951 // y > 0 952 953 // x**1 mod m == x mod m 954 if len(y) == 1 && y[0] == 1 && len(m) != 0 { 955 _, z = z.div(z, x, m) 956 return z 957 } 958 // y > 1 959 960 if len(m) != 0 { 961 // We likely end up being as long as the modulus. 962 z = z.make(len(m)) 963 } 964 z = z.set(x) 965 966 // If the base is non-trivial and the exponent is large, we use 967 // 4-bit, windowed exponentiation. This involves precomputing 14 values 968 // (x^2...x^15) but then reduces the number of multiply-reduces by a 969 // third. Even for a 32-bit exponent, this reduces the number of 970 // operations. Uses Montgomery method for odd moduli. 971 if x.cmp(natOne) > 0 && len(y) > 1 && len(m) > 0 { 972 if m[0]&1 == 1 { 973 return z.expNNMontgomery(x, y, m) 974 } 975 return z.expNNWindowed(x, y, m) 976 } 977 978 v := y[len(y)-1] // v > 0 because y is normalized and y > 0 979 shift := nlz(v) + 1 980 v <<= shift 981 var q nat 982 983 const mask = 1 << (_W - 1) 984 985 // We walk through the bits of the exponent one by one. Each time we 986 // see a bit, we square, thus doubling the power. If the bit is a one, 987 // we also multiply by x, thus adding one to the power. 988 989 w := _W - int(shift) 990 // zz and r are used to avoid allocating in mul and div as 991 // otherwise the arguments would alias. 992 var zz, r nat 993 for j := 0; j < w; j++ { 994 zz = zz.sqr(z) 995 zz, z = z, zz 996 997 if v&mask != 0 { 998 zz = zz.mul(z, x) 999 zz, z = z, zz 1000 } 1001 1002 if len(m) != 0 { 1003 zz, r = zz.div(r, z, m) 1004 zz, r, q, z = q, z, zz, r 1005 } 1006 1007 v <<= 1 1008 } 1009 1010 for i := len(y) - 2; i >= 0; i-- { 1011 v = y[i] 1012 1013 for j := 0; j < _W; j++ { 1014 zz = zz.sqr(z) 1015 zz, z = z, zz 1016 1017 if v&mask != 0 { 1018 zz = zz.mul(z, x) 1019 zz, z = z, zz 1020 } 1021 1022 if len(m) != 0 { 1023 zz, r = zz.div(r, z, m) 1024 zz, r, q, z = q, z, zz, r 1025 } 1026 1027 v <<= 1 1028 } 1029 } 1030 1031 return z.norm() 1032 } 1033 1034 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. 1035 func (z nat) expNNWindowed(x, y, m nat) nat { 1036 // zz and r are used to avoid allocating in mul and div as otherwise 1037 // the arguments would alias. 1038 var zz, r nat 1039 1040 const n = 4 1041 // powers[i] contains x^i. 1042 var powers [1 << n]nat 1043 powers[0] = natOne 1044 powers[1] = x 1045 for i := 2; i < 1<<n; i += 2 { 1046 p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] 1047 *p = p.sqr(*p2) 1048 zz, r = zz.div(r, *p, m) 1049 *p, r = r, *p 1050 *p1 = p1.mul(*p, x) 1051 zz, r = zz.div(r, *p1, m) 1052 *p1, r = r, *p1 1053 } 1054 1055 z = z.setWord(1) 1056 1057 for i := len(y) - 1; i >= 0; i-- { 1058 yi := y[i] 1059 for j := 0; j < _W; j += n { 1060 if i != len(y)-1 || j != 0 { 1061 // Unrolled loop for significant performance 1062 // gain. Use go test -bench=".*" in crypto/rsa 1063 // to check performance before making changes. 1064 zz = zz.sqr(z) 1065 zz, z = z, zz 1066 zz, r = zz.div(r, z, m) 1067 z, r = r, z 1068 1069 zz = zz.sqr(z) 1070 zz, z = z, zz 1071 zz, r = zz.div(r, z, m) 1072 z, r = r, z 1073 1074 zz = zz.sqr(z) 1075 zz, z = z, zz 1076 zz, r = zz.div(r, z, m) 1077 z, r = r, z 1078 1079 zz = zz.sqr(z) 1080 zz, z = z, zz 1081 zz, r = zz.div(r, z, m) 1082 z, r = r, z 1083 } 1084 1085 zz = zz.mul(z, powers[yi>>(_W-n)]) 1086 zz, z = z, zz 1087 zz, r = zz.div(r, z, m) 1088 z, r = r, z 1089 1090 yi <<= n 1091 } 1092 } 1093 1094 return z.norm() 1095 } 1096 1097 // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window. 1098 // Uses Montgomery representation. 1099 func (z nat) expNNMontgomery(x, y, m nat) nat { 1100 numWords := len(m) 1101 1102 // We want the lengths of x and m to be equal. 1103 // It is OK if x >= m as long as len(x) == len(m). 1104 if len(x) > numWords { 1105 _, x = nat(nil).div(nil, x, m) 1106 // Note: now len(x) <= numWords, not guaranteed ==. 1107 } 1108 if len(x) < numWords { 1109 rr := make(nat, numWords) 1110 copy(rr, x) 1111 x = rr 1112 } 1113 1114 // Ideally the precomputations would be performed outside, and reused 1115 // k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson 1116 // Iteration for Multiplicative Inverses Modulo Prime Powers". 1117 k0 := 2 - m[0] 1118 t := m[0] - 1 1119 for i := 1; i < _W; i <<= 1 { 1120 t *= t 1121 k0 *= (t + 1) 1122 } 1123 k0 = -k0 1124 1125 // RR = 2**(2*_W*len(m)) mod m 1126 RR := nat(nil).setWord(1) 1127 zz := nat(nil).shl(RR, uint(2*numWords*_W)) 1128 _, RR = RR.div(RR, zz, m) 1129 if len(RR) < numWords { 1130 zz = zz.make(numWords) 1131 copy(zz, RR) 1132 RR = zz 1133 } 1134 // one = 1, with equal length to that of m 1135 one := make(nat, numWords) 1136 one[0] = 1 1137 1138 const n = 4 1139 // powers[i] contains x^i 1140 var powers [1 << n]nat 1141 powers[0] = powers[0].montgomery(one, RR, m, k0, numWords) 1142 powers[1] = powers[1].montgomery(x, RR, m, k0, numWords) 1143 for i := 2; i < 1<<n; i++ { 1144 powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords) 1145 } 1146 1147 // initialize z = 1 (Montgomery 1) 1148 z = z.make(numWords) 1149 copy(z, powers[0]) 1150 1151 zz = zz.make(numWords) 1152 1153 // same windowed exponent, but with Montgomery multiplications 1154 for i := len(y) - 1; i >= 0; i-- { 1155 yi := y[i] 1156 for j := 0; j < _W; j += n { 1157 if i != len(y)-1 || j != 0 { 1158 zz = zz.montgomery(z, z, m, k0, numWords) 1159 z = z.montgomery(zz, zz, m, k0, numWords) 1160 zz = zz.montgomery(z, z, m, k0, numWords) 1161 z = z.montgomery(zz, zz, m, k0, numWords) 1162 } 1163 zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords) 1164 z, zz = zz, z 1165 yi <<= n 1166 } 1167 } 1168 // convert to regular number 1169 zz = zz.montgomery(z, one, m, k0, numWords) 1170 1171 // One last reduction, just in case. 1172 // See golang.org/issue/13907. 1173 if zz.cmp(m) >= 0 { 1174 // Common case is m has high bit set; in that case, 1175 // since zz is the same length as m, there can be just 1176 // one multiple of m to remove. Just subtract. 1177 // We think that the subtract should be sufficient in general, 1178 // so do that unconditionally, but double-check, 1179 // in case our beliefs are wrong. 1180 // The div is not expected to be reached. 1181 zz = zz.sub(zz, m) 1182 if zz.cmp(m) >= 0 { 1183 _, zz = nat(nil).div(nil, zz, m) 1184 } 1185 } 1186 1187 return zz.norm() 1188 } 1189 1190 // bytes writes the value of z into buf using big-endian encoding. 1191 // len(buf) must be >= len(z)*_S. The value of z is encoded in the 1192 // slice buf[i:]. The number i of unused bytes at the beginning of 1193 // buf is returned as result. 1194 func (z nat) bytes(buf []byte) (i int) { 1195 i = len(buf) 1196 for _, d := range z { 1197 for j := 0; j < _S; j++ { 1198 i-- 1199 buf[i] = byte(d) 1200 d >>= 8 1201 } 1202 } 1203 1204 for i < len(buf) && buf[i] == 0 { 1205 i++ 1206 } 1207 1208 return 1209 } 1210 1211 // setBytes interprets buf as the bytes of a big-endian unsigned 1212 // integer, sets z to that value, and returns z. 1213 func (z nat) setBytes(buf []byte) nat { 1214 z = z.make((len(buf) + _S - 1) / _S) 1215 1216 k := 0 1217 s := uint(0) 1218 var d Word 1219 for i := len(buf); i > 0; i-- { 1220 d |= Word(buf[i-1]) << s 1221 if s += 8; s == _S*8 { 1222 z[k] = d 1223 k++ 1224 s = 0 1225 d = 0 1226 } 1227 } 1228 if k < len(z) { 1229 z[k] = d 1230 } 1231 1232 return z.norm() 1233 } 1234 1235 // sqrt sets z = ⌊√x⌋ 1236 func (z nat) sqrt(x nat) nat { 1237 if x.cmp(natOne) <= 0 { 1238 return z.set(x) 1239 } 1240 if alias(z, x) { 1241 z = nil 1242 } 1243 1244 // Start with value known to be too large and repeat "z = ⌊(z + ⌊x/z⌋)/2⌋" until it stops getting smaller. 1245 // See Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 (SqrtInt). 1246 // https://members.loria.fr/PZimmermann/mca/pub226.html 1247 // If x is one less than a perfect square, the sequence oscillates between the correct z and z+1; 1248 // otherwise it converges to the correct z and stays there. 1249 var z1, z2 nat 1250 z1 = z 1251 z1 = z1.setUint64(1) 1252 z1 = z1.shl(z1, uint(x.bitLen()/2+1)) // must be ≥ √x 1253 for n := 0; ; n++ { 1254 z2, _ = z2.div(nil, x, z1) 1255 z2 = z2.add(z2, z1) 1256 z2 = z2.shr(z2, 1) 1257 if z2.cmp(z1) >= 0 { 1258 // z1 is answer. 1259 // Figure out whether z1 or z2 is currently aliased to z by looking at loop count. 1260 if n&1 == 0 { 1261 return z1 1262 } 1263 return z.set(z1) 1264 } 1265 z1, z2 = z2, z1 1266 } 1267 }