github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/math/big/nat.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements unsigned multi-precision integers (natural 6 // numbers). They are the building blocks for the implementation 7 // of signed integers, rationals, and floating-point numbers. 8 9 package big 10 11 import "math/rand" 12 13 // An unsigned integer x of the form 14 // 15 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] 16 // 17 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, 18 // with the digits x[i] as the slice elements. 19 // 20 // A number is normalized if the slice contains no leading 0 digits. 21 // During arithmetic operations, denormalized values may occur but are 22 // always normalized before returning the final result. The normalized 23 // representation of 0 is the empty or nil slice (length = 0). 24 // 25 type nat []Word 26 27 var ( 28 natOne = nat{1} 29 natTwo = nat{2} 30 natTen = nat{10} 31 ) 32 33 func (z nat) clear() { 34 for i := range z { 35 z[i] = 0 36 } 37 } 38 39 func (z nat) norm() nat { 40 i := len(z) 41 for i > 0 && z[i-1] == 0 { 42 i-- 43 } 44 return z[0:i] 45 } 46 47 func (z nat) make(n int) nat { 48 if n <= cap(z) { 49 return z[:n] // reuse z 50 } 51 // Choosing a good value for e has significant performance impact 52 // because it increases the chance that a value can be reused. 53 const e = 4 // extra capacity 54 return make(nat, n, n+e) 55 } 56 57 func (z nat) setWord(x Word) nat { 58 if x == 0 { 59 return z[:0] 60 } 61 z = z.make(1) 62 z[0] = x 63 return z 64 } 65 66 func (z nat) setUint64(x uint64) nat { 67 // single-digit values 68 if w := Word(x); uint64(w) == x { 69 return z.setWord(w) 70 } 71 72 // compute number of words n required to represent x 73 n := 0 74 for t := x; t > 0; t >>= _W { 75 n++ 76 } 77 78 // split x into n words 79 z = z.make(n) 80 for i := range z { 81 z[i] = Word(x & _M) 82 x >>= _W 83 } 84 85 return z 86 } 87 88 func (z nat) set(x nat) nat { 89 z = z.make(len(x)) 90 copy(z, x) 91 return z 92 } 93 94 func (z nat) add(x, y nat) nat { 95 m := len(x) 96 n := len(y) 97 98 switch { 99 case m < n: 100 return z.add(y, x) 101 case m == 0: 102 // n == 0 because m >= n; result is 0 103 return z[:0] 104 case n == 0: 105 // result is x 106 return z.set(x) 107 } 108 // m > 0 109 110 z = z.make(m + 1) 111 c := addVV(z[0:n], x, y) 112 if m > n { 113 c = addVW(z[n:m], x[n:], c) 114 } 115 z[m] = c 116 117 return z.norm() 118 } 119 120 func (z nat) sub(x, y nat) nat { 121 m := len(x) 122 n := len(y) 123 124 switch { 125 case m < n: 126 panic("underflow") 127 case m == 0: 128 // n == 0 because m >= n; result is 0 129 return z[:0] 130 case n == 0: 131 // result is x 132 return z.set(x) 133 } 134 // m > 0 135 136 z = z.make(m) 137 c := subVV(z[0:n], x, y) 138 if m > n { 139 c = subVW(z[n:], x[n:], c) 140 } 141 if c != 0 { 142 panic("underflow") 143 } 144 145 return z.norm() 146 } 147 148 func (x nat) cmp(y nat) (r int) { 149 m := len(x) 150 n := len(y) 151 if m != n || m == 0 { 152 switch { 153 case m < n: 154 r = -1 155 case m > n: 156 r = 1 157 } 158 return 159 } 160 161 i := m - 1 162 for i > 0 && x[i] == y[i] { 163 i-- 164 } 165 166 switch { 167 case x[i] < y[i]: 168 r = -1 169 case x[i] > y[i]: 170 r = 1 171 } 172 return 173 } 174 175 func (z nat) mulAddWW(x nat, y, r Word) nat { 176 m := len(x) 177 if m == 0 || y == 0 { 178 return z.setWord(r) // result is r 179 } 180 // m > 0 181 182 z = z.make(m + 1) 183 z[m] = mulAddVWW(z[0:m], x, y, r) 184 185 return z.norm() 186 } 187 188 // basicMul multiplies x and y and leaves the result in z. 189 // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 190 func basicMul(z, x, y nat) { 191 z[0 : len(x)+len(y)].clear() // initialize z 192 for i, d := range y { 193 if d != 0 { 194 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) 195 } 196 } 197 } 198 199 // montgomery computes x*y*2^(-n*_W) mod m, 200 // assuming k = -1/m mod 2^_W. 201 // z is used for storing the result which is returned; 202 // z must not alias x, y or m. 203 func (z nat) montgomery(x, y, m nat, k Word, n int) nat { 204 var c1, c2 Word 205 z = z.make(n) 206 z.clear() 207 for i := 0; i < n; i++ { 208 d := y[i] 209 c1 += addMulVVW(z, x, d) 210 t := z[0] * k 211 c2 = addMulVVW(z, m, t) 212 213 copy(z, z[1:]) 214 z[n-1] = c1 + c2 215 if z[n-1] < c1 { 216 c1 = 1 217 } else { 218 c1 = 0 219 } 220 } 221 if c1 != 0 { 222 subVV(z, z, m) 223 } 224 return z 225 } 226 227 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. 228 // Factored out for readability - do not use outside karatsuba. 229 func karatsubaAdd(z, x nat, n int) { 230 if c := addVV(z[0:n], z, x); c != 0 { 231 addVW(z[n:n+n>>1], z[n:], c) 232 } 233 } 234 235 // Like karatsubaAdd, but does subtract. 236 func karatsubaSub(z, x nat, n int) { 237 if c := subVV(z[0:n], z, x); c != 0 { 238 subVW(z[n:n+n>>1], z[n:], c) 239 } 240 } 241 242 // Operands that are shorter than karatsubaThreshold are multiplied using 243 // "grade school" multiplication; for longer operands the Karatsuba algorithm 244 // is used. 245 var karatsubaThreshold int = 40 // computed by calibrate.go 246 247 // karatsuba multiplies x and y and leaves the result in z. 248 // Both x and y must have the same length n and n must be a 249 // power of 2. The result vector z must have len(z) >= 6*n. 250 // The (non-normalized) result is placed in z[0 : 2*n]. 251 func karatsuba(z, x, y nat) { 252 n := len(y) 253 254 // Switch to basic multiplication if numbers are odd or small. 255 // (n is always even if karatsubaThreshold is even, but be 256 // conservative) 257 if n&1 != 0 || n < karatsubaThreshold || n < 2 { 258 basicMul(z, x, y) 259 return 260 } 261 // n&1 == 0 && n >= karatsubaThreshold && n >= 2 262 263 // Karatsuba multiplication is based on the observation that 264 // for two numbers x and y with: 265 // 266 // x = x1*b + x0 267 // y = y1*b + y0 268 // 269 // the product x*y can be obtained with 3 products z2, z1, z0 270 // instead of 4: 271 // 272 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 273 // = z2*b*b + z1*b + z0 274 // 275 // with: 276 // 277 // xd = x1 - x0 278 // yd = y0 - y1 279 // 280 // z1 = xd*yd + z2 + z0 281 // = (x1-x0)*(y0 - y1) + z2 + z0 282 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 283 // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 284 // = x1*y0 + x0*y1 285 286 // split x, y into "digits" 287 n2 := n >> 1 // n2 >= 1 288 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 289 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 290 291 // z is used for the result and temporary storage: 292 // 293 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n 294 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] 295 // 296 // For each recursive call of karatsuba, an unused slice of 297 // z is passed in that has (at least) half the length of the 298 // caller's z. 299 300 // compute z0 and z2 with the result "in place" in z 301 karatsuba(z, x0, y0) // z0 = x0*y0 302 karatsuba(z[n:], x1, y1) // z2 = x1*y1 303 304 // compute xd (or the negative value if underflow occurs) 305 s := 1 // sign of product xd*yd 306 xd := z[2*n : 2*n+n2] 307 if subVV(xd, x1, x0) != 0 { // x1-x0 308 s = -s 309 subVV(xd, x0, x1) // x0-x1 310 } 311 312 // compute yd (or the negative value if underflow occurs) 313 yd := z[2*n+n2 : 3*n] 314 if subVV(yd, y0, y1) != 0 { // y0-y1 315 s = -s 316 subVV(yd, y1, y0) // y1-y0 317 } 318 319 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 320 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 321 p := z[n*3:] 322 karatsuba(p, xd, yd) 323 324 // save original z2:z0 325 // (ok to use upper half of z since we're done recursing) 326 r := z[n*4:] 327 copy(r, z[:n*2]) 328 329 // add up all partial products 330 // 331 // 2*n n 0 332 // z = [ z2 | z0 ] 333 // + [ z0 ] 334 // + [ z2 ] 335 // + [ p ] 336 // 337 karatsubaAdd(z[n2:], r, n) 338 karatsubaAdd(z[n2:], r[n:], n) 339 if s > 0 { 340 karatsubaAdd(z[n2:], p, n) 341 } else { 342 karatsubaSub(z[n2:], p, n) 343 } 344 } 345 346 // alias reports whether x and y share the same base array. 347 func alias(x, y nat) bool { 348 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] 349 } 350 351 // addAt implements z += x<<(_W*i); z must be long enough. 352 // (we don't use nat.add because we need z to stay the same 353 // slice, and we don't need to normalize z after each addition) 354 func addAt(z, x nat, i int) { 355 if n := len(x); n > 0 { 356 if c := addVV(z[i:i+n], z[i:], x); c != 0 { 357 j := i + n 358 if j < len(z) { 359 addVW(z[j:], z[j:], c) 360 } 361 } 362 } 363 } 364 365 func max(x, y int) int { 366 if x > y { 367 return x 368 } 369 return y 370 } 371 372 // karatsubaLen computes an approximation to the maximum k <= n such that 373 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the 374 // result is the largest number that can be divided repeatedly by 2 before 375 // becoming about the value of karatsubaThreshold. 376 func karatsubaLen(n int) int { 377 i := uint(0) 378 for n > karatsubaThreshold { 379 n >>= 1 380 i++ 381 } 382 return n << i 383 } 384 385 func (z nat) mul(x, y nat) nat { 386 m := len(x) 387 n := len(y) 388 389 switch { 390 case m < n: 391 return z.mul(y, x) 392 case m == 0 || n == 0: 393 return z[:0] 394 case n == 1: 395 return z.mulAddWW(x, y[0], 0) 396 } 397 // m >= n > 1 398 399 // determine if z can be reused 400 if alias(z, x) || alias(z, y) { 401 z = nil // z is an alias for x or y - cannot reuse 402 } 403 404 // use basic multiplication if the numbers are small 405 if n < karatsubaThreshold { 406 z = z.make(m + n) 407 basicMul(z, x, y) 408 return z.norm() 409 } 410 // m >= n && n >= karatsubaThreshold && n >= 2 411 412 // determine Karatsuba length k such that 413 // 414 // x = xh*b + x0 (0 <= x0 < b) 415 // y = yh*b + y0 (0 <= y0 < b) 416 // b = 1<<(_W*k) ("base" of digits xi, yi) 417 // 418 k := karatsubaLen(n) 419 // k <= n 420 421 // multiply x0 and y0 via Karatsuba 422 x0 := x[0:k] // x0 is not normalized 423 y0 := y[0:k] // y0 is not normalized 424 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y 425 karatsuba(z, x0, y0) 426 z = z[0 : m+n] // z has final length but may be incomplete 427 z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) 428 429 // If xh != 0 or yh != 0, add the missing terms to z. For 430 // 431 // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) 432 // yh = y1*b (0 <= y1 < b) 433 // 434 // the missing terms are 435 // 436 // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 437 // 438 // since all the yi for i > 1 are 0 by choice of k: If any of them 439 // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would 440 // be a larger valid threshold contradicting the assumption about k. 441 // 442 if k < n || m != n { 443 var t nat 444 445 // add x0*y1*b 446 x0 := x0.norm() 447 y1 := y[k:] // y1 is normalized because y is 448 t = t.mul(x0, y1) // update t so we don't lose t's underlying array 449 addAt(z, t, k) 450 451 // add xi*y0<<i, xi*y1*b<<(i+k) 452 y0 := y0.norm() 453 for i := k; i < len(x); i += k { 454 xi := x[i:] 455 if len(xi) > k { 456 xi = xi[:k] 457 } 458 xi = xi.norm() 459 t = t.mul(xi, y0) 460 addAt(z, t, i) 461 t = t.mul(xi, y1) 462 addAt(z, t, i+k) 463 } 464 } 465 466 return z.norm() 467 } 468 469 // mulRange computes the product of all the unsigned integers in the 470 // range [a, b] inclusively. If a > b (empty range), the result is 1. 471 func (z nat) mulRange(a, b uint64) nat { 472 switch { 473 case a == 0: 474 // cut long ranges short (optimization) 475 return z.setUint64(0) 476 case a > b: 477 return z.setUint64(1) 478 case a == b: 479 return z.setUint64(a) 480 case a+1 == b: 481 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) 482 } 483 m := (a + b) / 2 484 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) 485 } 486 487 // q = (x-r)/y, with 0 <= r < y 488 func (z nat) divW(x nat, y Word) (q nat, r Word) { 489 m := len(x) 490 switch { 491 case y == 0: 492 panic("division by zero") 493 case y == 1: 494 q = z.set(x) // result is x 495 return 496 case m == 0: 497 q = z[:0] // result is 0 498 return 499 } 500 // m > 0 501 z = z.make(m) 502 r = divWVW(z, 0, x, y) 503 q = z.norm() 504 return 505 } 506 507 func (z nat) div(z2, u, v nat) (q, r nat) { 508 if len(v) == 0 { 509 panic("division by zero") 510 } 511 512 if u.cmp(v) < 0 { 513 q = z[:0] 514 r = z2.set(u) 515 return 516 } 517 518 if len(v) == 1 { 519 var r2 Word 520 q, r2 = z.divW(u, v[0]) 521 r = z2.setWord(r2) 522 return 523 } 524 525 q, r = z.divLarge(z2, u, v) 526 return 527 } 528 529 // q = (uIn-r)/v, with 0 <= r < y 530 // Uses z as storage for q, and u as storage for r if possible. 531 // See Knuth, Volume 2, section 4.3.1, Algorithm D. 532 // Preconditions: 533 // len(v) >= 2 534 // len(uIn) >= len(v) 535 func (z nat) divLarge(u, uIn, v nat) (q, r nat) { 536 n := len(v) 537 m := len(uIn) - n 538 539 // determine if z can be reused 540 // TODO(gri) should find a better solution - this if statement 541 // is very costly (see e.g. time pidigits -s -n 10000) 542 if alias(z, uIn) || alias(z, v) { 543 z = nil // z is an alias for uIn or v - cannot reuse 544 } 545 q = z.make(m + 1) 546 547 qhatv := make(nat, n+1) 548 if alias(u, uIn) || alias(u, v) { 549 u = nil // u is an alias for uIn or v - cannot reuse 550 } 551 u = u.make(len(uIn) + 1) 552 u.clear() // TODO(gri) no need to clear if we allocated a new u 553 554 // D1. 555 shift := nlz(v[n-1]) 556 if shift > 0 { 557 // do not modify v, it may be used by another goroutine simultaneously 558 v1 := make(nat, n) 559 shlVU(v1, v, shift) 560 v = v1 561 } 562 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) 563 564 // D2. 565 for j := m; j >= 0; j-- { 566 // D3. 567 qhat := Word(_M) 568 if u[j+n] != v[n-1] { 569 var rhat Word 570 qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) 571 572 // x1 | x2 = q̂v_{n-2} 573 x1, x2 := mulWW(qhat, v[n-2]) 574 // test if q̂v_{n-2} > br̂ + u_{j+n-2} 575 for greaterThan(x1, x2, rhat, u[j+n-2]) { 576 qhat-- 577 prevRhat := rhat 578 rhat += v[n-1] 579 // v[n-1] >= 0, so this tests for overflow. 580 if rhat < prevRhat { 581 break 582 } 583 x1, x2 = mulWW(qhat, v[n-2]) 584 } 585 } 586 587 // D4. 588 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) 589 590 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv) 591 if c != 0 { 592 c := addVV(u[j:j+n], u[j:], v) 593 u[j+n] += c 594 qhat-- 595 } 596 597 q[j] = qhat 598 } 599 600 q = q.norm() 601 shrVU(u, u, shift) 602 r = u.norm() 603 604 return q, r 605 } 606 607 // Length of x in bits. x must be normalized. 608 func (x nat) bitLen() int { 609 if i := len(x) - 1; i >= 0 { 610 return i*_W + bitLen(x[i]) 611 } 612 return 0 613 } 614 615 const deBruijn32 = 0x077CB531 616 617 var deBruijn32Lookup = []byte{ 618 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 619 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 620 } 621 622 const deBruijn64 = 0x03f79d71b4ca8b09 623 624 var deBruijn64Lookup = []byte{ 625 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 626 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 627 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 628 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, 629 } 630 631 // trailingZeroBits returns the number of consecutive least significant zero 632 // bits of x. 633 func trailingZeroBits(x Word) uint { 634 // x & -x leaves only the right-most bit set in the word. Let k be the 635 // index of that bit. Since only a single bit is set, the value is two 636 // to the power of k. Multiplying by a power of two is equivalent to 637 // left shifting, in this case by k bits. The de Bruijn constant is 638 // such that all six bit, consecutive substrings are distinct. 639 // Therefore, if we have a left shifted version of this constant we can 640 // find by how many bits it was shifted by looking at which six bit 641 // substring ended up at the top of the word. 642 // (Knuth, volume 4, section 7.3.1) 643 switch _W { 644 case 32: 645 return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) 646 case 64: 647 return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) 648 default: 649 panic("unknown word size") 650 } 651 } 652 653 // trailingZeroBits returns the number of consecutive least significant zero 654 // bits of x. 655 func (x nat) trailingZeroBits() uint { 656 if len(x) == 0 { 657 return 0 658 } 659 var i uint 660 for x[i] == 0 { 661 i++ 662 } 663 // x[i] != 0 664 return i*_W + trailingZeroBits(x[i]) 665 } 666 667 // z = x << s 668 func (z nat) shl(x nat, s uint) nat { 669 m := len(x) 670 if m == 0 { 671 return z[:0] 672 } 673 // m > 0 674 675 n := m + int(s/_W) 676 z = z.make(n + 1) 677 z[n] = shlVU(z[n-m:n], x, s%_W) 678 z[0 : n-m].clear() 679 680 return z.norm() 681 } 682 683 // z = x >> s 684 func (z nat) shr(x nat, s uint) nat { 685 m := len(x) 686 n := m - int(s/_W) 687 if n <= 0 { 688 return z[:0] 689 } 690 // n > 0 691 692 z = z.make(n) 693 shrVU(z, x[m-n:], s%_W) 694 695 return z.norm() 696 } 697 698 func (z nat) setBit(x nat, i uint, b uint) nat { 699 j := int(i / _W) 700 m := Word(1) << (i % _W) 701 n := len(x) 702 switch b { 703 case 0: 704 z = z.make(n) 705 copy(z, x) 706 if j >= n { 707 // no need to grow 708 return z 709 } 710 z[j] &^= m 711 return z.norm() 712 case 1: 713 if j >= n { 714 z = z.make(j + 1) 715 z[n:].clear() 716 } else { 717 z = z.make(n) 718 } 719 copy(z, x) 720 z[j] |= m 721 // no need to normalize 722 return z 723 } 724 panic("set bit is not 0 or 1") 725 } 726 727 // bit returns the value of the i'th bit, with lsb == bit 0. 728 func (x nat) bit(i uint) uint { 729 j := i / _W 730 if j >= uint(len(x)) { 731 return 0 732 } 733 // 0 <= j < len(x) 734 return uint(x[j] >> (i % _W) & 1) 735 } 736 737 // sticky returns 1 if there's a 1 bit within the 738 // i least significant bits, otherwise it returns 0. 739 func (x nat) sticky(i uint) uint { 740 j := i / _W 741 if j >= uint(len(x)) { 742 if len(x) == 0 { 743 return 0 744 } 745 return 1 746 } 747 // 0 <= j < len(x) 748 for _, x := range x[:j] { 749 if x != 0 { 750 return 1 751 } 752 } 753 if x[j]<<(_W-i%_W) != 0 { 754 return 1 755 } 756 return 0 757 } 758 759 func (z nat) and(x, y nat) nat { 760 m := len(x) 761 n := len(y) 762 if m > n { 763 m = n 764 } 765 // m <= n 766 767 z = z.make(m) 768 for i := 0; i < m; i++ { 769 z[i] = x[i] & y[i] 770 } 771 772 return z.norm() 773 } 774 775 func (z nat) andNot(x, y nat) nat { 776 m := len(x) 777 n := len(y) 778 if n > m { 779 n = m 780 } 781 // m >= n 782 783 z = z.make(m) 784 for i := 0; i < n; i++ { 785 z[i] = x[i] &^ y[i] 786 } 787 copy(z[n:m], x[n:m]) 788 789 return z.norm() 790 } 791 792 func (z nat) or(x, y nat) nat { 793 m := len(x) 794 n := len(y) 795 s := x 796 if m < n { 797 n, m = m, n 798 s = y 799 } 800 // m >= n 801 802 z = z.make(m) 803 for i := 0; i < n; i++ { 804 z[i] = x[i] | y[i] 805 } 806 copy(z[n:m], s[n:m]) 807 808 return z.norm() 809 } 810 811 func (z nat) xor(x, y nat) nat { 812 m := len(x) 813 n := len(y) 814 s := x 815 if m < n { 816 n, m = m, n 817 s = y 818 } 819 // m >= n 820 821 z = z.make(m) 822 for i := 0; i < n; i++ { 823 z[i] = x[i] ^ y[i] 824 } 825 copy(z[n:m], s[n:m]) 826 827 return z.norm() 828 } 829 830 // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) 831 func greaterThan(x1, x2, y1, y2 Word) bool { 832 return x1 > y1 || x1 == y1 && x2 > y2 833 } 834 835 // modW returns x % d. 836 func (x nat) modW(d Word) (r Word) { 837 // TODO(agl): we don't actually need to store the q value. 838 var q nat 839 q = q.make(len(x)) 840 return divWVW(q, 0, x, d) 841 } 842 843 // random creates a random integer in [0..limit), using the space in z if 844 // possible. n is the bit length of limit. 845 func (z nat) random(rand *rand.Rand, limit nat, n int) nat { 846 if alias(z, limit) { 847 z = nil // z is an alias for limit - cannot reuse 848 } 849 z = z.make(len(limit)) 850 851 bitLengthOfMSW := uint(n % _W) 852 if bitLengthOfMSW == 0 { 853 bitLengthOfMSW = _W 854 } 855 mask := Word((1 << bitLengthOfMSW) - 1) 856 857 for { 858 switch _W { 859 case 32: 860 for i := range z { 861 z[i] = Word(rand.Uint32()) 862 } 863 case 64: 864 for i := range z { 865 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 866 } 867 default: 868 panic("unknown word size") 869 } 870 z[len(limit)-1] &= mask 871 if z.cmp(limit) < 0 { 872 break 873 } 874 } 875 876 return z.norm() 877 } 878 879 // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; 880 // otherwise it sets z to x**y. The result is the value of z. 881 func (z nat) expNN(x, y, m nat) nat { 882 if alias(z, x) || alias(z, y) { 883 // We cannot allow in-place modification of x or y. 884 z = nil 885 } 886 887 // x**y mod 1 == 0 888 if len(m) == 1 && m[0] == 1 { 889 return z.setWord(0) 890 } 891 // m == 0 || m > 1 892 893 // x**0 == 1 894 if len(y) == 0 { 895 return z.setWord(1) 896 } 897 // y > 0 898 899 // x**1 mod m == x mod m 900 if len(y) == 1 && y[0] == 1 && len(m) != 0 { 901 _, z = z.div(z, x, m) 902 return z 903 } 904 // y > 1 905 906 if len(m) != 0 { 907 // We likely end up being as long as the modulus. 908 z = z.make(len(m)) 909 } 910 z = z.set(x) 911 912 // If the base is non-trivial and the exponent is large, we use 913 // 4-bit, windowed exponentiation. This involves precomputing 14 values 914 // (x^2...x^15) but then reduces the number of multiply-reduces by a 915 // third. Even for a 32-bit exponent, this reduces the number of 916 // operations. Uses Montgomery method for odd moduli. 917 if len(x) > 1 && len(y) > 1 && len(m) > 0 { 918 if m[0]&1 == 1 { 919 return z.expNNMontgomery(x, y, m) 920 } 921 return z.expNNWindowed(x, y, m) 922 } 923 924 v := y[len(y)-1] // v > 0 because y is normalized and y > 0 925 shift := nlz(v) + 1 926 v <<= shift 927 var q nat 928 929 const mask = 1 << (_W - 1) 930 931 // We walk through the bits of the exponent one by one. Each time we 932 // see a bit, we square, thus doubling the power. If the bit is a one, 933 // we also multiply by x, thus adding one to the power. 934 935 w := _W - int(shift) 936 // zz and r are used to avoid allocating in mul and div as 937 // otherwise the arguments would alias. 938 var zz, r nat 939 for j := 0; j < w; j++ { 940 zz = zz.mul(z, z) 941 zz, z = z, zz 942 943 if v&mask != 0 { 944 zz = zz.mul(z, x) 945 zz, z = z, zz 946 } 947 948 if len(m) != 0 { 949 zz, r = zz.div(r, z, m) 950 zz, r, q, z = q, z, zz, r 951 } 952 953 v <<= 1 954 } 955 956 for i := len(y) - 2; i >= 0; i-- { 957 v = y[i] 958 959 for j := 0; j < _W; j++ { 960 zz = zz.mul(z, z) 961 zz, z = z, zz 962 963 if v&mask != 0 { 964 zz = zz.mul(z, x) 965 zz, z = z, zz 966 } 967 968 if len(m) != 0 { 969 zz, r = zz.div(r, z, m) 970 zz, r, q, z = q, z, zz, r 971 } 972 973 v <<= 1 974 } 975 } 976 977 return z.norm() 978 } 979 980 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. 981 func (z nat) expNNWindowed(x, y, m nat) nat { 982 // zz and r are used to avoid allocating in mul and div as otherwise 983 // the arguments would alias. 984 var zz, r nat 985 986 const n = 4 987 // powers[i] contains x^i. 988 var powers [1 << n]nat 989 powers[0] = natOne 990 powers[1] = x 991 for i := 2; i < 1<<n; i += 2 { 992 p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] 993 *p = p.mul(*p2, *p2) 994 zz, r = zz.div(r, *p, m) 995 *p, r = r, *p 996 *p1 = p1.mul(*p, x) 997 zz, r = zz.div(r, *p1, m) 998 *p1, r = r, *p1 999 } 1000 1001 z = z.setWord(1) 1002 1003 for i := len(y) - 1; i >= 0; i-- { 1004 yi := y[i] 1005 for j := 0; j < _W; j += n { 1006 if i != len(y)-1 || j != 0 { 1007 // Unrolled loop for significant performance 1008 // gain. Use go test -bench=".*" in crypto/rsa 1009 // to check performance before making changes. 1010 zz = zz.mul(z, z) 1011 zz, z = z, zz 1012 zz, r = zz.div(r, z, m) 1013 z, r = r, z 1014 1015 zz = zz.mul(z, z) 1016 zz, z = z, zz 1017 zz, r = zz.div(r, z, m) 1018 z, r = r, z 1019 1020 zz = zz.mul(z, z) 1021 zz, z = z, zz 1022 zz, r = zz.div(r, z, m) 1023 z, r = r, z 1024 1025 zz = zz.mul(z, z) 1026 zz, z = z, zz 1027 zz, r = zz.div(r, z, m) 1028 z, r = r, z 1029 } 1030 1031 zz = zz.mul(z, powers[yi>>(_W-n)]) 1032 zz, z = z, zz 1033 zz, r = zz.div(r, z, m) 1034 z, r = r, z 1035 1036 yi <<= n 1037 } 1038 } 1039 1040 return z.norm() 1041 } 1042 1043 // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window. 1044 // Uses Montgomery representation. 1045 func (z nat) expNNMontgomery(x, y, m nat) nat { 1046 var zz, one, rr, RR nat 1047 1048 numWords := len(m) 1049 1050 // We want the lengths of x and m to be equal. 1051 if len(x) > numWords { 1052 _, rr = rr.div(rr, x, m) 1053 } else if len(x) < numWords { 1054 rr = rr.make(numWords) 1055 rr.clear() 1056 for i := range x { 1057 rr[i] = x[i] 1058 } 1059 } else { 1060 rr = x 1061 } 1062 x = rr 1063 1064 // Ideally the precomputations would be performed outside, and reused 1065 // k0 = -mˆ-1 mod 2ˆ_W. Algorithm from: Dumas, J.G. "On Newton–Raphson 1066 // Iteration for Multiplicative Inverses Modulo Prime Powers". 1067 k0 := 2 - m[0] 1068 t := m[0] - 1 1069 for i := 1; i < _W; i <<= 1 { 1070 t *= t 1071 k0 *= (t + 1) 1072 } 1073 k0 = -k0 1074 1075 // RR = 2ˆ(2*_W*len(m)) mod m 1076 RR = RR.setWord(1) 1077 zz = zz.shl(RR, uint(2*numWords*_W)) 1078 _, RR = RR.div(RR, zz, m) 1079 if len(RR) < numWords { 1080 zz = zz.make(numWords) 1081 copy(zz, RR) 1082 RR = zz 1083 } 1084 // one = 1, with equal length to that of m 1085 one = one.make(numWords) 1086 one.clear() 1087 one[0] = 1 1088 1089 const n = 4 1090 // powers[i] contains x^i 1091 var powers [1 << n]nat 1092 powers[0] = powers[0].montgomery(one, RR, m, k0, numWords) 1093 powers[1] = powers[1].montgomery(x, RR, m, k0, numWords) 1094 for i := 2; i < 1<<n; i++ { 1095 powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords) 1096 } 1097 1098 // initialize z = 1 (Montgomery 1) 1099 z = z.make(numWords) 1100 copy(z, powers[0]) 1101 1102 zz = zz.make(numWords) 1103 1104 // same windowed exponent, but with Montgomery multiplications 1105 for i := len(y) - 1; i >= 0; i-- { 1106 yi := y[i] 1107 for j := 0; j < _W; j += n { 1108 if i != len(y)-1 || j != 0 { 1109 zz = zz.montgomery(z, z, m, k0, numWords) 1110 z = z.montgomery(zz, zz, m, k0, numWords) 1111 zz = zz.montgomery(z, z, m, k0, numWords) 1112 z = z.montgomery(zz, zz, m, k0, numWords) 1113 } 1114 zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords) 1115 z, zz = zz, z 1116 yi <<= n 1117 } 1118 } 1119 // convert to regular number 1120 zz = zz.montgomery(z, one, m, k0, numWords) 1121 return zz.norm() 1122 } 1123 1124 // probablyPrime performs n Miller-Rabin tests to check whether x is prime. 1125 // If x is prime, it returns true. 1126 // If x is not prime, it returns false with probability at least 1 - ¼ⁿ. 1127 // 1128 // It is not suitable for judging primes that an adversary may have crafted 1129 // to fool this test. 1130 func (n nat) probablyPrime(reps int) bool { 1131 if len(n) == 0 { 1132 return false 1133 } 1134 1135 if len(n) == 1 { 1136 if n[0] < 2 { 1137 return false 1138 } 1139 1140 if n[0]%2 == 0 { 1141 return n[0] == 2 1142 } 1143 1144 // We have to exclude these cases because we reject all 1145 // multiples of these numbers below. 1146 switch n[0] { 1147 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: 1148 return true 1149 } 1150 } 1151 1152 if n[0]&1 == 0 { 1153 return false // n is even 1154 } 1155 1156 const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} 1157 const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} 1158 1159 var r Word 1160 switch _W { 1161 case 32: 1162 r = n.modW(primesProduct32) 1163 case 64: 1164 r = n.modW(primesProduct64 & _M) 1165 default: 1166 panic("Unknown word size") 1167 } 1168 1169 if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || 1170 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { 1171 return false 1172 } 1173 1174 if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || 1175 r%43 == 0 || r%47 == 0 || r%53 == 0) { 1176 return false 1177 } 1178 1179 nm1 := nat(nil).sub(n, natOne) 1180 // determine q, k such that nm1 = q << k 1181 k := nm1.trailingZeroBits() 1182 q := nat(nil).shr(nm1, k) 1183 1184 nm3 := nat(nil).sub(nm1, natTwo) 1185 rand := rand.New(rand.NewSource(int64(n[0]))) 1186 1187 var x, y, quotient nat 1188 nm3Len := nm3.bitLen() 1189 1190 NextRandom: 1191 for i := 0; i < reps; i++ { 1192 x = x.random(rand, nm3, nm3Len) 1193 x = x.add(x, natTwo) 1194 y = y.expNN(x, q, n) 1195 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { 1196 continue 1197 } 1198 for j := uint(1); j < k; j++ { 1199 y = y.mul(y, y) 1200 quotient, y = quotient.div(y, y, n) 1201 if y.cmp(nm1) == 0 { 1202 continue NextRandom 1203 } 1204 if y.cmp(natOne) == 0 { 1205 return false 1206 } 1207 } 1208 return false 1209 } 1210 1211 return true 1212 } 1213 1214 // bytes writes the value of z into buf using big-endian encoding. 1215 // len(buf) must be >= len(z)*_S. The value of z is encoded in the 1216 // slice buf[i:]. The number i of unused bytes at the beginning of 1217 // buf is returned as result. 1218 func (z nat) bytes(buf []byte) (i int) { 1219 i = len(buf) 1220 for _, d := range z { 1221 for j := 0; j < _S; j++ { 1222 i-- 1223 buf[i] = byte(d) 1224 d >>= 8 1225 } 1226 } 1227 1228 for i < len(buf) && buf[i] == 0 { 1229 i++ 1230 } 1231 1232 return 1233 } 1234 1235 // setBytes interprets buf as the bytes of a big-endian unsigned 1236 // integer, sets z to that value, and returns z. 1237 func (z nat) setBytes(buf []byte) nat { 1238 z = z.make((len(buf) + _S - 1) / _S) 1239 1240 k := 0 1241 s := uint(0) 1242 var d Word 1243 for i := len(buf); i > 0; i-- { 1244 d |= Word(buf[i-1]) << s 1245 if s += 8; s == _S*8 { 1246 z[k] = d 1247 k++ 1248 s = 0 1249 d = 0 1250 } 1251 } 1252 if k < len(z) { 1253 z[k] = d 1254 } 1255 1256 return z.norm() 1257 }