github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/cmd/compile/internal/big/nat.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements unsigned multi-precision integers (natural 6 // numbers). They are the building blocks for the implementation 7 // of signed integers, rationals, and floating-point numbers. 8 9 package big 10 11 import "math/rand" 12 13 // An unsigned integer x of the form 14 // 15 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] 16 // 17 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, 18 // with the digits x[i] as the slice elements. 19 // 20 // A number is normalized if the slice contains no leading 0 digits. 21 // During arithmetic operations, denormalized values may occur but are 22 // always normalized before returning the final result. The normalized 23 // representation of 0 is the empty or nil slice (length = 0). 24 // 25 type nat []Word 26 27 var ( 28 natOne = nat{1} 29 natTwo = nat{2} 30 natTen = nat{10} 31 ) 32 33 func (z nat) clear() { 34 for i := range z { 35 z[i] = 0 36 } 37 } 38 39 func (z nat) norm() nat { 40 i := len(z) 41 for i > 0 && z[i-1] == 0 { 42 i-- 43 } 44 return z[0:i] 45 } 46 47 func (z nat) make(n int) nat { 48 if n <= cap(z) { 49 return z[:n] // reuse z 50 } 51 // Choosing a good value for e has significant performance impact 52 // because it increases the chance that a value can be reused. 53 const e = 4 // extra capacity 54 return make(nat, n, n+e) 55 } 56 57 func (z nat) setWord(x Word) nat { 58 if x == 0 { 59 return z[:0] 60 } 61 z = z.make(1) 62 z[0] = x 63 return z 64 } 65 66 func (z nat) setUint64(x uint64) nat { 67 // single-digit values 68 if w := Word(x); uint64(w) == x { 69 return z.setWord(w) 70 } 71 72 // compute number of words n required to represent x 73 n := 0 74 for t := x; t > 0; t >>= _W { 75 n++ 76 } 77 78 // split x into n words 79 z = z.make(n) 80 for i := range z { 81 z[i] = Word(x & _M) 82 x >>= _W 83 } 84 85 return z 86 } 87 88 func (z nat) set(x nat) nat { 89 z = z.make(len(x)) 90 copy(z, x) 91 return z 92 } 93 94 func (z nat) add(x, y nat) nat { 95 m := len(x) 96 n := len(y) 97 98 switch { 99 case m < n: 100 return z.add(y, x) 101 case m == 0: 102 // n == 0 because m >= n; result is 0 103 return z[:0] 104 case n == 0: 105 // result is x 106 return z.set(x) 107 } 108 // m > 0 109 110 z = z.make(m + 1) 111 c := addVV(z[0:n], x, y) 112 if m > n { 113 c = addVW(z[n:m], x[n:], c) 114 } 115 z[m] = c 116 117 return z.norm() 118 } 119 120 func (z nat) sub(x, y nat) nat { 121 m := len(x) 122 n := len(y) 123 124 switch { 125 case m < n: 126 panic("underflow") 127 case m == 0: 128 // n == 0 because m >= n; result is 0 129 return z[:0] 130 case n == 0: 131 // result is x 132 return z.set(x) 133 } 134 // m > 0 135 136 z = z.make(m) 137 c := subVV(z[0:n], x, y) 138 if m > n { 139 c = subVW(z[n:], x[n:], c) 140 } 141 if c != 0 { 142 panic("underflow") 143 } 144 145 return z.norm() 146 } 147 148 func (x nat) cmp(y nat) (r int) { 149 m := len(x) 150 n := len(y) 151 if m != n || m == 0 { 152 switch { 153 case m < n: 154 r = -1 155 case m > n: 156 r = 1 157 } 158 return 159 } 160 161 i := m - 1 162 for i > 0 && x[i] == y[i] { 163 i-- 164 } 165 166 switch { 167 case x[i] < y[i]: 168 r = -1 169 case x[i] > y[i]: 170 r = 1 171 } 172 return 173 } 174 175 func (z nat) mulAddWW(x nat, y, r Word) nat { 176 m := len(x) 177 if m == 0 || y == 0 { 178 return z.setWord(r) // result is r 179 } 180 // m > 0 181 182 z = z.make(m + 1) 183 z[m] = mulAddVWW(z[0:m], x, y, r) 184 185 return z.norm() 186 } 187 188 // basicMul multiplies x and y and leaves the result in z. 189 // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 190 func basicMul(z, x, y nat) { 191 z[0 : len(x)+len(y)].clear() // initialize z 192 for i, d := range y { 193 if d != 0 { 194 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) 195 } 196 } 197 } 198 199 // montgomery computes z mod m = x*y*2**(-n*_W) mod m, 200 // assuming k = -1/m mod 2**_W. 201 // z is used for storing the result which is returned; 202 // z must not alias x, y or m. 203 // See Gueron, "Efficient Software Implementations of Modular Exponentiation". 204 // https://eprint.iacr.org/2011/239.pdf 205 // In the terminology of that paper, this is an "Almost Montgomery Multiplication": 206 // x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result 207 // z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. 208 func (z nat) montgomery(x, y, m nat, k Word, n int) nat { 209 // This code assumes x, y, m are all the same length, n. 210 // (required by addMulVVW and the for loop). 211 // It also assumes that x, y are already reduced mod m, 212 // or else the result will not be properly reduced. 213 if len(x) != n || len(y) != n || len(m) != n { 214 panic("math/big: mismatched montgomery number lengths") 215 } 216 var c1, c2, c3 Word 217 z = z.make(n) 218 z.clear() 219 for i := 0; i < n; i++ { 220 d := y[i] 221 c2 = addMulVVW(z, x, d) 222 t := z[0] * k 223 c3 = addMulVVW(z, m, t) 224 copy(z, z[1:]) 225 cx := c1 + c2 226 cy := cx + c3 227 z[n-1] = cy 228 if cx < c2 || cy < c3 { 229 c1 = 1 230 } else { 231 c1 = 0 232 } 233 } 234 if c1 != 0 { 235 subVV(z, z, m) 236 } 237 return z 238 } 239 240 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. 241 // Factored out for readability - do not use outside karatsuba. 242 func karatsubaAdd(z, x nat, n int) { 243 if c := addVV(z[0:n], z, x); c != 0 { 244 addVW(z[n:n+n>>1], z[n:], c) 245 } 246 } 247 248 // Like karatsubaAdd, but does subtract. 249 func karatsubaSub(z, x nat, n int) { 250 if c := subVV(z[0:n], z, x); c != 0 { 251 subVW(z[n:n+n>>1], z[n:], c) 252 } 253 } 254 255 // Operands that are shorter than karatsubaThreshold are multiplied using 256 // "grade school" multiplication; for longer operands the Karatsuba algorithm 257 // is used. 258 var karatsubaThreshold int = 40 // computed by calibrate.go 259 260 // karatsuba multiplies x and y and leaves the result in z. 261 // Both x and y must have the same length n and n must be a 262 // power of 2. The result vector z must have len(z) >= 6*n. 263 // The (non-normalized) result is placed in z[0 : 2*n]. 264 func karatsuba(z, x, y nat) { 265 n := len(y) 266 267 // Switch to basic multiplication if numbers are odd or small. 268 // (n is always even if karatsubaThreshold is even, but be 269 // conservative) 270 if n&1 != 0 || n < karatsubaThreshold || n < 2 { 271 basicMul(z, x, y) 272 return 273 } 274 // n&1 == 0 && n >= karatsubaThreshold && n >= 2 275 276 // Karatsuba multiplication is based on the observation that 277 // for two numbers x and y with: 278 // 279 // x = x1*b + x0 280 // y = y1*b + y0 281 // 282 // the product x*y can be obtained with 3 products z2, z1, z0 283 // instead of 4: 284 // 285 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 286 // = z2*b*b + z1*b + z0 287 // 288 // with: 289 // 290 // xd = x1 - x0 291 // yd = y0 - y1 292 // 293 // z1 = xd*yd + z2 + z0 294 // = (x1-x0)*(y0 - y1) + z2 + z0 295 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 296 // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 297 // = x1*y0 + x0*y1 298 299 // split x, y into "digits" 300 n2 := n >> 1 // n2 >= 1 301 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 302 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 303 304 // z is used for the result and temporary storage: 305 // 306 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n 307 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] 308 // 309 // For each recursive call of karatsuba, an unused slice of 310 // z is passed in that has (at least) half the length of the 311 // caller's z. 312 313 // compute z0 and z2 with the result "in place" in z 314 karatsuba(z, x0, y0) // z0 = x0*y0 315 karatsuba(z[n:], x1, y1) // z2 = x1*y1 316 317 // compute xd (or the negative value if underflow occurs) 318 s := 1 // sign of product xd*yd 319 xd := z[2*n : 2*n+n2] 320 if subVV(xd, x1, x0) != 0 { // x1-x0 321 s = -s 322 subVV(xd, x0, x1) // x0-x1 323 } 324 325 // compute yd (or the negative value if underflow occurs) 326 yd := z[2*n+n2 : 3*n] 327 if subVV(yd, y0, y1) != 0 { // y0-y1 328 s = -s 329 subVV(yd, y1, y0) // y1-y0 330 } 331 332 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 333 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 334 p := z[n*3:] 335 karatsuba(p, xd, yd) 336 337 // save original z2:z0 338 // (ok to use upper half of z since we're done recursing) 339 r := z[n*4:] 340 copy(r, z[:n*2]) 341 342 // add up all partial products 343 // 344 // 2*n n 0 345 // z = [ z2 | z0 ] 346 // + [ z0 ] 347 // + [ z2 ] 348 // + [ p ] 349 // 350 karatsubaAdd(z[n2:], r, n) 351 karatsubaAdd(z[n2:], r[n:], n) 352 if s > 0 { 353 karatsubaAdd(z[n2:], p, n) 354 } else { 355 karatsubaSub(z[n2:], p, n) 356 } 357 } 358 359 // alias reports whether x and y share the same base array. 360 func alias(x, y nat) bool { 361 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] 362 } 363 364 // addAt implements z += x<<(_W*i); z must be long enough. 365 // (we don't use nat.add because we need z to stay the same 366 // slice, and we don't need to normalize z after each addition) 367 func addAt(z, x nat, i int) { 368 if n := len(x); n > 0 { 369 if c := addVV(z[i:i+n], z[i:], x); c != 0 { 370 j := i + n 371 if j < len(z) { 372 addVW(z[j:], z[j:], c) 373 } 374 } 375 } 376 } 377 378 func max(x, y int) int { 379 if x > y { 380 return x 381 } 382 return y 383 } 384 385 // karatsubaLen computes an approximation to the maximum k <= n such that 386 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the 387 // result is the largest number that can be divided repeatedly by 2 before 388 // becoming about the value of karatsubaThreshold. 389 func karatsubaLen(n int) int { 390 i := uint(0) 391 for n > karatsubaThreshold { 392 n >>= 1 393 i++ 394 } 395 return n << i 396 } 397 398 func (z nat) mul(x, y nat) nat { 399 m := len(x) 400 n := len(y) 401 402 switch { 403 case m < n: 404 return z.mul(y, x) 405 case m == 0 || n == 0: 406 return z[:0] 407 case n == 1: 408 return z.mulAddWW(x, y[0], 0) 409 } 410 // m >= n > 1 411 412 // determine if z can be reused 413 if alias(z, x) || alias(z, y) { 414 z = nil // z is an alias for x or y - cannot reuse 415 } 416 417 // use basic multiplication if the numbers are small 418 if n < karatsubaThreshold { 419 z = z.make(m + n) 420 basicMul(z, x, y) 421 return z.norm() 422 } 423 // m >= n && n >= karatsubaThreshold && n >= 2 424 425 // determine Karatsuba length k such that 426 // 427 // x = xh*b + x0 (0 <= x0 < b) 428 // y = yh*b + y0 (0 <= y0 < b) 429 // b = 1<<(_W*k) ("base" of digits xi, yi) 430 // 431 k := karatsubaLen(n) 432 // k <= n 433 434 // multiply x0 and y0 via Karatsuba 435 x0 := x[0:k] // x0 is not normalized 436 y0 := y[0:k] // y0 is not normalized 437 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y 438 karatsuba(z, x0, y0) 439 z = z[0 : m+n] // z has final length but may be incomplete 440 z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) 441 442 // If xh != 0 or yh != 0, add the missing terms to z. For 443 // 444 // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) 445 // yh = y1*b (0 <= y1 < b) 446 // 447 // the missing terms are 448 // 449 // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 450 // 451 // since all the yi for i > 1 are 0 by choice of k: If any of them 452 // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would 453 // be a larger valid threshold contradicting the assumption about k. 454 // 455 if k < n || m != n { 456 var t nat 457 458 // add x0*y1*b 459 x0 := x0.norm() 460 y1 := y[k:] // y1 is normalized because y is 461 t = t.mul(x0, y1) // update t so we don't lose t's underlying array 462 addAt(z, t, k) 463 464 // add xi*y0<<i, xi*y1*b<<(i+k) 465 y0 := y0.norm() 466 for i := k; i < len(x); i += k { 467 xi := x[i:] 468 if len(xi) > k { 469 xi = xi[:k] 470 } 471 xi = xi.norm() 472 t = t.mul(xi, y0) 473 addAt(z, t, i) 474 t = t.mul(xi, y1) 475 addAt(z, t, i+k) 476 } 477 } 478 479 return z.norm() 480 } 481 482 // mulRange computes the product of all the unsigned integers in the 483 // range [a, b] inclusively. If a > b (empty range), the result is 1. 484 func (z nat) mulRange(a, b uint64) nat { 485 switch { 486 case a == 0: 487 // cut long ranges short (optimization) 488 return z.setUint64(0) 489 case a > b: 490 return z.setUint64(1) 491 case a == b: 492 return z.setUint64(a) 493 case a+1 == b: 494 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) 495 } 496 m := (a + b) / 2 497 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) 498 } 499 500 // q = (x-r)/y, with 0 <= r < y 501 func (z nat) divW(x nat, y Word) (q nat, r Word) { 502 m := len(x) 503 switch { 504 case y == 0: 505 panic("division by zero") 506 case y == 1: 507 q = z.set(x) // result is x 508 return 509 case m == 0: 510 q = z[:0] // result is 0 511 return 512 } 513 // m > 0 514 z = z.make(m) 515 r = divWVW(z, 0, x, y) 516 q = z.norm() 517 return 518 } 519 520 func (z nat) div(z2, u, v nat) (q, r nat) { 521 if len(v) == 0 { 522 panic("division by zero") 523 } 524 525 if u.cmp(v) < 0 { 526 q = z[:0] 527 r = z2.set(u) 528 return 529 } 530 531 if len(v) == 1 { 532 var r2 Word 533 q, r2 = z.divW(u, v[0]) 534 r = z2.setWord(r2) 535 return 536 } 537 538 q, r = z.divLarge(z2, u, v) 539 return 540 } 541 542 // q = (uIn-r)/v, with 0 <= r < y 543 // Uses z as storage for q, and u as storage for r if possible. 544 // See Knuth, Volume 2, section 4.3.1, Algorithm D. 545 // Preconditions: 546 // len(v) >= 2 547 // len(uIn) >= len(v) 548 func (z nat) divLarge(u, uIn, v nat) (q, r nat) { 549 n := len(v) 550 m := len(uIn) - n 551 552 // determine if z can be reused 553 // TODO(gri) should find a better solution - this if statement 554 // is very costly (see e.g. time pidigits -s -n 10000) 555 if alias(z, uIn) || alias(z, v) { 556 z = nil // z is an alias for uIn or v - cannot reuse 557 } 558 q = z.make(m + 1) 559 560 qhatv := make(nat, n+1) 561 if alias(u, uIn) || alias(u, v) { 562 u = nil // u is an alias for uIn or v - cannot reuse 563 } 564 u = u.make(len(uIn) + 1) 565 u.clear() // TODO(gri) no need to clear if we allocated a new u 566 567 // D1. 568 shift := nlz(v[n-1]) 569 if shift > 0 { 570 // do not modify v, it may be used by another goroutine simultaneously 571 v1 := make(nat, n) 572 shlVU(v1, v, shift) 573 v = v1 574 } 575 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) 576 577 // D2. 578 for j := m; j >= 0; j-- { 579 // D3. 580 qhat := Word(_M) 581 if u[j+n] != v[n-1] { 582 var rhat Word 583 qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) 584 585 // x1 | x2 = q̂v_{n-2} 586 x1, x2 := mulWW(qhat, v[n-2]) 587 // test if q̂v_{n-2} > br̂ + u_{j+n-2} 588 for greaterThan(x1, x2, rhat, u[j+n-2]) { 589 qhat-- 590 prevRhat := rhat 591 rhat += v[n-1] 592 // v[n-1] >= 0, so this tests for overflow. 593 if rhat < prevRhat { 594 break 595 } 596 x1, x2 = mulWW(qhat, v[n-2]) 597 } 598 } 599 600 // D4. 601 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) 602 603 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv) 604 if c != 0 { 605 c := addVV(u[j:j+n], u[j:], v) 606 u[j+n] += c 607 qhat-- 608 } 609 610 q[j] = qhat 611 } 612 613 q = q.norm() 614 shrVU(u, u, shift) 615 r = u.norm() 616 617 return q, r 618 } 619 620 // Length of x in bits. x must be normalized. 621 func (x nat) bitLen() int { 622 if i := len(x) - 1; i >= 0 { 623 return i*_W + bitLen(x[i]) 624 } 625 return 0 626 } 627 628 const deBruijn32 = 0x077CB531 629 630 var deBruijn32Lookup = []byte{ 631 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 632 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 633 } 634 635 const deBruijn64 = 0x03f79d71b4ca8b09 636 637 var deBruijn64Lookup = []byte{ 638 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 639 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 640 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 641 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, 642 } 643 644 // trailingZeroBits returns the number of consecutive least significant zero 645 // bits of x. 646 func trailingZeroBits(x Word) uint { 647 // x & -x leaves only the right-most bit set in the word. Let k be the 648 // index of that bit. Since only a single bit is set, the value is two 649 // to the power of k. Multiplying by a power of two is equivalent to 650 // left shifting, in this case by k bits. The de Bruijn constant is 651 // such that all six bit, consecutive substrings are distinct. 652 // Therefore, if we have a left shifted version of this constant we can 653 // find by how many bits it was shifted by looking at which six bit 654 // substring ended up at the top of the word. 655 // (Knuth, volume 4, section 7.3.1) 656 switch _W { 657 case 32: 658 return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) 659 case 64: 660 return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) 661 default: 662 panic("unknown word size") 663 } 664 } 665 666 // trailingZeroBits returns the number of consecutive least significant zero 667 // bits of x. 668 func (x nat) trailingZeroBits() uint { 669 if len(x) == 0 { 670 return 0 671 } 672 var i uint 673 for x[i] == 0 { 674 i++ 675 } 676 // x[i] != 0 677 return i*_W + trailingZeroBits(x[i]) 678 } 679 680 // z = x << s 681 func (z nat) shl(x nat, s uint) nat { 682 m := len(x) 683 if m == 0 { 684 return z[:0] 685 } 686 // m > 0 687 688 n := m + int(s/_W) 689 z = z.make(n + 1) 690 z[n] = shlVU(z[n-m:n], x, s%_W) 691 z[0 : n-m].clear() 692 693 return z.norm() 694 } 695 696 // z = x >> s 697 func (z nat) shr(x nat, s uint) nat { 698 m := len(x) 699 n := m - int(s/_W) 700 if n <= 0 { 701 return z[:0] 702 } 703 // n > 0 704 705 z = z.make(n) 706 shrVU(z, x[m-n:], s%_W) 707 708 return z.norm() 709 } 710 711 func (z nat) setBit(x nat, i uint, b uint) nat { 712 j := int(i / _W) 713 m := Word(1) << (i % _W) 714 n := len(x) 715 switch b { 716 case 0: 717 z = z.make(n) 718 copy(z, x) 719 if j >= n { 720 // no need to grow 721 return z 722 } 723 z[j] &^= m 724 return z.norm() 725 case 1: 726 if j >= n { 727 z = z.make(j + 1) 728 z[n:].clear() 729 } else { 730 z = z.make(n) 731 } 732 copy(z, x) 733 z[j] |= m 734 // no need to normalize 735 return z 736 } 737 panic("set bit is not 0 or 1") 738 } 739 740 // bit returns the value of the i'th bit, with lsb == bit 0. 741 func (x nat) bit(i uint) uint { 742 j := i / _W 743 if j >= uint(len(x)) { 744 return 0 745 } 746 // 0 <= j < len(x) 747 return uint(x[j] >> (i % _W) & 1) 748 } 749 750 // sticky returns 1 if there's a 1 bit within the 751 // i least significant bits, otherwise it returns 0. 752 func (x nat) sticky(i uint) uint { 753 j := i / _W 754 if j >= uint(len(x)) { 755 if len(x) == 0 { 756 return 0 757 } 758 return 1 759 } 760 // 0 <= j < len(x) 761 for _, x := range x[:j] { 762 if x != 0 { 763 return 1 764 } 765 } 766 if x[j]<<(_W-i%_W) != 0 { 767 return 1 768 } 769 return 0 770 } 771 772 func (z nat) and(x, y nat) nat { 773 m := len(x) 774 n := len(y) 775 if m > n { 776 m = n 777 } 778 // m <= n 779 780 z = z.make(m) 781 for i := 0; i < m; i++ { 782 z[i] = x[i] & y[i] 783 } 784 785 return z.norm() 786 } 787 788 func (z nat) andNot(x, y nat) nat { 789 m := len(x) 790 n := len(y) 791 if n > m { 792 n = m 793 } 794 // m >= n 795 796 z = z.make(m) 797 for i := 0; i < n; i++ { 798 z[i] = x[i] &^ y[i] 799 } 800 copy(z[n:m], x[n:m]) 801 802 return z.norm() 803 } 804 805 func (z nat) or(x, y nat) nat { 806 m := len(x) 807 n := len(y) 808 s := x 809 if m < n { 810 n, m = m, n 811 s = y 812 } 813 // m >= n 814 815 z = z.make(m) 816 for i := 0; i < n; i++ { 817 z[i] = x[i] | y[i] 818 } 819 copy(z[n:m], s[n:m]) 820 821 return z.norm() 822 } 823 824 func (z nat) xor(x, y nat) nat { 825 m := len(x) 826 n := len(y) 827 s := x 828 if m < n { 829 n, m = m, n 830 s = y 831 } 832 // m >= n 833 834 z = z.make(m) 835 for i := 0; i < n; i++ { 836 z[i] = x[i] ^ y[i] 837 } 838 copy(z[n:m], s[n:m]) 839 840 return z.norm() 841 } 842 843 // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2) 844 func greaterThan(x1, x2, y1, y2 Word) bool { 845 return x1 > y1 || x1 == y1 && x2 > y2 846 } 847 848 // modW returns x % d. 849 func (x nat) modW(d Word) (r Word) { 850 // TODO(agl): we don't actually need to store the q value. 851 var q nat 852 q = q.make(len(x)) 853 return divWVW(q, 0, x, d) 854 } 855 856 // random creates a random integer in [0..limit), using the space in z if 857 // possible. n is the bit length of limit. 858 func (z nat) random(rand *rand.Rand, limit nat, n int) nat { 859 if alias(z, limit) { 860 z = nil // z is an alias for limit - cannot reuse 861 } 862 z = z.make(len(limit)) 863 864 bitLengthOfMSW := uint(n % _W) 865 if bitLengthOfMSW == 0 { 866 bitLengthOfMSW = _W 867 } 868 mask := Word((1 << bitLengthOfMSW) - 1) 869 870 for { 871 switch _W { 872 case 32: 873 for i := range z { 874 z[i] = Word(rand.Uint32()) 875 } 876 case 64: 877 for i := range z { 878 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 879 } 880 default: 881 panic("unknown word size") 882 } 883 z[len(limit)-1] &= mask 884 if z.cmp(limit) < 0 { 885 break 886 } 887 } 888 889 return z.norm() 890 } 891 892 // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; 893 // otherwise it sets z to x**y. The result is the value of z. 894 func (z nat) expNN(x, y, m nat) nat { 895 if alias(z, x) || alias(z, y) { 896 // We cannot allow in-place modification of x or y. 897 z = nil 898 } 899 900 // x**y mod 1 == 0 901 if len(m) == 1 && m[0] == 1 { 902 return z.setWord(0) 903 } 904 // m == 0 || m > 1 905 906 // x**0 == 1 907 if len(y) == 0 { 908 return z.setWord(1) 909 } 910 // y > 0 911 912 // x**1 mod m == x mod m 913 if len(y) == 1 && y[0] == 1 && len(m) != 0 { 914 _, z = z.div(z, x, m) 915 return z 916 } 917 // y > 1 918 919 if len(m) != 0 { 920 // We likely end up being as long as the modulus. 921 z = z.make(len(m)) 922 } 923 z = z.set(x) 924 925 // If the base is non-trivial and the exponent is large, we use 926 // 4-bit, windowed exponentiation. This involves precomputing 14 values 927 // (x^2...x^15) but then reduces the number of multiply-reduces by a 928 // third. Even for a 32-bit exponent, this reduces the number of 929 // operations. Uses Montgomery method for odd moduli. 930 if len(x) > 1 && len(y) > 1 && len(m) > 0 { 931 if m[0]&1 == 1 { 932 return z.expNNMontgomery(x, y, m) 933 } 934 return z.expNNWindowed(x, y, m) 935 } 936 937 v := y[len(y)-1] // v > 0 because y is normalized and y > 0 938 shift := nlz(v) + 1 939 v <<= shift 940 var q nat 941 942 const mask = 1 << (_W - 1) 943 944 // We walk through the bits of the exponent one by one. Each time we 945 // see a bit, we square, thus doubling the power. If the bit is a one, 946 // we also multiply by x, thus adding one to the power. 947 948 w := _W - int(shift) 949 // zz and r are used to avoid allocating in mul and div as 950 // otherwise the arguments would alias. 951 var zz, r nat 952 for j := 0; j < w; j++ { 953 zz = zz.mul(z, z) 954 zz, z = z, zz 955 956 if v&mask != 0 { 957 zz = zz.mul(z, x) 958 zz, z = z, zz 959 } 960 961 if len(m) != 0 { 962 zz, r = zz.div(r, z, m) 963 zz, r, q, z = q, z, zz, r 964 } 965 966 v <<= 1 967 } 968 969 for i := len(y) - 2; i >= 0; i-- { 970 v = y[i] 971 972 for j := 0; j < _W; j++ { 973 zz = zz.mul(z, z) 974 zz, z = z, zz 975 976 if v&mask != 0 { 977 zz = zz.mul(z, x) 978 zz, z = z, zz 979 } 980 981 if len(m) != 0 { 982 zz, r = zz.div(r, z, m) 983 zz, r, q, z = q, z, zz, r 984 } 985 986 v <<= 1 987 } 988 } 989 990 return z.norm() 991 } 992 993 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. 994 func (z nat) expNNWindowed(x, y, m nat) nat { 995 // zz and r are used to avoid allocating in mul and div as otherwise 996 // the arguments would alias. 997 var zz, r nat 998 999 const n = 4 1000 // powers[i] contains x^i. 1001 var powers [1 << n]nat 1002 powers[0] = natOne 1003 powers[1] = x 1004 for i := 2; i < 1<<n; i += 2 { 1005 p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] 1006 *p = p.mul(*p2, *p2) 1007 zz, r = zz.div(r, *p, m) 1008 *p, r = r, *p 1009 *p1 = p1.mul(*p, x) 1010 zz, r = zz.div(r, *p1, m) 1011 *p1, r = r, *p1 1012 } 1013 1014 z = z.setWord(1) 1015 1016 for i := len(y) - 1; i >= 0; i-- { 1017 yi := y[i] 1018 for j := 0; j < _W; j += n { 1019 if i != len(y)-1 || j != 0 { 1020 // Unrolled loop for significant performance 1021 // gain. Use go test -bench=".*" in crypto/rsa 1022 // to check performance before making changes. 1023 zz = zz.mul(z, z) 1024 zz, z = z, zz 1025 zz, r = zz.div(r, z, m) 1026 z, r = r, z 1027 1028 zz = zz.mul(z, z) 1029 zz, z = z, zz 1030 zz, r = zz.div(r, z, m) 1031 z, r = r, z 1032 1033 zz = zz.mul(z, z) 1034 zz, z = z, zz 1035 zz, r = zz.div(r, z, m) 1036 z, r = r, z 1037 1038 zz = zz.mul(z, z) 1039 zz, z = z, zz 1040 zz, r = zz.div(r, z, m) 1041 z, r = r, z 1042 } 1043 1044 zz = zz.mul(z, powers[yi>>(_W-n)]) 1045 zz, z = z, zz 1046 zz, r = zz.div(r, z, m) 1047 z, r = r, z 1048 1049 yi <<= n 1050 } 1051 } 1052 1053 return z.norm() 1054 } 1055 1056 // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window. 1057 // Uses Montgomery representation. 1058 func (z nat) expNNMontgomery(x, y, m nat) nat { 1059 var zz, one, rr, RR nat 1060 1061 numWords := len(m) 1062 1063 // We want the lengths of x and m to be equal. 1064 if len(x) > numWords { 1065 _, rr = rr.div(rr, x, m) 1066 } else if len(x) < numWords { 1067 rr = rr.make(numWords) 1068 rr.clear() 1069 for i := range x { 1070 rr[i] = x[i] 1071 } 1072 } else { 1073 rr = x 1074 } 1075 x = rr 1076 1077 // Ideally the precomputations would be performed outside, and reused 1078 // k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson 1079 // Iteration for Multiplicative Inverses Modulo Prime Powers". 1080 k0 := 2 - m[0] 1081 t := m[0] - 1 1082 for i := 1; i < _W; i <<= 1 { 1083 t *= t 1084 k0 *= (t + 1) 1085 } 1086 k0 = -k0 1087 1088 // RR = 2**(2*_W*len(m)) mod m 1089 RR = RR.setWord(1) 1090 zz = zz.shl(RR, uint(2*numWords*_W)) 1091 _, RR = RR.div(RR, zz, m) 1092 if len(RR) < numWords { 1093 zz = zz.make(numWords) 1094 copy(zz, RR) 1095 RR = zz 1096 } 1097 // one = 1, with equal length to that of m 1098 one = one.make(numWords) 1099 one.clear() 1100 one[0] = 1 1101 1102 const n = 4 1103 // powers[i] contains x^i 1104 var powers [1 << n]nat 1105 powers[0] = powers[0].montgomery(one, RR, m, k0, numWords) 1106 powers[1] = powers[1].montgomery(x, RR, m, k0, numWords) 1107 for i := 2; i < 1<<n; i++ { 1108 powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords) 1109 } 1110 1111 // initialize z = 1 (Montgomery 1) 1112 z = z.make(numWords) 1113 copy(z, powers[0]) 1114 1115 zz = zz.make(numWords) 1116 1117 // same windowed exponent, but with Montgomery multiplications 1118 for i := len(y) - 1; i >= 0; i-- { 1119 yi := y[i] 1120 for j := 0; j < _W; j += n { 1121 if i != len(y)-1 || j != 0 { 1122 zz = zz.montgomery(z, z, m, k0, numWords) 1123 z = z.montgomery(zz, zz, m, k0, numWords) 1124 zz = zz.montgomery(z, z, m, k0, numWords) 1125 z = z.montgomery(zz, zz, m, k0, numWords) 1126 } 1127 zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords) 1128 z, zz = zz, z 1129 yi <<= n 1130 } 1131 } 1132 // convert to regular number 1133 zz = zz.montgomery(z, one, m, k0, numWords) 1134 return zz.norm() 1135 } 1136 1137 // probablyPrime performs n Miller-Rabin tests to check whether x is prime. 1138 // If x is prime, it returns true. 1139 // If x is not prime, it returns false with probability at least 1 - ¼ⁿ. 1140 // 1141 // It is not suitable for judging primes that an adversary may have crafted 1142 // to fool this test. 1143 func (n nat) probablyPrime(reps int) bool { 1144 if len(n) == 0 { 1145 return false 1146 } 1147 1148 if len(n) == 1 { 1149 if n[0] < 2 { 1150 return false 1151 } 1152 1153 if n[0]%2 == 0 { 1154 return n[0] == 2 1155 } 1156 1157 // We have to exclude these cases because we reject all 1158 // multiples of these numbers below. 1159 switch n[0] { 1160 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: 1161 return true 1162 } 1163 } 1164 1165 if n[0]&1 == 0 { 1166 return false // n is even 1167 } 1168 1169 const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} 1170 const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} 1171 1172 var r Word 1173 switch _W { 1174 case 32: 1175 r = n.modW(primesProduct32) 1176 case 64: 1177 r = n.modW(primesProduct64 & _M) 1178 default: 1179 panic("Unknown word size") 1180 } 1181 1182 if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || 1183 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { 1184 return false 1185 } 1186 1187 if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || 1188 r%43 == 0 || r%47 == 0 || r%53 == 0) { 1189 return false 1190 } 1191 1192 nm1 := nat(nil).sub(n, natOne) 1193 // determine q, k such that nm1 = q << k 1194 k := nm1.trailingZeroBits() 1195 q := nat(nil).shr(nm1, k) 1196 1197 nm3 := nat(nil).sub(nm1, natTwo) 1198 rand := rand.New(rand.NewSource(int64(n[0]))) 1199 1200 var x, y, quotient nat 1201 nm3Len := nm3.bitLen() 1202 1203 NextRandom: 1204 for i := 0; i < reps; i++ { 1205 x = x.random(rand, nm3, nm3Len) 1206 x = x.add(x, natTwo) 1207 y = y.expNN(x, q, n) 1208 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { 1209 continue 1210 } 1211 for j := uint(1); j < k; j++ { 1212 y = y.mul(y, y) 1213 quotient, y = quotient.div(y, y, n) 1214 if y.cmp(nm1) == 0 { 1215 continue NextRandom 1216 } 1217 if y.cmp(natOne) == 0 { 1218 return false 1219 } 1220 } 1221 return false 1222 } 1223 1224 return true 1225 } 1226 1227 // bytes writes the value of z into buf using big-endian encoding. 1228 // len(buf) must be >= len(z)*_S. The value of z is encoded in the 1229 // slice buf[i:]. The number i of unused bytes at the beginning of 1230 // buf is returned as result. 1231 func (z nat) bytes(buf []byte) (i int) { 1232 i = len(buf) 1233 for _, d := range z { 1234 for j := 0; j < _S; j++ { 1235 i-- 1236 buf[i] = byte(d) 1237 d >>= 8 1238 } 1239 } 1240 1241 for i < len(buf) && buf[i] == 0 { 1242 i++ 1243 } 1244 1245 return 1246 } 1247 1248 // setBytes interprets buf as the bytes of a big-endian unsigned 1249 // integer, sets z to that value, and returns z. 1250 func (z nat) setBytes(buf []byte) nat { 1251 z = z.make((len(buf) + _S - 1) / _S) 1252 1253 k := 0 1254 s := uint(0) 1255 var d Word 1256 for i := len(buf); i > 0; i-- { 1257 d |= Word(buf[i-1]) << s 1258 if s += 8; s == _S*8 { 1259 z[k] = d 1260 k++ 1261 s = 0 1262 d = 0 1263 } 1264 } 1265 if k < len(z) { 1266 z[k] = d 1267 } 1268 1269 return z.norm() 1270 }