github.com/hongwozai/go-src-1.4.3@v0.0.0-20191127132709-dc3fce3dbccb/src/math/big/nat.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package big implements multi-precision arithmetic (big numbers). 6 // The following numeric types are supported: 7 // 8 // - Int signed integers 9 // - Rat rational numbers 10 // 11 // Methods are typically of the form: 12 // 13 // func (z *Int) Op(x, y *Int) *Int (similar for *Rat) 14 // 15 // and implement operations z = x Op y with the result as receiver; if it 16 // is one of the operands it may be overwritten (and its memory reused). 17 // To enable chaining of operations, the result is also returned. Methods 18 // returning a result other than *Int or *Rat take one of the operands as 19 // the receiver. 20 // 21 package big 22 23 // This file contains operations on unsigned multi-precision integers. 24 // These are the building blocks for the operations on signed integers 25 // and rationals. 26 27 import ( 28 "errors" 29 "io" 30 "math" 31 "math/rand" 32 "sync" 33 ) 34 35 // An unsigned integer x of the form 36 // 37 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] 38 // 39 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, 40 // with the digits x[i] as the slice elements. 41 // 42 // A number is normalized if the slice contains no leading 0 digits. 43 // During arithmetic operations, denormalized values may occur but are 44 // always normalized before returning the final result. The normalized 45 // representation of 0 is the empty or nil slice (length = 0). 46 // 47 type nat []Word 48 49 var ( 50 natOne = nat{1} 51 natTwo = nat{2} 52 natTen = nat{10} 53 ) 54 55 func (z nat) clear() { 56 for i := range z { 57 z[i] = 0 58 } 59 } 60 61 func (z nat) norm() nat { 62 i := len(z) 63 for i > 0 && z[i-1] == 0 { 64 i-- 65 } 66 return z[0:i] 67 } 68 69 func (z nat) make(n int) nat { 70 if n <= cap(z) { 71 return z[0:n] // reuse z 72 } 73 // Choosing a good value for e has significant performance impact 74 // because it increases the chance that a value can be reused. 75 const e = 4 // extra capacity 76 return make(nat, n, n+e) 77 } 78 79 func (z nat) setWord(x Word) nat { 80 if x == 0 { 81 return z.make(0) 82 } 83 z = z.make(1) 84 z[0] = x 85 return z 86 } 87 88 func (z nat) setUint64(x uint64) nat { 89 // single-digit values 90 if w := Word(x); uint64(w) == x { 91 return z.setWord(w) 92 } 93 94 // compute number of words n required to represent x 95 n := 0 96 for t := x; t > 0; t >>= _W { 97 n++ 98 } 99 100 // split x into n words 101 z = z.make(n) 102 for i := range z { 103 z[i] = Word(x & _M) 104 x >>= _W 105 } 106 107 return z 108 } 109 110 func (z nat) set(x nat) nat { 111 z = z.make(len(x)) 112 copy(z, x) 113 return z 114 } 115 116 func (z nat) add(x, y nat) nat { 117 m := len(x) 118 n := len(y) 119 120 switch { 121 case m < n: 122 return z.add(y, x) 123 case m == 0: 124 // n == 0 because m >= n; result is 0 125 return z.make(0) 126 case n == 0: 127 // result is x 128 return z.set(x) 129 } 130 // m > 0 131 132 z = z.make(m + 1) 133 c := addVV(z[0:n], x, y) 134 if m > n { 135 c = addVW(z[n:m], x[n:], c) 136 } 137 z[m] = c 138 139 return z.norm() 140 } 141 142 func (z nat) sub(x, y nat) nat { 143 m := len(x) 144 n := len(y) 145 146 switch { 147 case m < n: 148 panic("underflow") 149 case m == 0: 150 // n == 0 because m >= n; result is 0 151 return z.make(0) 152 case n == 0: 153 // result is x 154 return z.set(x) 155 } 156 // m > 0 157 158 z = z.make(m) 159 c := subVV(z[0:n], x, y) 160 if m > n { 161 c = subVW(z[n:], x[n:], c) 162 } 163 if c != 0 { 164 panic("underflow") 165 } 166 167 return z.norm() 168 } 169 170 func (x nat) cmp(y nat) (r int) { 171 m := len(x) 172 n := len(y) 173 if m != n || m == 0 { 174 switch { 175 case m < n: 176 r = -1 177 case m > n: 178 r = 1 179 } 180 return 181 } 182 183 i := m - 1 184 for i > 0 && x[i] == y[i] { 185 i-- 186 } 187 188 switch { 189 case x[i] < y[i]: 190 r = -1 191 case x[i] > y[i]: 192 r = 1 193 } 194 return 195 } 196 197 func (z nat) mulAddWW(x nat, y, r Word) nat { 198 m := len(x) 199 if m == 0 || y == 0 { 200 return z.setWord(r) // result is r 201 } 202 // m > 0 203 204 z = z.make(m + 1) 205 z[m] = mulAddVWW(z[0:m], x, y, r) 206 207 return z.norm() 208 } 209 210 // basicMul multiplies x and y and leaves the result in z. 211 // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 212 func basicMul(z, x, y nat) { 213 z[0 : len(x)+len(y)].clear() // initialize z 214 for i, d := range y { 215 if d != 0 { 216 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) 217 } 218 } 219 } 220 221 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. 222 // Factored out for readability - do not use outside karatsuba. 223 func karatsubaAdd(z, x nat, n int) { 224 if c := addVV(z[0:n], z, x); c != 0 { 225 addVW(z[n:n+n>>1], z[n:], c) 226 } 227 } 228 229 // Like karatsubaAdd, but does subtract. 230 func karatsubaSub(z, x nat, n int) { 231 if c := subVV(z[0:n], z, x); c != 0 { 232 subVW(z[n:n+n>>1], z[n:], c) 233 } 234 } 235 236 // Operands that are shorter than karatsubaThreshold are multiplied using 237 // "grade school" multiplication; for longer operands the Karatsuba algorithm 238 // is used. 239 var karatsubaThreshold int = 40 // computed by calibrate.go 240 241 // karatsuba multiplies x and y and leaves the result in z. 242 // Both x and y must have the same length n and n must be a 243 // power of 2. The result vector z must have len(z) >= 6*n. 244 // The (non-normalized) result is placed in z[0 : 2*n]. 245 func karatsuba(z, x, y nat) { 246 n := len(y) 247 248 // Switch to basic multiplication if numbers are odd or small. 249 // (n is always even if karatsubaThreshold is even, but be 250 // conservative) 251 if n&1 != 0 || n < karatsubaThreshold || n < 2 { 252 basicMul(z, x, y) 253 return 254 } 255 // n&1 == 0 && n >= karatsubaThreshold && n >= 2 256 257 // Karatsuba multiplication is based on the observation that 258 // for two numbers x and y with: 259 // 260 // x = x1*b + x0 261 // y = y1*b + y0 262 // 263 // the product x*y can be obtained with 3 products z2, z1, z0 264 // instead of 4: 265 // 266 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 267 // = z2*b*b + z1*b + z0 268 // 269 // with: 270 // 271 // xd = x1 - x0 272 // yd = y0 - y1 273 // 274 // z1 = xd*yd + z2 + z0 275 // = (x1-x0)*(y0 - y1) + z2 + z0 276 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 277 // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 278 // = x1*y0 + x0*y1 279 280 // split x, y into "digits" 281 n2 := n >> 1 // n2 >= 1 282 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 283 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 284 285 // z is used for the result and temporary storage: 286 // 287 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n 288 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] 289 // 290 // For each recursive call of karatsuba, an unused slice of 291 // z is passed in that has (at least) half the length of the 292 // caller's z. 293 294 // compute z0 and z2 with the result "in place" in z 295 karatsuba(z, x0, y0) // z0 = x0*y0 296 karatsuba(z[n:], x1, y1) // z2 = x1*y1 297 298 // compute xd (or the negative value if underflow occurs) 299 s := 1 // sign of product xd*yd 300 xd := z[2*n : 2*n+n2] 301 if subVV(xd, x1, x0) != 0 { // x1-x0 302 s = -s 303 subVV(xd, x0, x1) // x0-x1 304 } 305 306 // compute yd (or the negative value if underflow occurs) 307 yd := z[2*n+n2 : 3*n] 308 if subVV(yd, y0, y1) != 0 { // y0-y1 309 s = -s 310 subVV(yd, y1, y0) // y1-y0 311 } 312 313 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 314 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 315 p := z[n*3:] 316 karatsuba(p, xd, yd) 317 318 // save original z2:z0 319 // (ok to use upper half of z since we're done recursing) 320 r := z[n*4:] 321 copy(r, z[:n*2]) 322 323 // add up all partial products 324 // 325 // 2*n n 0 326 // z = [ z2 | z0 ] 327 // + [ z0 ] 328 // + [ z2 ] 329 // + [ p ] 330 // 331 karatsubaAdd(z[n2:], r, n) 332 karatsubaAdd(z[n2:], r[n:], n) 333 if s > 0 { 334 karatsubaAdd(z[n2:], p, n) 335 } else { 336 karatsubaSub(z[n2:], p, n) 337 } 338 } 339 340 // alias returns true if x and y share the same base array. 341 func alias(x, y nat) bool { 342 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] 343 } 344 345 // addAt implements z += x<<(_W*i); z must be long enough. 346 // (we don't use nat.add because we need z to stay the same 347 // slice, and we don't need to normalize z after each addition) 348 func addAt(z, x nat, i int) { 349 if n := len(x); n > 0 { 350 if c := addVV(z[i:i+n], z[i:], x); c != 0 { 351 j := i + n 352 if j < len(z) { 353 addVW(z[j:], z[j:], c) 354 } 355 } 356 } 357 } 358 359 func max(x, y int) int { 360 if x > y { 361 return x 362 } 363 return y 364 } 365 366 // karatsubaLen computes an approximation to the maximum k <= n such that 367 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the 368 // result is the largest number that can be divided repeatedly by 2 before 369 // becoming about the value of karatsubaThreshold. 370 func karatsubaLen(n int) int { 371 i := uint(0) 372 for n > karatsubaThreshold { 373 n >>= 1 374 i++ 375 } 376 return n << i 377 } 378 379 func (z nat) mul(x, y nat) nat { 380 m := len(x) 381 n := len(y) 382 383 switch { 384 case m < n: 385 return z.mul(y, x) 386 case m == 0 || n == 0: 387 return z.make(0) 388 case n == 1: 389 return z.mulAddWW(x, y[0], 0) 390 } 391 // m >= n > 1 392 393 // determine if z can be reused 394 if alias(z, x) || alias(z, y) { 395 z = nil // z is an alias for x or y - cannot reuse 396 } 397 398 // use basic multiplication if the numbers are small 399 if n < karatsubaThreshold { 400 z = z.make(m + n) 401 basicMul(z, x, y) 402 return z.norm() 403 } 404 // m >= n && n >= karatsubaThreshold && n >= 2 405 406 // determine Karatsuba length k such that 407 // 408 // x = xh*b + x0 (0 <= x0 < b) 409 // y = yh*b + y0 (0 <= y0 < b) 410 // b = 1<<(_W*k) ("base" of digits xi, yi) 411 // 412 k := karatsubaLen(n) 413 // k <= n 414 415 // multiply x0 and y0 via Karatsuba 416 x0 := x[0:k] // x0 is not normalized 417 y0 := y[0:k] // y0 is not normalized 418 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y 419 karatsuba(z, x0, y0) 420 z = z[0 : m+n] // z has final length but may be incomplete 421 z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) 422 423 // If xh != 0 or yh != 0, add the missing terms to z. For 424 // 425 // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) 426 // yh = y1*b (0 <= y1 < b) 427 // 428 // the missing terms are 429 // 430 // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 431 // 432 // since all the yi for i > 1 are 0 by choice of k: If any of them 433 // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would 434 // be a larger valid threshold contradicting the assumption about k. 435 // 436 if k < n || m != n { 437 var t nat 438 439 // add x0*y1*b 440 x0 := x0.norm() 441 y1 := y[k:] // y1 is normalized because y is 442 t = t.mul(x0, y1) // update t so we don't lose t's underlying array 443 addAt(z, t, k) 444 445 // add xi*y0<<i, xi*y1*b<<(i+k) 446 y0 := y0.norm() 447 for i := k; i < len(x); i += k { 448 xi := x[i:] 449 if len(xi) > k { 450 xi = xi[:k] 451 } 452 xi = xi.norm() 453 t = t.mul(xi, y0) 454 addAt(z, t, i) 455 t = t.mul(xi, y1) 456 addAt(z, t, i+k) 457 } 458 } 459 460 return z.norm() 461 } 462 463 // mulRange computes the product of all the unsigned integers in the 464 // range [a, b] inclusively. If a > b (empty range), the result is 1. 465 func (z nat) mulRange(a, b uint64) nat { 466 switch { 467 case a == 0: 468 // cut long ranges short (optimization) 469 return z.setUint64(0) 470 case a > b: 471 return z.setUint64(1) 472 case a == b: 473 return z.setUint64(a) 474 case a+1 == b: 475 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) 476 } 477 m := (a + b) / 2 478 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) 479 } 480 481 // q = (x-r)/y, with 0 <= r < y 482 func (z nat) divW(x nat, y Word) (q nat, r Word) { 483 m := len(x) 484 switch { 485 case y == 0: 486 panic("division by zero") 487 case y == 1: 488 q = z.set(x) // result is x 489 return 490 case m == 0: 491 q = z.make(0) // result is 0 492 return 493 } 494 // m > 0 495 z = z.make(m) 496 r = divWVW(z, 0, x, y) 497 q = z.norm() 498 return 499 } 500 501 func (z nat) div(z2, u, v nat) (q, r nat) { 502 if len(v) == 0 { 503 panic("division by zero") 504 } 505 506 if u.cmp(v) < 0 { 507 q = z.make(0) 508 r = z2.set(u) 509 return 510 } 511 512 if len(v) == 1 { 513 var r2 Word 514 q, r2 = z.divW(u, v[0]) 515 r = z2.setWord(r2) 516 return 517 } 518 519 q, r = z.divLarge(z2, u, v) 520 return 521 } 522 523 // q = (uIn-r)/v, with 0 <= r < y 524 // Uses z as storage for q, and u as storage for r if possible. 525 // See Knuth, Volume 2, section 4.3.1, Algorithm D. 526 // Preconditions: 527 // len(v) >= 2 528 // len(uIn) >= len(v) 529 func (z nat) divLarge(u, uIn, v nat) (q, r nat) { 530 n := len(v) 531 m := len(uIn) - n 532 533 // determine if z can be reused 534 // TODO(gri) should find a better solution - this if statement 535 // is very costly (see e.g. time pidigits -s -n 10000) 536 if alias(z, uIn) || alias(z, v) { 537 z = nil // z is an alias for uIn or v - cannot reuse 538 } 539 q = z.make(m + 1) 540 541 qhatv := make(nat, n+1) 542 if alias(u, uIn) || alias(u, v) { 543 u = nil // u is an alias for uIn or v - cannot reuse 544 } 545 u = u.make(len(uIn) + 1) 546 u.clear() 547 548 // D1. 549 shift := leadingZeros(v[n-1]) 550 if shift > 0 { 551 // do not modify v, it may be used by another goroutine simultaneously 552 v1 := make(nat, n) 553 shlVU(v1, v, shift) 554 v = v1 555 } 556 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) 557 558 // D2. 559 for j := m; j >= 0; j-- { 560 // D3. 561 qhat := Word(_M) 562 if u[j+n] != v[n-1] { 563 var rhat Word 564 qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) 565 566 // x1 | x2 = q̂v_{n-2} 567 x1, x2 := mulWW(qhat, v[n-2]) 568 // test if q̂v_{n-2} > br̂ + u_{j+n-2} 569 for greaterThan(x1, x2, rhat, u[j+n-2]) { 570 qhat-- 571 prevRhat := rhat 572 rhat += v[n-1] 573 // v[n-1] >= 0, so this tests for overflow. 574 if rhat < prevRhat { 575 break 576 } 577 x1, x2 = mulWW(qhat, v[n-2]) 578 } 579 } 580 581 // D4. 582 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) 583 584 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv) 585 if c != 0 { 586 c := addVV(u[j:j+n], u[j:], v) 587 u[j+n] += c 588 qhat-- 589 } 590 591 q[j] = qhat 592 } 593 594 q = q.norm() 595 shrVU(u, u, shift) 596 r = u.norm() 597 598 return q, r 599 } 600 601 // Length of x in bits. x must be normalized. 602 func (x nat) bitLen() int { 603 if i := len(x) - 1; i >= 0 { 604 return i*_W + bitLen(x[i]) 605 } 606 return 0 607 } 608 609 // MaxBase is the largest number base accepted for string conversions. 610 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1 611 612 func hexValue(ch rune) Word { 613 d := int(MaxBase + 1) // illegal base 614 switch { 615 case '0' <= ch && ch <= '9': 616 d = int(ch - '0') 617 case 'a' <= ch && ch <= 'z': 618 d = int(ch - 'a' + 10) 619 case 'A' <= ch && ch <= 'Z': 620 d = int(ch - 'A' + 10) 621 } 622 return Word(d) 623 } 624 625 // scan sets z to the natural number corresponding to the longest possible prefix 626 // read from r representing an unsigned integer in a given conversion base. 627 // It returns z, the actual conversion base used, and an error, if any. In the 628 // error case, the value of z is undefined. The syntax follows the syntax of 629 // unsigned integer literals in Go. 630 // 631 // The base argument must be 0 or a value from 2 through MaxBase. If the base 632 // is 0, the string prefix determines the actual conversion base. A prefix of 633 // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a 634 // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. 635 // 636 func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) { 637 // reject illegal bases 638 if base < 0 || base == 1 || MaxBase < base { 639 return z, 0, errors.New("illegal number base") 640 } 641 642 // one char look-ahead 643 ch, _, err := r.ReadRune() 644 if err != nil { 645 return z, 0, err 646 } 647 648 // determine base if necessary 649 b := Word(base) 650 if base == 0 { 651 b = 10 652 if ch == '0' { 653 switch ch, _, err = r.ReadRune(); err { 654 case nil: 655 b = 8 656 switch ch { 657 case 'x', 'X': 658 b = 16 659 case 'b', 'B': 660 b = 2 661 } 662 if b == 2 || b == 16 { 663 if ch, _, err = r.ReadRune(); err != nil { 664 return z, 0, err 665 } 666 } 667 case io.EOF: 668 return z.make(0), 10, nil 669 default: 670 return z, 10, err 671 } 672 } 673 } 674 675 // convert string 676 // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb 677 // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd 678 z = z.make(0) 679 bb := Word(1) 680 dd := Word(0) 681 for max := _M / b; ; { 682 d := hexValue(ch) 683 if d >= b { 684 r.UnreadRune() // ch does not belong to number anymore 685 break 686 } 687 688 if bb <= max { 689 bb *= b 690 dd = dd*b + d 691 } else { 692 // bb * b would overflow 693 z = z.mulAddWW(z, bb, dd) 694 bb = b 695 dd = d 696 } 697 698 if ch, _, err = r.ReadRune(); err != nil { 699 if err != io.EOF { 700 return z, int(b), err 701 } 702 break 703 } 704 } 705 706 switch { 707 case bb > 1: 708 // there was at least one mantissa digit 709 z = z.mulAddWW(z, bb, dd) 710 case base == 0 && b == 8: 711 // there was only the octal prefix 0 (possibly followed by digits > 7); 712 // return base 10, not 8 713 return z, 10, nil 714 case base != 0 || b != 8: 715 // there was neither a mantissa digit nor the octal prefix 0 716 return z, int(b), errors.New("syntax error scanning number") 717 } 718 719 return z.norm(), int(b), nil 720 } 721 722 // Character sets for string conversion. 723 const ( 724 lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz" 725 uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 726 ) 727 728 // decimalString returns a decimal representation of x. 729 // It calls x.string with the charset "0123456789". 730 func (x nat) decimalString() string { 731 return x.string(lowercaseDigits[0:10]) 732 } 733 734 // string converts x to a string using digits from a charset; a digit with 735 // value d is represented by charset[d]. The conversion base is determined 736 // by len(charset), which must be >= 2 and <= 256. 737 func (x nat) string(charset string) string { 738 b := Word(len(charset)) 739 740 // special cases 741 switch { 742 case b < 2 || MaxBase > 256: 743 panic("illegal base") 744 case len(x) == 0: 745 return string(charset[0]) 746 } 747 748 // allocate buffer for conversion 749 i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most 750 s := make([]byte, i) 751 752 // convert power of two and non power of two bases separately 753 if b == b&-b { 754 // shift is base-b digit size in bits 755 shift := trailingZeroBits(b) // shift > 0 because b >= 2 756 mask := Word(1)<<shift - 1 757 w := x[0] 758 nbits := uint(_W) // number of unprocessed bits in w 759 760 // convert less-significant words 761 for k := 1; k < len(x); k++ { 762 // convert full digits 763 for nbits >= shift { 764 i-- 765 s[i] = charset[w&mask] 766 w >>= shift 767 nbits -= shift 768 } 769 770 // convert any partial leading digit and advance to next word 771 if nbits == 0 { 772 // no partial digit remaining, just advance 773 w = x[k] 774 nbits = _W 775 } else { 776 // partial digit in current (k-1) and next (k) word 777 w |= x[k] << nbits 778 i-- 779 s[i] = charset[w&mask] 780 781 // advance 782 w = x[k] >> (shift - nbits) 783 nbits = _W - (shift - nbits) 784 } 785 } 786 787 // convert digits of most-significant word (omit leading zeros) 788 for nbits >= 0 && w != 0 { 789 i-- 790 s[i] = charset[w&mask] 791 w >>= shift 792 nbits -= shift 793 } 794 795 } else { 796 // determine "big base"; i.e., the largest possible value bb 797 // that is a power of base b and still fits into a Word 798 // (as in 10^19 for 19 decimal digits in a 64bit Word) 799 bb := b // big base is b**ndigits 800 ndigits := 1 // number of base b digits 801 for max := Word(_M / b); bb <= max; bb *= b { 802 ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M 803 } 804 805 // construct table of successive squares of bb*leafSize to use in subdivisions 806 // result (table != nil) <=> (len(x) > leafSize > 0) 807 table := divisors(len(x), b, ndigits, bb) 808 809 // preserve x, create local copy for use by convertWords 810 q := nat(nil).set(x) 811 812 // convert q to string s in base b 813 q.convertWords(s, charset, b, ndigits, bb, table) 814 815 // strip leading zeros 816 // (x != 0; thus s must contain at least one non-zero digit 817 // and the loop will terminate) 818 i = 0 819 for zero := charset[0]; s[i] == zero; { 820 i++ 821 } 822 } 823 824 return string(s[i:]) 825 } 826 827 // Convert words of q to base b digits in s. If q is large, it is recursively "split in half" 828 // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using 829 // repeated nat/Word division. 830 // 831 // The iterative method processes n Words by n divW() calls, each of which visits every Word in the 832 // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. 833 // Recursive conversion divides q by its approximate square root, yielding two parts, each half 834 // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s 835 // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and 836 // is made better by splitting the subblocks recursively. Best is to split blocks until one more 837 // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the 838 // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the 839 // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and 840 // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for 841 // specific hardware. 842 // 843 func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) { 844 // split larger blocks recursively 845 if table != nil { 846 // len(q) > leafSize > 0 847 var r nat 848 index := len(table) - 1 849 for len(q) > leafSize { 850 // find divisor close to sqrt(q) if possible, but in any case < q 851 maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length 852 minLength := maxLength >> 1 // ~= log2 sqrt(q) 853 for index > 0 && table[index-1].nbits > minLength { 854 index-- // desired 855 } 856 if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 { 857 index-- 858 if index < 0 { 859 panic("internal inconsistency") 860 } 861 } 862 863 // split q into the two digit number (q'*bbb + r) to form independent subblocks 864 q, r = q.div(r, q, table[index].bbb) 865 866 // convert subblocks and collect results in s[:h] and s[h:] 867 h := len(s) - table[index].ndigits 868 r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index]) 869 s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1]) 870 } 871 } 872 873 // having split any large blocks now process the remaining (small) block iteratively 874 i := len(s) 875 var r Word 876 if b == 10 { 877 // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants) 878 for len(q) > 0 { 879 // extract least significant, base bb "digit" 880 q, r = q.divW(q, bb) 881 for j := 0; j < ndigits && i > 0; j++ { 882 i-- 883 // avoid % computation since r%10 == r - int(r/10)*10; 884 // this appears to be faster for BenchmarkString10000Base10 885 // and smaller strings (but a bit slower for larger ones) 886 t := r / 10 887 s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code 888 r = t 889 } 890 } 891 } else { 892 for len(q) > 0 { 893 // extract least significant, base bb "digit" 894 q, r = q.divW(q, bb) 895 for j := 0; j < ndigits && i > 0; j++ { 896 i-- 897 s[i] = charset[r%b] 898 r /= b 899 } 900 } 901 } 902 903 // prepend high-order zeroes 904 zero := charset[0] 905 for i > 0 { // while need more leading zeroes 906 i-- 907 s[i] = zero 908 } 909 } 910 911 // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion) 912 // Benchmark and configure leafSize using: go test -bench="Leaf" 913 // 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines) 914 // 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU 915 var leafSize int = 8 // number of Word-size binary values treat as a monolithic block 916 917 type divisor struct { 918 bbb nat // divisor 919 nbits int // bit length of divisor (discounting leading zeroes) ~= log2(bbb) 920 ndigits int // digit length of divisor in terms of output base digits 921 } 922 923 var cacheBase10 struct { 924 sync.Mutex 925 table [64]divisor // cached divisors for base 10 926 } 927 928 // expWW computes x**y 929 func (z nat) expWW(x, y Word) nat { 930 return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil) 931 } 932 933 // construct table of powers of bb*leafSize to use in subdivisions 934 func divisors(m int, b Word, ndigits int, bb Word) []divisor { 935 // only compute table when recursive conversion is enabled and x is large 936 if leafSize == 0 || m <= leafSize { 937 return nil 938 } 939 940 // determine k where (bb**leafSize)**(2**k) >= sqrt(x) 941 k := 1 942 for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 { 943 k++ 944 } 945 946 // reuse and extend existing table of divisors or create new table as appropriate 947 var table []divisor // for b == 10, table overlaps with cacheBase10.table 948 if b == 10 { 949 cacheBase10.Lock() 950 table = cacheBase10.table[0:k] // reuse old table for this conversion 951 } else { 952 table = make([]divisor, k) // create new table for this conversion 953 } 954 955 // extend table 956 if table[k-1].ndigits == 0 { 957 // add new entries as needed 958 var larger nat 959 for i := 0; i < k; i++ { 960 if table[i].ndigits == 0 { 961 if i == 0 { 962 table[0].bbb = nat(nil).expWW(bb, Word(leafSize)) 963 table[0].ndigits = ndigits * leafSize 964 } else { 965 table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb) 966 table[i].ndigits = 2 * table[i-1].ndigits 967 } 968 969 // optimization: exploit aggregated extra bits in macro blocks 970 larger = nat(nil).set(table[i].bbb) 971 for mulAddVWW(larger, larger, b, 0) == 0 { 972 table[i].bbb = table[i].bbb.set(larger) 973 table[i].ndigits++ 974 } 975 976 table[i].nbits = table[i].bbb.bitLen() 977 } 978 } 979 } 980 981 if b == 10 { 982 cacheBase10.Unlock() 983 } 984 985 return table 986 } 987 988 const deBruijn32 = 0x077CB531 989 990 var deBruijn32Lookup = []byte{ 991 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 992 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 993 } 994 995 const deBruijn64 = 0x03f79d71b4ca8b09 996 997 var deBruijn64Lookup = []byte{ 998 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 999 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 1000 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 1001 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, 1002 } 1003 1004 // trailingZeroBits returns the number of consecutive least significant zero 1005 // bits of x. 1006 func trailingZeroBits(x Word) uint { 1007 // x & -x leaves only the right-most bit set in the word. Let k be the 1008 // index of that bit. Since only a single bit is set, the value is two 1009 // to the power of k. Multiplying by a power of two is equivalent to 1010 // left shifting, in this case by k bits. The de Bruijn constant is 1011 // such that all six bit, consecutive substrings are distinct. 1012 // Therefore, if we have a left shifted version of this constant we can 1013 // find by how many bits it was shifted by looking at which six bit 1014 // substring ended up at the top of the word. 1015 // (Knuth, volume 4, section 7.3.1) 1016 switch _W { 1017 case 32: 1018 return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) 1019 case 64: 1020 return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) 1021 default: 1022 panic("unknown word size") 1023 } 1024 } 1025 1026 // trailingZeroBits returns the number of consecutive least significant zero 1027 // bits of x. 1028 func (x nat) trailingZeroBits() uint { 1029 if len(x) == 0 { 1030 return 0 1031 } 1032 var i uint 1033 for x[i] == 0 { 1034 i++ 1035 } 1036 // x[i] != 0 1037 return i*_W + trailingZeroBits(x[i]) 1038 } 1039 1040 // z = x << s 1041 func (z nat) shl(x nat, s uint) nat { 1042 m := len(x) 1043 if m == 0 { 1044 return z.make(0) 1045 } 1046 // m > 0 1047 1048 n := m + int(s/_W) 1049 z = z.make(n + 1) 1050 z[n] = shlVU(z[n-m:n], x, s%_W) 1051 z[0 : n-m].clear() 1052 1053 return z.norm() 1054 } 1055 1056 // z = x >> s 1057 func (z nat) shr(x nat, s uint) nat { 1058 m := len(x) 1059 n := m - int(s/_W) 1060 if n <= 0 { 1061 return z.make(0) 1062 } 1063 // n > 0 1064 1065 z = z.make(n) 1066 shrVU(z, x[m-n:], s%_W) 1067 1068 return z.norm() 1069 } 1070 1071 func (z nat) setBit(x nat, i uint, b uint) nat { 1072 j := int(i / _W) 1073 m := Word(1) << (i % _W) 1074 n := len(x) 1075 switch b { 1076 case 0: 1077 z = z.make(n) 1078 copy(z, x) 1079 if j >= n { 1080 // no need to grow 1081 return z 1082 } 1083 z[j] &^= m 1084 return z.norm() 1085 case 1: 1086 if j >= n { 1087 z = z.make(j + 1) 1088 z[n:].clear() 1089 } else { 1090 z = z.make(n) 1091 } 1092 copy(z, x) 1093 z[j] |= m 1094 // no need to normalize 1095 return z 1096 } 1097 panic("set bit is not 0 or 1") 1098 } 1099 1100 func (z nat) bit(i uint) uint { 1101 j := int(i / _W) 1102 if j >= len(z) { 1103 return 0 1104 } 1105 return uint(z[j] >> (i % _W) & 1) 1106 } 1107 1108 func (z nat) and(x, y nat) nat { 1109 m := len(x) 1110 n := len(y) 1111 if m > n { 1112 m = n 1113 } 1114 // m <= n 1115 1116 z = z.make(m) 1117 for i := 0; i < m; i++ { 1118 z[i] = x[i] & y[i] 1119 } 1120 1121 return z.norm() 1122 } 1123 1124 func (z nat) andNot(x, y nat) nat { 1125 m := len(x) 1126 n := len(y) 1127 if n > m { 1128 n = m 1129 } 1130 // m >= n 1131 1132 z = z.make(m) 1133 for i := 0; i < n; i++ { 1134 z[i] = x[i] &^ y[i] 1135 } 1136 copy(z[n:m], x[n:m]) 1137 1138 return z.norm() 1139 } 1140 1141 func (z nat) or(x, y nat) nat { 1142 m := len(x) 1143 n := len(y) 1144 s := x 1145 if m < n { 1146 n, m = m, n 1147 s = y 1148 } 1149 // m >= n 1150 1151 z = z.make(m) 1152 for i := 0; i < n; i++ { 1153 z[i] = x[i] | y[i] 1154 } 1155 copy(z[n:m], s[n:m]) 1156 1157 return z.norm() 1158 } 1159 1160 func (z nat) xor(x, y nat) nat { 1161 m := len(x) 1162 n := len(y) 1163 s := x 1164 if m < n { 1165 n, m = m, n 1166 s = y 1167 } 1168 // m >= n 1169 1170 z = z.make(m) 1171 for i := 0; i < n; i++ { 1172 z[i] = x[i] ^ y[i] 1173 } 1174 copy(z[n:m], s[n:m]) 1175 1176 return z.norm() 1177 } 1178 1179 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2) 1180 func greaterThan(x1, x2, y1, y2 Word) bool { 1181 return x1 > y1 || x1 == y1 && x2 > y2 1182 } 1183 1184 // modW returns x % d. 1185 func (x nat) modW(d Word) (r Word) { 1186 // TODO(agl): we don't actually need to store the q value. 1187 var q nat 1188 q = q.make(len(x)) 1189 return divWVW(q, 0, x, d) 1190 } 1191 1192 // random creates a random integer in [0..limit), using the space in z if 1193 // possible. n is the bit length of limit. 1194 func (z nat) random(rand *rand.Rand, limit nat, n int) nat { 1195 if alias(z, limit) { 1196 z = nil // z is an alias for limit - cannot reuse 1197 } 1198 z = z.make(len(limit)) 1199 1200 bitLengthOfMSW := uint(n % _W) 1201 if bitLengthOfMSW == 0 { 1202 bitLengthOfMSW = _W 1203 } 1204 mask := Word((1 << bitLengthOfMSW) - 1) 1205 1206 for { 1207 switch _W { 1208 case 32: 1209 for i := range z { 1210 z[i] = Word(rand.Uint32()) 1211 } 1212 case 64: 1213 for i := range z { 1214 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 1215 } 1216 default: 1217 panic("unknown word size") 1218 } 1219 z[len(limit)-1] &= mask 1220 if z.cmp(limit) < 0 { 1221 break 1222 } 1223 } 1224 1225 return z.norm() 1226 } 1227 1228 // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; 1229 // otherwise it sets z to x**y. The result is the value of z. 1230 func (z nat) expNN(x, y, m nat) nat { 1231 if alias(z, x) || alias(z, y) { 1232 // We cannot allow in-place modification of x or y. 1233 z = nil 1234 } 1235 1236 // x**y mod 1 == 0 1237 if len(m) == 1 && m[0] == 1 { 1238 return z.setWord(0) 1239 } 1240 // m == 0 || m > 1 1241 1242 // x**0 == 1 1243 if len(y) == 0 { 1244 return z.setWord(1) 1245 } 1246 // y > 0 1247 1248 if len(m) != 0 { 1249 // We likely end up being as long as the modulus. 1250 z = z.make(len(m)) 1251 } 1252 z = z.set(x) 1253 1254 // If the base is non-trivial and the exponent is large, we use 1255 // 4-bit, windowed exponentiation. This involves precomputing 14 values 1256 // (x^2...x^15) but then reduces the number of multiply-reduces by a 1257 // third. Even for a 32-bit exponent, this reduces the number of 1258 // operations. 1259 if len(x) > 1 && len(y) > 1 && len(m) > 0 { 1260 return z.expNNWindowed(x, y, m) 1261 } 1262 1263 v := y[len(y)-1] // v > 0 because y is normalized and y > 0 1264 shift := leadingZeros(v) + 1 1265 v <<= shift 1266 var q nat 1267 1268 const mask = 1 << (_W - 1) 1269 1270 // We walk through the bits of the exponent one by one. Each time we 1271 // see a bit, we square, thus doubling the power. If the bit is a one, 1272 // we also multiply by x, thus adding one to the power. 1273 1274 w := _W - int(shift) 1275 // zz and r are used to avoid allocating in mul and div as 1276 // otherwise the arguments would alias. 1277 var zz, r nat 1278 for j := 0; j < w; j++ { 1279 zz = zz.mul(z, z) 1280 zz, z = z, zz 1281 1282 if v&mask != 0 { 1283 zz = zz.mul(z, x) 1284 zz, z = z, zz 1285 } 1286 1287 if len(m) != 0 { 1288 zz, r = zz.div(r, z, m) 1289 zz, r, q, z = q, z, zz, r 1290 } 1291 1292 v <<= 1 1293 } 1294 1295 for i := len(y) - 2; i >= 0; i-- { 1296 v = y[i] 1297 1298 for j := 0; j < _W; j++ { 1299 zz = zz.mul(z, z) 1300 zz, z = z, zz 1301 1302 if v&mask != 0 { 1303 zz = zz.mul(z, x) 1304 zz, z = z, zz 1305 } 1306 1307 if len(m) != 0 { 1308 zz, r = zz.div(r, z, m) 1309 zz, r, q, z = q, z, zz, r 1310 } 1311 1312 v <<= 1 1313 } 1314 } 1315 1316 return z.norm() 1317 } 1318 1319 // expNNWindowed calculates x**y mod m using a fixed, 4-bit window. 1320 func (z nat) expNNWindowed(x, y, m nat) nat { 1321 // zz and r are used to avoid allocating in mul and div as otherwise 1322 // the arguments would alias. 1323 var zz, r nat 1324 1325 const n = 4 1326 // powers[i] contains x^i. 1327 var powers [1 << n]nat 1328 powers[0] = natOne 1329 powers[1] = x 1330 for i := 2; i < 1<<n; i += 2 { 1331 p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] 1332 *p = p.mul(*p2, *p2) 1333 zz, r = zz.div(r, *p, m) 1334 *p, r = r, *p 1335 *p1 = p1.mul(*p, x) 1336 zz, r = zz.div(r, *p1, m) 1337 *p1, r = r, *p1 1338 } 1339 1340 z = z.setWord(1) 1341 1342 for i := len(y) - 1; i >= 0; i-- { 1343 yi := y[i] 1344 for j := 0; j < _W; j += n { 1345 if i != len(y)-1 || j != 0 { 1346 // Unrolled loop for significant performance 1347 // gain. Use go test -bench=".*" in crypto/rsa 1348 // to check performance before making changes. 1349 zz = zz.mul(z, z) 1350 zz, z = z, zz 1351 zz, r = zz.div(r, z, m) 1352 z, r = r, z 1353 1354 zz = zz.mul(z, z) 1355 zz, z = z, zz 1356 zz, r = zz.div(r, z, m) 1357 z, r = r, z 1358 1359 zz = zz.mul(z, z) 1360 zz, z = z, zz 1361 zz, r = zz.div(r, z, m) 1362 z, r = r, z 1363 1364 zz = zz.mul(z, z) 1365 zz, z = z, zz 1366 zz, r = zz.div(r, z, m) 1367 z, r = r, z 1368 } 1369 1370 zz = zz.mul(z, powers[yi>>(_W-n)]) 1371 zz, z = z, zz 1372 zz, r = zz.div(r, z, m) 1373 z, r = r, z 1374 1375 yi <<= n 1376 } 1377 } 1378 1379 return z.norm() 1380 } 1381 1382 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime. 1383 // If it returns true, n is prime with probability 1 - 1/4^reps. 1384 // If it returns false, n is not prime. 1385 func (n nat) probablyPrime(reps int) bool { 1386 if len(n) == 0 { 1387 return false 1388 } 1389 1390 if len(n) == 1 { 1391 if n[0] < 2 { 1392 return false 1393 } 1394 1395 if n[0]%2 == 0 { 1396 return n[0] == 2 1397 } 1398 1399 // We have to exclude these cases because we reject all 1400 // multiples of these numbers below. 1401 switch n[0] { 1402 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: 1403 return true 1404 } 1405 } 1406 1407 const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} 1408 const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} 1409 1410 var r Word 1411 switch _W { 1412 case 32: 1413 r = n.modW(primesProduct32) 1414 case 64: 1415 r = n.modW(primesProduct64 & _M) 1416 default: 1417 panic("Unknown word size") 1418 } 1419 1420 if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || 1421 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { 1422 return false 1423 } 1424 1425 if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || 1426 r%43 == 0 || r%47 == 0 || r%53 == 0) { 1427 return false 1428 } 1429 1430 nm1 := nat(nil).sub(n, natOne) 1431 // determine q, k such that nm1 = q << k 1432 k := nm1.trailingZeroBits() 1433 q := nat(nil).shr(nm1, k) 1434 1435 nm3 := nat(nil).sub(nm1, natTwo) 1436 rand := rand.New(rand.NewSource(int64(n[0]))) 1437 1438 var x, y, quotient nat 1439 nm3Len := nm3.bitLen() 1440 1441 NextRandom: 1442 for i := 0; i < reps; i++ { 1443 x = x.random(rand, nm3, nm3Len) 1444 x = x.add(x, natTwo) 1445 y = y.expNN(x, q, n) 1446 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { 1447 continue 1448 } 1449 for j := uint(1); j < k; j++ { 1450 y = y.mul(y, y) 1451 quotient, y = quotient.div(y, y, n) 1452 if y.cmp(nm1) == 0 { 1453 continue NextRandom 1454 } 1455 if y.cmp(natOne) == 0 { 1456 return false 1457 } 1458 } 1459 return false 1460 } 1461 1462 return true 1463 } 1464 1465 // bytes writes the value of z into buf using big-endian encoding. 1466 // len(buf) must be >= len(z)*_S. The value of z is encoded in the 1467 // slice buf[i:]. The number i of unused bytes at the beginning of 1468 // buf is returned as result. 1469 func (z nat) bytes(buf []byte) (i int) { 1470 i = len(buf) 1471 for _, d := range z { 1472 for j := 0; j < _S; j++ { 1473 i-- 1474 buf[i] = byte(d) 1475 d >>= 8 1476 } 1477 } 1478 1479 for i < len(buf) && buf[i] == 0 { 1480 i++ 1481 } 1482 1483 return 1484 } 1485 1486 // setBytes interprets buf as the bytes of a big-endian unsigned 1487 // integer, sets z to that value, and returns z. 1488 func (z nat) setBytes(buf []byte) nat { 1489 z = z.make((len(buf) + _S - 1) / _S) 1490 1491 k := 0 1492 s := uint(0) 1493 var d Word 1494 for i := len(buf); i > 0; i-- { 1495 d |= Word(buf[i-1]) << s 1496 if s += 8; s == _S*8 { 1497 z[k] = d 1498 k++ 1499 s = 0 1500 d = 0 1501 } 1502 } 1503 if k < len(z) { 1504 z[k] = d 1505 } 1506 1507 return z.norm() 1508 }