github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/src/math/big/nat.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package big implements multi-precision arithmetic (big numbers).
     6  // The following numeric types are supported:
     7  //
     8  //	- Int	signed integers
     9  //	- Rat	rational numbers
    10  //
    11  // Methods are typically of the form:
    12  //
    13  //	func (z *Int) Op(x, y *Int) *Int	(similar for *Rat)
    14  //
    15  // and implement operations z = x Op y with the result as receiver; if it
    16  // is one of the operands it may be overwritten (and its memory reused).
    17  // To enable chaining of operations, the result is also returned. Methods
    18  // returning a result other than *Int or *Rat take one of the operands as
    19  // the receiver.
    20  //
    21  package big
    22  
    23  // This file contains operations on unsigned multi-precision integers.
    24  // These are the building blocks for the operations on signed integers
    25  // and rationals.
    26  
    27  import (
    28  	"errors"
    29  	"io"
    30  	"math"
    31  	"math/rand"
    32  	"sync"
    33  )
    34  
    35  // An unsigned integer x of the form
    36  //
    37  //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    38  //
    39  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    40  // with the digits x[i] as the slice elements.
    41  //
    42  // A number is normalized if the slice contains no leading 0 digits.
    43  // During arithmetic operations, denormalized values may occur but are
    44  // always normalized before returning the final result. The normalized
    45  // representation of 0 is the empty or nil slice (length = 0).
    46  //
    47  type nat []Word
    48  
    49  var (
    50  	natOne = nat{1}
    51  	natTwo = nat{2}
    52  	natTen = nat{10}
    53  )
    54  
    55  func (z nat) clear() {
    56  	for i := range z {
    57  		z[i] = 0
    58  	}
    59  }
    60  
    61  func (z nat) norm() nat {
    62  	i := len(z)
    63  	for i > 0 && z[i-1] == 0 {
    64  		i--
    65  	}
    66  	return z[0:i]
    67  }
    68  
    69  func (z nat) make(n int) nat {
    70  	if n <= cap(z) {
    71  		return z[0:n] // reuse z
    72  	}
    73  	// Choosing a good value for e has significant performance impact
    74  	// because it increases the chance that a value can be reused.
    75  	const e = 4 // extra capacity
    76  	return make(nat, n, n+e)
    77  }
    78  
    79  func (z nat) setWord(x Word) nat {
    80  	if x == 0 {
    81  		return z.make(0)
    82  	}
    83  	z = z.make(1)
    84  	z[0] = x
    85  	return z
    86  }
    87  
    88  func (z nat) setUint64(x uint64) nat {
    89  	// single-digit values
    90  	if w := Word(x); uint64(w) == x {
    91  		return z.setWord(w)
    92  	}
    93  
    94  	// compute number of words n required to represent x
    95  	n := 0
    96  	for t := x; t > 0; t >>= _W {
    97  		n++
    98  	}
    99  
   100  	// split x into n words
   101  	z = z.make(n)
   102  	for i := range z {
   103  		z[i] = Word(x & _M)
   104  		x >>= _W
   105  	}
   106  
   107  	return z
   108  }
   109  
   110  func (z nat) set(x nat) nat {
   111  	z = z.make(len(x))
   112  	copy(z, x)
   113  	return z
   114  }
   115  
   116  func (z nat) add(x, y nat) nat {
   117  	m := len(x)
   118  	n := len(y)
   119  
   120  	switch {
   121  	case m < n:
   122  		return z.add(y, x)
   123  	case m == 0:
   124  		// n == 0 because m >= n; result is 0
   125  		return z.make(0)
   126  	case n == 0:
   127  		// result is x
   128  		return z.set(x)
   129  	}
   130  	// m > 0
   131  
   132  	z = z.make(m + 1)
   133  	c := addVV(z[0:n], x, y)
   134  	if m > n {
   135  		c = addVW(z[n:m], x[n:], c)
   136  	}
   137  	z[m] = c
   138  
   139  	return z.norm()
   140  }
   141  
   142  func (z nat) sub(x, y nat) nat {
   143  	m := len(x)
   144  	n := len(y)
   145  
   146  	switch {
   147  	case m < n:
   148  		panic("underflow")
   149  	case m == 0:
   150  		// n == 0 because m >= n; result is 0
   151  		return z.make(0)
   152  	case n == 0:
   153  		// result is x
   154  		return z.set(x)
   155  	}
   156  	// m > 0
   157  
   158  	z = z.make(m)
   159  	c := subVV(z[0:n], x, y)
   160  	if m > n {
   161  		c = subVW(z[n:], x[n:], c)
   162  	}
   163  	if c != 0 {
   164  		panic("underflow")
   165  	}
   166  
   167  	return z.norm()
   168  }
   169  
   170  func (x nat) cmp(y nat) (r int) {
   171  	m := len(x)
   172  	n := len(y)
   173  	if m != n || m == 0 {
   174  		switch {
   175  		case m < n:
   176  			r = -1
   177  		case m > n:
   178  			r = 1
   179  		}
   180  		return
   181  	}
   182  
   183  	i := m - 1
   184  	for i > 0 && x[i] == y[i] {
   185  		i--
   186  	}
   187  
   188  	switch {
   189  	case x[i] < y[i]:
   190  		r = -1
   191  	case x[i] > y[i]:
   192  		r = 1
   193  	}
   194  	return
   195  }
   196  
   197  func (z nat) mulAddWW(x nat, y, r Word) nat {
   198  	m := len(x)
   199  	if m == 0 || y == 0 {
   200  		return z.setWord(r) // result is r
   201  	}
   202  	// m > 0
   203  
   204  	z = z.make(m + 1)
   205  	z[m] = mulAddVWW(z[0:m], x, y, r)
   206  
   207  	return z.norm()
   208  }
   209  
   210  // basicMul multiplies x and y and leaves the result in z.
   211  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   212  func basicMul(z, x, y nat) {
   213  	z[0 : len(x)+len(y)].clear() // initialize z
   214  	for i, d := range y {
   215  		if d != 0 {
   216  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   217  		}
   218  	}
   219  }
   220  
   221  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   222  // Factored out for readability - do not use outside karatsuba.
   223  func karatsubaAdd(z, x nat, n int) {
   224  	if c := addVV(z[0:n], z, x); c != 0 {
   225  		addVW(z[n:n+n>>1], z[n:], c)
   226  	}
   227  }
   228  
   229  // Like karatsubaAdd, but does subtract.
   230  func karatsubaSub(z, x nat, n int) {
   231  	if c := subVV(z[0:n], z, x); c != 0 {
   232  		subVW(z[n:n+n>>1], z[n:], c)
   233  	}
   234  }
   235  
   236  // Operands that are shorter than karatsubaThreshold are multiplied using
   237  // "grade school" multiplication; for longer operands the Karatsuba algorithm
   238  // is used.
   239  var karatsubaThreshold int = 40 // computed by calibrate.go
   240  
   241  // karatsuba multiplies x and y and leaves the result in z.
   242  // Both x and y must have the same length n and n must be a
   243  // power of 2. The result vector z must have len(z) >= 6*n.
   244  // The (non-normalized) result is placed in z[0 : 2*n].
   245  func karatsuba(z, x, y nat) {
   246  	n := len(y)
   247  
   248  	// Switch to basic multiplication if numbers are odd or small.
   249  	// (n is always even if karatsubaThreshold is even, but be
   250  	// conservative)
   251  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   252  		basicMul(z, x, y)
   253  		return
   254  	}
   255  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   256  
   257  	// Karatsuba multiplication is based on the observation that
   258  	// for two numbers x and y with:
   259  	//
   260  	//   x = x1*b + x0
   261  	//   y = y1*b + y0
   262  	//
   263  	// the product x*y can be obtained with 3 products z2, z1, z0
   264  	// instead of 4:
   265  	//
   266  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   267  	//       =    z2*b*b +              z1*b +    z0
   268  	//
   269  	// with:
   270  	//
   271  	//   xd = x1 - x0
   272  	//   yd = y0 - y1
   273  	//
   274  	//   z1 =      xd*yd                    + z2 + z0
   275  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
   276  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
   277  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
   278  	//      = x1*y0                 + x0*y1
   279  
   280  	// split x, y into "digits"
   281  	n2 := n >> 1              // n2 >= 1
   282  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   283  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   284  
   285  	// z is used for the result and temporary storage:
   286  	//
   287  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   288  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   289  	//
   290  	// For each recursive call of karatsuba, an unused slice of
   291  	// z is passed in that has (at least) half the length of the
   292  	// caller's z.
   293  
   294  	// compute z0 and z2 with the result "in place" in z
   295  	karatsuba(z, x0, y0)     // z0 = x0*y0
   296  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
   297  
   298  	// compute xd (or the negative value if underflow occurs)
   299  	s := 1 // sign of product xd*yd
   300  	xd := z[2*n : 2*n+n2]
   301  	if subVV(xd, x1, x0) != 0 { // x1-x0
   302  		s = -s
   303  		subVV(xd, x0, x1) // x0-x1
   304  	}
   305  
   306  	// compute yd (or the negative value if underflow occurs)
   307  	yd := z[2*n+n2 : 3*n]
   308  	if subVV(yd, y0, y1) != 0 { // y0-y1
   309  		s = -s
   310  		subVV(yd, y1, y0) // y1-y0
   311  	}
   312  
   313  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   314  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   315  	p := z[n*3:]
   316  	karatsuba(p, xd, yd)
   317  
   318  	// save original z2:z0
   319  	// (ok to use upper half of z since we're done recursing)
   320  	r := z[n*4:]
   321  	copy(r, z[:n*2])
   322  
   323  	// add up all partial products
   324  	//
   325  	//   2*n     n     0
   326  	// z = [ z2  | z0  ]
   327  	//   +    [ z0  ]
   328  	//   +    [ z2  ]
   329  	//   +    [  p  ]
   330  	//
   331  	karatsubaAdd(z[n2:], r, n)
   332  	karatsubaAdd(z[n2:], r[n:], n)
   333  	if s > 0 {
   334  		karatsubaAdd(z[n2:], p, n)
   335  	} else {
   336  		karatsubaSub(z[n2:], p, n)
   337  	}
   338  }
   339  
   340  // alias returns true if x and y share the same base array.
   341  func alias(x, y nat) bool {
   342  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   343  }
   344  
   345  // addAt implements z += x<<(_W*i); z must be long enough.
   346  // (we don't use nat.add because we need z to stay the same
   347  // slice, and we don't need to normalize z after each addition)
   348  func addAt(z, x nat, i int) {
   349  	if n := len(x); n > 0 {
   350  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   351  			j := i + n
   352  			if j < len(z) {
   353  				addVW(z[j:], z[j:], c)
   354  			}
   355  		}
   356  	}
   357  }
   358  
   359  func max(x, y int) int {
   360  	if x > y {
   361  		return x
   362  	}
   363  	return y
   364  }
   365  
   366  // karatsubaLen computes an approximation to the maximum k <= n such that
   367  // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   368  // result is the largest number that can be divided repeatedly by 2 before
   369  // becoming about the value of karatsubaThreshold.
   370  func karatsubaLen(n int) int {
   371  	i := uint(0)
   372  	for n > karatsubaThreshold {
   373  		n >>= 1
   374  		i++
   375  	}
   376  	return n << i
   377  }
   378  
   379  func (z nat) mul(x, y nat) nat {
   380  	m := len(x)
   381  	n := len(y)
   382  
   383  	switch {
   384  	case m < n:
   385  		return z.mul(y, x)
   386  	case m == 0 || n == 0:
   387  		return z.make(0)
   388  	case n == 1:
   389  		return z.mulAddWW(x, y[0], 0)
   390  	}
   391  	// m >= n > 1
   392  
   393  	// determine if z can be reused
   394  	if alias(z, x) || alias(z, y) {
   395  		z = nil // z is an alias for x or y - cannot reuse
   396  	}
   397  
   398  	// use basic multiplication if the numbers are small
   399  	if n < karatsubaThreshold {
   400  		z = z.make(m + n)
   401  		basicMul(z, x, y)
   402  		return z.norm()
   403  	}
   404  	// m >= n && n >= karatsubaThreshold && n >= 2
   405  
   406  	// determine Karatsuba length k such that
   407  	//
   408  	//   x = xh*b + x0  (0 <= x0 < b)
   409  	//   y = yh*b + y0  (0 <= y0 < b)
   410  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   411  	//
   412  	k := karatsubaLen(n)
   413  	// k <= n
   414  
   415  	// multiply x0 and y0 via Karatsuba
   416  	x0 := x[0:k]              // x0 is not normalized
   417  	y0 := y[0:k]              // y0 is not normalized
   418  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   419  	karatsuba(z, x0, y0)
   420  	z = z[0 : m+n]  // z has final length but may be incomplete
   421  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
   422  
   423  	// If xh != 0 or yh != 0, add the missing terms to z. For
   424  	//
   425  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
   426  	//   yh =                         y1*b (0 <= y1 < b)
   427  	//
   428  	// the missing terms are
   429  	//
   430  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
   431  	//
   432  	// since all the yi for i > 1 are 0 by choice of k: If any of them
   433  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
   434  	// be a larger valid threshold contradicting the assumption about k.
   435  	//
   436  	if k < n || m != n {
   437  		var t nat
   438  
   439  		// add x0*y1*b
   440  		x0 := x0.norm()
   441  		y1 := y[k:]       // y1 is normalized because y is
   442  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
   443  		addAt(z, t, k)
   444  
   445  		// add xi*y0<<i, xi*y1*b<<(i+k)
   446  		y0 := y0.norm()
   447  		for i := k; i < len(x); i += k {
   448  			xi := x[i:]
   449  			if len(xi) > k {
   450  				xi = xi[:k]
   451  			}
   452  			xi = xi.norm()
   453  			t = t.mul(xi, y0)
   454  			addAt(z, t, i)
   455  			t = t.mul(xi, y1)
   456  			addAt(z, t, i+k)
   457  		}
   458  	}
   459  
   460  	return z.norm()
   461  }
   462  
   463  // mulRange computes the product of all the unsigned integers in the
   464  // range [a, b] inclusively. If a > b (empty range), the result is 1.
   465  func (z nat) mulRange(a, b uint64) nat {
   466  	switch {
   467  	case a == 0:
   468  		// cut long ranges short (optimization)
   469  		return z.setUint64(0)
   470  	case a > b:
   471  		return z.setUint64(1)
   472  	case a == b:
   473  		return z.setUint64(a)
   474  	case a+1 == b:
   475  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   476  	}
   477  	m := (a + b) / 2
   478  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   479  }
   480  
   481  // q = (x-r)/y, with 0 <= r < y
   482  func (z nat) divW(x nat, y Word) (q nat, r Word) {
   483  	m := len(x)
   484  	switch {
   485  	case y == 0:
   486  		panic("division by zero")
   487  	case y == 1:
   488  		q = z.set(x) // result is x
   489  		return
   490  	case m == 0:
   491  		q = z.make(0) // result is 0
   492  		return
   493  	}
   494  	// m > 0
   495  	z = z.make(m)
   496  	r = divWVW(z, 0, x, y)
   497  	q = z.norm()
   498  	return
   499  }
   500  
   501  func (z nat) div(z2, u, v nat) (q, r nat) {
   502  	if len(v) == 0 {
   503  		panic("division by zero")
   504  	}
   505  
   506  	if u.cmp(v) < 0 {
   507  		q = z.make(0)
   508  		r = z2.set(u)
   509  		return
   510  	}
   511  
   512  	if len(v) == 1 {
   513  		var r2 Word
   514  		q, r2 = z.divW(u, v[0])
   515  		r = z2.setWord(r2)
   516  		return
   517  	}
   518  
   519  	q, r = z.divLarge(z2, u, v)
   520  	return
   521  }
   522  
   523  // q = (uIn-r)/v, with 0 <= r < y
   524  // Uses z as storage for q, and u as storage for r if possible.
   525  // See Knuth, Volume 2, section 4.3.1, Algorithm D.
   526  // Preconditions:
   527  //    len(v) >= 2
   528  //    len(uIn) >= len(v)
   529  func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   530  	n := len(v)
   531  	m := len(uIn) - n
   532  
   533  	// determine if z can be reused
   534  	// TODO(gri) should find a better solution - this if statement
   535  	//           is very costly (see e.g. time pidigits -s -n 10000)
   536  	if alias(z, uIn) || alias(z, v) {
   537  		z = nil // z is an alias for uIn or v - cannot reuse
   538  	}
   539  	q = z.make(m + 1)
   540  
   541  	qhatv := make(nat, n+1)
   542  	if alias(u, uIn) || alias(u, v) {
   543  		u = nil // u is an alias for uIn or v - cannot reuse
   544  	}
   545  	u = u.make(len(uIn) + 1)
   546  	u.clear()
   547  
   548  	// D1.
   549  	shift := leadingZeros(v[n-1])
   550  	if shift > 0 {
   551  		// do not modify v, it may be used by another goroutine simultaneously
   552  		v1 := make(nat, n)
   553  		shlVU(v1, v, shift)
   554  		v = v1
   555  	}
   556  	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   557  
   558  	// D2.
   559  	for j := m; j >= 0; j-- {
   560  		// D3.
   561  		qhat := Word(_M)
   562  		if u[j+n] != v[n-1] {
   563  			var rhat Word
   564  			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   565  
   566  			// x1 | x2 = q̂v_{n-2}
   567  			x1, x2 := mulWW(qhat, v[n-2])
   568  			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   569  			for greaterThan(x1, x2, rhat, u[j+n-2]) {
   570  				qhat--
   571  				prevRhat := rhat
   572  				rhat += v[n-1]
   573  				// v[n-1] >= 0, so this tests for overflow.
   574  				if rhat < prevRhat {
   575  					break
   576  				}
   577  				x1, x2 = mulWW(qhat, v[n-2])
   578  			}
   579  		}
   580  
   581  		// D4.
   582  		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   583  
   584  		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   585  		if c != 0 {
   586  			c := addVV(u[j:j+n], u[j:], v)
   587  			u[j+n] += c
   588  			qhat--
   589  		}
   590  
   591  		q[j] = qhat
   592  	}
   593  
   594  	q = q.norm()
   595  	shrVU(u, u, shift)
   596  	r = u.norm()
   597  
   598  	return q, r
   599  }
   600  
   601  // Length of x in bits. x must be normalized.
   602  func (x nat) bitLen() int {
   603  	if i := len(x) - 1; i >= 0 {
   604  		return i*_W + bitLen(x[i])
   605  	}
   606  	return 0
   607  }
   608  
   609  // MaxBase is the largest number base accepted for string conversions.
   610  const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
   611  
   612  func hexValue(ch rune) Word {
   613  	d := int(MaxBase + 1) // illegal base
   614  	switch {
   615  	case '0' <= ch && ch <= '9':
   616  		d = int(ch - '0')
   617  	case 'a' <= ch && ch <= 'z':
   618  		d = int(ch - 'a' + 10)
   619  	case 'A' <= ch && ch <= 'Z':
   620  		d = int(ch - 'A' + 10)
   621  	}
   622  	return Word(d)
   623  }
   624  
   625  // scan sets z to the natural number corresponding to the longest possible prefix
   626  // read from r representing an unsigned integer in a given conversion base.
   627  // It returns z, the actual conversion base used, and an error, if any. In the
   628  // error case, the value of z is undefined. The syntax follows the syntax of
   629  // unsigned integer literals in Go.
   630  //
   631  // The base argument must be 0 or a value from 2 through MaxBase. If the base
   632  // is 0, the string prefix determines the actual conversion base. A prefix of
   633  // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
   634  // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
   635  //
   636  func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
   637  	// reject illegal bases
   638  	if base < 0 || base == 1 || MaxBase < base {
   639  		return z, 0, errors.New("illegal number base")
   640  	}
   641  
   642  	// one char look-ahead
   643  	ch, _, err := r.ReadRune()
   644  	if err != nil {
   645  		return z, 0, err
   646  	}
   647  
   648  	// determine base if necessary
   649  	b := Word(base)
   650  	if base == 0 {
   651  		b = 10
   652  		if ch == '0' {
   653  			switch ch, _, err = r.ReadRune(); err {
   654  			case nil:
   655  				b = 8
   656  				switch ch {
   657  				case 'x', 'X':
   658  					b = 16
   659  				case 'b', 'B':
   660  					b = 2
   661  				}
   662  				if b == 2 || b == 16 {
   663  					if ch, _, err = r.ReadRune(); err != nil {
   664  						return z, 0, err
   665  					}
   666  				}
   667  			case io.EOF:
   668  				return z.make(0), 10, nil
   669  			default:
   670  				return z, 10, err
   671  			}
   672  		}
   673  	}
   674  
   675  	// convert string
   676  	// - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
   677  	// - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
   678  	z = z.make(0)
   679  	bb := Word(1)
   680  	dd := Word(0)
   681  	for max := _M / b; ; {
   682  		d := hexValue(ch)
   683  		if d >= b {
   684  			r.UnreadRune() // ch does not belong to number anymore
   685  			break
   686  		}
   687  
   688  		if bb <= max {
   689  			bb *= b
   690  			dd = dd*b + d
   691  		} else {
   692  			// bb * b would overflow
   693  			z = z.mulAddWW(z, bb, dd)
   694  			bb = b
   695  			dd = d
   696  		}
   697  
   698  		if ch, _, err = r.ReadRune(); err != nil {
   699  			if err != io.EOF {
   700  				return z, int(b), err
   701  			}
   702  			break
   703  		}
   704  	}
   705  
   706  	switch {
   707  	case bb > 1:
   708  		// there was at least one mantissa digit
   709  		z = z.mulAddWW(z, bb, dd)
   710  	case base == 0 && b == 8:
   711  		// there was only the octal prefix 0 (possibly followed by digits > 7);
   712  		// return base 10, not 8
   713  		return z, 10, nil
   714  	case base != 0 || b != 8:
   715  		// there was neither a mantissa digit nor the octal prefix 0
   716  		return z, int(b), errors.New("syntax error scanning number")
   717  	}
   718  
   719  	return z.norm(), int(b), nil
   720  }
   721  
   722  // Character sets for string conversion.
   723  const (
   724  	lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
   725  	uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   726  )
   727  
   728  // decimalString returns a decimal representation of x.
   729  // It calls x.string with the charset "0123456789".
   730  func (x nat) decimalString() string {
   731  	return x.string(lowercaseDigits[0:10])
   732  }
   733  
   734  // string converts x to a string using digits from a charset; a digit with
   735  // value d is represented by charset[d]. The conversion base is determined
   736  // by len(charset), which must be >= 2 and <= 256.
   737  func (x nat) string(charset string) string {
   738  	b := Word(len(charset))
   739  
   740  	// special cases
   741  	switch {
   742  	case b < 2 || MaxBase > 256:
   743  		panic("illegal base")
   744  	case len(x) == 0:
   745  		return string(charset[0])
   746  	}
   747  
   748  	// allocate buffer for conversion
   749  	i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
   750  	s := make([]byte, i)
   751  
   752  	// convert power of two and non power of two bases separately
   753  	if b == b&-b {
   754  		// shift is base-b digit size in bits
   755  		shift := trailingZeroBits(b) // shift > 0 because b >= 2
   756  		mask := Word(1)<<shift - 1
   757  		w := x[0]
   758  		nbits := uint(_W) // number of unprocessed bits in w
   759  
   760  		// convert less-significant words
   761  		for k := 1; k < len(x); k++ {
   762  			// convert full digits
   763  			for nbits >= shift {
   764  				i--
   765  				s[i] = charset[w&mask]
   766  				w >>= shift
   767  				nbits -= shift
   768  			}
   769  
   770  			// convert any partial leading digit and advance to next word
   771  			if nbits == 0 {
   772  				// no partial digit remaining, just advance
   773  				w = x[k]
   774  				nbits = _W
   775  			} else {
   776  				// partial digit in current (k-1) and next (k) word
   777  				w |= x[k] << nbits
   778  				i--
   779  				s[i] = charset[w&mask]
   780  
   781  				// advance
   782  				w = x[k] >> (shift - nbits)
   783  				nbits = _W - (shift - nbits)
   784  			}
   785  		}
   786  
   787  		// convert digits of most-significant word (omit leading zeros)
   788  		for nbits >= 0 && w != 0 {
   789  			i--
   790  			s[i] = charset[w&mask]
   791  			w >>= shift
   792  			nbits -= shift
   793  		}
   794  
   795  	} else {
   796  		// determine "big base"; i.e., the largest possible value bb
   797  		// that is a power of base b and still fits into a Word
   798  		// (as in 10^19 for 19 decimal digits in a 64bit Word)
   799  		bb := b      // big base is b**ndigits
   800  		ndigits := 1 // number of base b digits
   801  		for max := Word(_M / b); bb <= max; bb *= b {
   802  			ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
   803  		}
   804  
   805  		// construct table of successive squares of bb*leafSize to use in subdivisions
   806  		// result (table != nil) <=> (len(x) > leafSize > 0)
   807  		table := divisors(len(x), b, ndigits, bb)
   808  
   809  		// preserve x, create local copy for use by convertWords
   810  		q := nat(nil).set(x)
   811  
   812  		// convert q to string s in base b
   813  		q.convertWords(s, charset, b, ndigits, bb, table)
   814  
   815  		// strip leading zeros
   816  		// (x != 0; thus s must contain at least one non-zero digit
   817  		// and the loop will terminate)
   818  		i = 0
   819  		for zero := charset[0]; s[i] == zero; {
   820  			i++
   821  		}
   822  	}
   823  
   824  	return string(s[i:])
   825  }
   826  
   827  // Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
   828  // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
   829  // repeated nat/Word division.
   830  //
   831  // The iterative method processes n Words by n divW() calls, each of which visits every Word in the
   832  // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
   833  // Recursive conversion divides q by its approximate square root, yielding two parts, each half
   834  // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
   835  // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
   836  // is made better by splitting the subblocks recursively. Best is to split blocks until one more
   837  // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
   838  // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
   839  // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
   840  // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
   841  // specific hardware.
   842  //
   843  func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
   844  	// split larger blocks recursively
   845  	if table != nil {
   846  		// len(q) > leafSize > 0
   847  		var r nat
   848  		index := len(table) - 1
   849  		for len(q) > leafSize {
   850  			// find divisor close to sqrt(q) if possible, but in any case < q
   851  			maxLength := q.bitLen()     // ~= log2 q, or at of least largest possible q of this bit length
   852  			minLength := maxLength >> 1 // ~= log2 sqrt(q)
   853  			for index > 0 && table[index-1].nbits > minLength {
   854  				index-- // desired
   855  			}
   856  			if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
   857  				index--
   858  				if index < 0 {
   859  					panic("internal inconsistency")
   860  				}
   861  			}
   862  
   863  			// split q into the two digit number (q'*bbb + r) to form independent subblocks
   864  			q, r = q.div(r, q, table[index].bbb)
   865  
   866  			// convert subblocks and collect results in s[:h] and s[h:]
   867  			h := len(s) - table[index].ndigits
   868  			r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
   869  			s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
   870  		}
   871  	}
   872  
   873  	// having split any large blocks now process the remaining (small) block iteratively
   874  	i := len(s)
   875  	var r Word
   876  	if b == 10 {
   877  		// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
   878  		for len(q) > 0 {
   879  			// extract least significant, base bb "digit"
   880  			q, r = q.divW(q, bb)
   881  			for j := 0; j < ndigits && i > 0; j++ {
   882  				i--
   883  				// avoid % computation since r%10 == r - int(r/10)*10;
   884  				// this appears to be faster for BenchmarkString10000Base10
   885  				// and smaller strings (but a bit slower for larger ones)
   886  				t := r / 10
   887  				s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
   888  				r = t
   889  			}
   890  		}
   891  	} else {
   892  		for len(q) > 0 {
   893  			// extract least significant, base bb "digit"
   894  			q, r = q.divW(q, bb)
   895  			for j := 0; j < ndigits && i > 0; j++ {
   896  				i--
   897  				s[i] = charset[r%b]
   898  				r /= b
   899  			}
   900  		}
   901  	}
   902  
   903  	// prepend high-order zeroes
   904  	zero := charset[0]
   905  	for i > 0 { // while need more leading zeroes
   906  		i--
   907  		s[i] = zero
   908  	}
   909  }
   910  
   911  // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
   912  // Benchmark and configure leafSize using: go test -bench="Leaf"
   913  //   8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
   914  //   8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
   915  var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
   916  
   917  type divisor struct {
   918  	bbb     nat // divisor
   919  	nbits   int // bit length of divisor (discounting leading zeroes) ~= log2(bbb)
   920  	ndigits int // digit length of divisor in terms of output base digits
   921  }
   922  
   923  var cacheBase10 struct {
   924  	sync.Mutex
   925  	table [64]divisor // cached divisors for base 10
   926  }
   927  
   928  // expWW computes x**y
   929  func (z nat) expWW(x, y Word) nat {
   930  	return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
   931  }
   932  
   933  // construct table of powers of bb*leafSize to use in subdivisions
   934  func divisors(m int, b Word, ndigits int, bb Word) []divisor {
   935  	// only compute table when recursive conversion is enabled and x is large
   936  	if leafSize == 0 || m <= leafSize {
   937  		return nil
   938  	}
   939  
   940  	// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
   941  	k := 1
   942  	for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
   943  		k++
   944  	}
   945  
   946  	// reuse and extend existing table of divisors or create new table as appropriate
   947  	var table []divisor // for b == 10, table overlaps with cacheBase10.table
   948  	if b == 10 {
   949  		cacheBase10.Lock()
   950  		table = cacheBase10.table[0:k] // reuse old table for this conversion
   951  	} else {
   952  		table = make([]divisor, k) // create new table for this conversion
   953  	}
   954  
   955  	// extend table
   956  	if table[k-1].ndigits == 0 {
   957  		// add new entries as needed
   958  		var larger nat
   959  		for i := 0; i < k; i++ {
   960  			if table[i].ndigits == 0 {
   961  				if i == 0 {
   962  					table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
   963  					table[0].ndigits = ndigits * leafSize
   964  				} else {
   965  					table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
   966  					table[i].ndigits = 2 * table[i-1].ndigits
   967  				}
   968  
   969  				// optimization: exploit aggregated extra bits in macro blocks
   970  				larger = nat(nil).set(table[i].bbb)
   971  				for mulAddVWW(larger, larger, b, 0) == 0 {
   972  					table[i].bbb = table[i].bbb.set(larger)
   973  					table[i].ndigits++
   974  				}
   975  
   976  				table[i].nbits = table[i].bbb.bitLen()
   977  			}
   978  		}
   979  	}
   980  
   981  	if b == 10 {
   982  		cacheBase10.Unlock()
   983  	}
   984  
   985  	return table
   986  }
   987  
   988  const deBruijn32 = 0x077CB531
   989  
   990  var deBruijn32Lookup = []byte{
   991  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
   992  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
   993  }
   994  
   995  const deBruijn64 = 0x03f79d71b4ca8b09
   996  
   997  var deBruijn64Lookup = []byte{
   998  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   999  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
  1000  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
  1001  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
  1002  }
  1003  
  1004  // trailingZeroBits returns the number of consecutive least significant zero
  1005  // bits of x.
  1006  func trailingZeroBits(x Word) uint {
  1007  	// x & -x leaves only the right-most bit set in the word. Let k be the
  1008  	// index of that bit. Since only a single bit is set, the value is two
  1009  	// to the power of k. Multiplying by a power of two is equivalent to
  1010  	// left shifting, in this case by k bits.  The de Bruijn constant is
  1011  	// such that all six bit, consecutive substrings are distinct.
  1012  	// Therefore, if we have a left shifted version of this constant we can
  1013  	// find by how many bits it was shifted by looking at which six bit
  1014  	// substring ended up at the top of the word.
  1015  	// (Knuth, volume 4, section 7.3.1)
  1016  	switch _W {
  1017  	case 32:
  1018  		return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
  1019  	case 64:
  1020  		return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
  1021  	default:
  1022  		panic("unknown word size")
  1023  	}
  1024  }
  1025  
  1026  // trailingZeroBits returns the number of consecutive least significant zero
  1027  // bits of x.
  1028  func (x nat) trailingZeroBits() uint {
  1029  	if len(x) == 0 {
  1030  		return 0
  1031  	}
  1032  	var i uint
  1033  	for x[i] == 0 {
  1034  		i++
  1035  	}
  1036  	// x[i] != 0
  1037  	return i*_W + trailingZeroBits(x[i])
  1038  }
  1039  
  1040  // z = x << s
  1041  func (z nat) shl(x nat, s uint) nat {
  1042  	m := len(x)
  1043  	if m == 0 {
  1044  		return z.make(0)
  1045  	}
  1046  	// m > 0
  1047  
  1048  	n := m + int(s/_W)
  1049  	z = z.make(n + 1)
  1050  	z[n] = shlVU(z[n-m:n], x, s%_W)
  1051  	z[0 : n-m].clear()
  1052  
  1053  	return z.norm()
  1054  }
  1055  
  1056  // z = x >> s
  1057  func (z nat) shr(x nat, s uint) nat {
  1058  	m := len(x)
  1059  	n := m - int(s/_W)
  1060  	if n <= 0 {
  1061  		return z.make(0)
  1062  	}
  1063  	// n > 0
  1064  
  1065  	z = z.make(n)
  1066  	shrVU(z, x[m-n:], s%_W)
  1067  
  1068  	return z.norm()
  1069  }
  1070  
  1071  func (z nat) setBit(x nat, i uint, b uint) nat {
  1072  	j := int(i / _W)
  1073  	m := Word(1) << (i % _W)
  1074  	n := len(x)
  1075  	switch b {
  1076  	case 0:
  1077  		z = z.make(n)
  1078  		copy(z, x)
  1079  		if j >= n {
  1080  			// no need to grow
  1081  			return z
  1082  		}
  1083  		z[j] &^= m
  1084  		return z.norm()
  1085  	case 1:
  1086  		if j >= n {
  1087  			z = z.make(j + 1)
  1088  			z[n:].clear()
  1089  		} else {
  1090  			z = z.make(n)
  1091  		}
  1092  		copy(z, x)
  1093  		z[j] |= m
  1094  		// no need to normalize
  1095  		return z
  1096  	}
  1097  	panic("set bit is not 0 or 1")
  1098  }
  1099  
  1100  func (z nat) bit(i uint) uint {
  1101  	j := int(i / _W)
  1102  	if j >= len(z) {
  1103  		return 0
  1104  	}
  1105  	return uint(z[j] >> (i % _W) & 1)
  1106  }
  1107  
  1108  func (z nat) and(x, y nat) nat {
  1109  	m := len(x)
  1110  	n := len(y)
  1111  	if m > n {
  1112  		m = n
  1113  	}
  1114  	// m <= n
  1115  
  1116  	z = z.make(m)
  1117  	for i := 0; i < m; i++ {
  1118  		z[i] = x[i] & y[i]
  1119  	}
  1120  
  1121  	return z.norm()
  1122  }
  1123  
  1124  func (z nat) andNot(x, y nat) nat {
  1125  	m := len(x)
  1126  	n := len(y)
  1127  	if n > m {
  1128  		n = m
  1129  	}
  1130  	// m >= n
  1131  
  1132  	z = z.make(m)
  1133  	for i := 0; i < n; i++ {
  1134  		z[i] = x[i] &^ y[i]
  1135  	}
  1136  	copy(z[n:m], x[n:m])
  1137  
  1138  	return z.norm()
  1139  }
  1140  
  1141  func (z nat) or(x, y nat) nat {
  1142  	m := len(x)
  1143  	n := len(y)
  1144  	s := x
  1145  	if m < n {
  1146  		n, m = m, n
  1147  		s = y
  1148  	}
  1149  	// m >= n
  1150  
  1151  	z = z.make(m)
  1152  	for i := 0; i < n; i++ {
  1153  		z[i] = x[i] | y[i]
  1154  	}
  1155  	copy(z[n:m], s[n:m])
  1156  
  1157  	return z.norm()
  1158  }
  1159  
  1160  func (z nat) xor(x, y nat) nat {
  1161  	m := len(x)
  1162  	n := len(y)
  1163  	s := x
  1164  	if m < n {
  1165  		n, m = m, n
  1166  		s = y
  1167  	}
  1168  	// m >= n
  1169  
  1170  	z = z.make(m)
  1171  	for i := 0; i < n; i++ {
  1172  		z[i] = x[i] ^ y[i]
  1173  	}
  1174  	copy(z[n:m], s[n:m])
  1175  
  1176  	return z.norm()
  1177  }
  1178  
  1179  // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
  1180  func greaterThan(x1, x2, y1, y2 Word) bool {
  1181  	return x1 > y1 || x1 == y1 && x2 > y2
  1182  }
  1183  
  1184  // modW returns x % d.
  1185  func (x nat) modW(d Word) (r Word) {
  1186  	// TODO(agl): we don't actually need to store the q value.
  1187  	var q nat
  1188  	q = q.make(len(x))
  1189  	return divWVW(q, 0, x, d)
  1190  }
  1191  
  1192  // random creates a random integer in [0..limit), using the space in z if
  1193  // possible. n is the bit length of limit.
  1194  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
  1195  	if alias(z, limit) {
  1196  		z = nil // z is an alias for limit - cannot reuse
  1197  	}
  1198  	z = z.make(len(limit))
  1199  
  1200  	bitLengthOfMSW := uint(n % _W)
  1201  	if bitLengthOfMSW == 0 {
  1202  		bitLengthOfMSW = _W
  1203  	}
  1204  	mask := Word((1 << bitLengthOfMSW) - 1)
  1205  
  1206  	for {
  1207  		switch _W {
  1208  		case 32:
  1209  			for i := range z {
  1210  				z[i] = Word(rand.Uint32())
  1211  			}
  1212  		case 64:
  1213  			for i := range z {
  1214  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
  1215  			}
  1216  		default:
  1217  			panic("unknown word size")
  1218  		}
  1219  		z[len(limit)-1] &= mask
  1220  		if z.cmp(limit) < 0 {
  1221  			break
  1222  		}
  1223  	}
  1224  
  1225  	return z.norm()
  1226  }
  1227  
  1228  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
  1229  // otherwise it sets z to x**y. The result is the value of z.
  1230  func (z nat) expNN(x, y, m nat) nat {
  1231  	if alias(z, x) || alias(z, y) {
  1232  		// We cannot allow in-place modification of x or y.
  1233  		z = nil
  1234  	}
  1235  
  1236  	// x**y mod 1 == 0
  1237  	if len(m) == 1 && m[0] == 1 {
  1238  		return z.setWord(0)
  1239  	}
  1240  	// m == 0 || m > 1
  1241  
  1242  	// x**0 == 1
  1243  	if len(y) == 0 {
  1244  		return z.setWord(1)
  1245  	}
  1246  	// y > 0
  1247  
  1248  	if len(m) != 0 {
  1249  		// We likely end up being as long as the modulus.
  1250  		z = z.make(len(m))
  1251  	}
  1252  	z = z.set(x)
  1253  
  1254  	// If the base is non-trivial and the exponent is large, we use
  1255  	// 4-bit, windowed exponentiation. This involves precomputing 14 values
  1256  	// (x^2...x^15) but then reduces the number of multiply-reduces by a
  1257  	// third. Even for a 32-bit exponent, this reduces the number of
  1258  	// operations.
  1259  	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
  1260  		return z.expNNWindowed(x, y, m)
  1261  	}
  1262  
  1263  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
  1264  	shift := leadingZeros(v) + 1
  1265  	v <<= shift
  1266  	var q nat
  1267  
  1268  	const mask = 1 << (_W - 1)
  1269  
  1270  	// We walk through the bits of the exponent one by one. Each time we
  1271  	// see a bit, we square, thus doubling the power. If the bit is a one,
  1272  	// we also multiply by x, thus adding one to the power.
  1273  
  1274  	w := _W - int(shift)
  1275  	// zz and r are used to avoid allocating in mul and div as
  1276  	// otherwise the arguments would alias.
  1277  	var zz, r nat
  1278  	for j := 0; j < w; j++ {
  1279  		zz = zz.mul(z, z)
  1280  		zz, z = z, zz
  1281  
  1282  		if v&mask != 0 {
  1283  			zz = zz.mul(z, x)
  1284  			zz, z = z, zz
  1285  		}
  1286  
  1287  		if len(m) != 0 {
  1288  			zz, r = zz.div(r, z, m)
  1289  			zz, r, q, z = q, z, zz, r
  1290  		}
  1291  
  1292  		v <<= 1
  1293  	}
  1294  
  1295  	for i := len(y) - 2; i >= 0; i-- {
  1296  		v = y[i]
  1297  
  1298  		for j := 0; j < _W; j++ {
  1299  			zz = zz.mul(z, z)
  1300  			zz, z = z, zz
  1301  
  1302  			if v&mask != 0 {
  1303  				zz = zz.mul(z, x)
  1304  				zz, z = z, zz
  1305  			}
  1306  
  1307  			if len(m) != 0 {
  1308  				zz, r = zz.div(r, z, m)
  1309  				zz, r, q, z = q, z, zz, r
  1310  			}
  1311  
  1312  			v <<= 1
  1313  		}
  1314  	}
  1315  
  1316  	return z.norm()
  1317  }
  1318  
  1319  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
  1320  func (z nat) expNNWindowed(x, y, m nat) nat {
  1321  	// zz and r are used to avoid allocating in mul and div as otherwise
  1322  	// the arguments would alias.
  1323  	var zz, r nat
  1324  
  1325  	const n = 4
  1326  	// powers[i] contains x^i.
  1327  	var powers [1 << n]nat
  1328  	powers[0] = natOne
  1329  	powers[1] = x
  1330  	for i := 2; i < 1<<n; i += 2 {
  1331  		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
  1332  		*p = p.mul(*p2, *p2)
  1333  		zz, r = zz.div(r, *p, m)
  1334  		*p, r = r, *p
  1335  		*p1 = p1.mul(*p, x)
  1336  		zz, r = zz.div(r, *p1, m)
  1337  		*p1, r = r, *p1
  1338  	}
  1339  
  1340  	z = z.setWord(1)
  1341  
  1342  	for i := len(y) - 1; i >= 0; i-- {
  1343  		yi := y[i]
  1344  		for j := 0; j < _W; j += n {
  1345  			if i != len(y)-1 || j != 0 {
  1346  				// Unrolled loop for significant performance
  1347  				// gain.  Use go test -bench=".*" in crypto/rsa
  1348  				// to check performance before making changes.
  1349  				zz = zz.mul(z, z)
  1350  				zz, z = z, zz
  1351  				zz, r = zz.div(r, z, m)
  1352  				z, r = r, z
  1353  
  1354  				zz = zz.mul(z, z)
  1355  				zz, z = z, zz
  1356  				zz, r = zz.div(r, z, m)
  1357  				z, r = r, z
  1358  
  1359  				zz = zz.mul(z, z)
  1360  				zz, z = z, zz
  1361  				zz, r = zz.div(r, z, m)
  1362  				z, r = r, z
  1363  
  1364  				zz = zz.mul(z, z)
  1365  				zz, z = z, zz
  1366  				zz, r = zz.div(r, z, m)
  1367  				z, r = r, z
  1368  			}
  1369  
  1370  			zz = zz.mul(z, powers[yi>>(_W-n)])
  1371  			zz, z = z, zz
  1372  			zz, r = zz.div(r, z, m)
  1373  			z, r = r, z
  1374  
  1375  			yi <<= n
  1376  		}
  1377  	}
  1378  
  1379  	return z.norm()
  1380  }
  1381  
  1382  // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
  1383  // If it returns true, n is prime with probability 1 - 1/4^reps.
  1384  // If it returns false, n is not prime.
  1385  func (n nat) probablyPrime(reps int) bool {
  1386  	if len(n) == 0 {
  1387  		return false
  1388  	}
  1389  
  1390  	if len(n) == 1 {
  1391  		if n[0] < 2 {
  1392  			return false
  1393  		}
  1394  
  1395  		if n[0]%2 == 0 {
  1396  			return n[0] == 2
  1397  		}
  1398  
  1399  		// We have to exclude these cases because we reject all
  1400  		// multiples of these numbers below.
  1401  		switch n[0] {
  1402  		case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1403  			return true
  1404  		}
  1405  	}
  1406  
  1407  	const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1408  	const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1409  
  1410  	var r Word
  1411  	switch _W {
  1412  	case 32:
  1413  		r = n.modW(primesProduct32)
  1414  	case 64:
  1415  		r = n.modW(primesProduct64 & _M)
  1416  	default:
  1417  		panic("Unknown word size")
  1418  	}
  1419  
  1420  	if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1421  		r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1422  		return false
  1423  	}
  1424  
  1425  	if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1426  		r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1427  		return false
  1428  	}
  1429  
  1430  	nm1 := nat(nil).sub(n, natOne)
  1431  	// determine q, k such that nm1 = q << k
  1432  	k := nm1.trailingZeroBits()
  1433  	q := nat(nil).shr(nm1, k)
  1434  
  1435  	nm3 := nat(nil).sub(nm1, natTwo)
  1436  	rand := rand.New(rand.NewSource(int64(n[0])))
  1437  
  1438  	var x, y, quotient nat
  1439  	nm3Len := nm3.bitLen()
  1440  
  1441  NextRandom:
  1442  	for i := 0; i < reps; i++ {
  1443  		x = x.random(rand, nm3, nm3Len)
  1444  		x = x.add(x, natTwo)
  1445  		y = y.expNN(x, q, n)
  1446  		if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1447  			continue
  1448  		}
  1449  		for j := uint(1); j < k; j++ {
  1450  			y = y.mul(y, y)
  1451  			quotient, y = quotient.div(y, y, n)
  1452  			if y.cmp(nm1) == 0 {
  1453  				continue NextRandom
  1454  			}
  1455  			if y.cmp(natOne) == 0 {
  1456  				return false
  1457  			}
  1458  		}
  1459  		return false
  1460  	}
  1461  
  1462  	return true
  1463  }
  1464  
  1465  // bytes writes the value of z into buf using big-endian encoding.
  1466  // len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1467  // slice buf[i:]. The number i of unused bytes at the beginning of
  1468  // buf is returned as result.
  1469  func (z nat) bytes(buf []byte) (i int) {
  1470  	i = len(buf)
  1471  	for _, d := range z {
  1472  		for j := 0; j < _S; j++ {
  1473  			i--
  1474  			buf[i] = byte(d)
  1475  			d >>= 8
  1476  		}
  1477  	}
  1478  
  1479  	for i < len(buf) && buf[i] == 0 {
  1480  		i++
  1481  	}
  1482  
  1483  	return
  1484  }
  1485  
  1486  // setBytes interprets buf as the bytes of a big-endian unsigned
  1487  // integer, sets z to that value, and returns z.
  1488  func (z nat) setBytes(buf []byte) nat {
  1489  	z = z.make((len(buf) + _S - 1) / _S)
  1490  
  1491  	k := 0
  1492  	s := uint(0)
  1493  	var d Word
  1494  	for i := len(buf); i > 0; i-- {
  1495  		d |= Word(buf[i-1]) << s
  1496  		if s += 8; s == _S*8 {
  1497  			z[k] = d
  1498  			k++
  1499  			s = 0
  1500  			d = 0
  1501  		}
  1502  	}
  1503  	if k < len(z) {
  1504  		z[k] = d
  1505  	}
  1506  
  1507  	return z.norm()
  1508  }