github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/math/big/nat.go

github.com/bgentry/go@v0.0.0-20150121062915-6cf5a733d54d/src/math/big/nat.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package big implements multi-precision arithmetic (big numbers).
     6  // The following numeric types are supported:
     7  //
     8  //	- Int	signed integers
     9  //	- Rat	rational numbers
    10  //
    11  // Methods are typically of the form:
    12  //
    13  //	func (z *Int) Op(x, y *Int) *Int	(similar for *Rat)
    14  //
    15  // and implement operations z = x Op y with the result as receiver; if it
    16  // is one of the operands it may be overwritten (and its memory reused).
    17  // To enable chaining of operations, the result is also returned. Methods
    18  // returning a result other than *Int or *Rat take one of the operands as
    19  // the receiver.
    20  //
    21  package big
    22  
    23  // This file contains operations on unsigned multi-precision integers.
    24  // These are the building blocks for the operations on signed integers
    25  // and rationals.
    26  
    27  import (
    28  	"errors"
    29  	"io"
    30  	"math"
    31  	"math/rand"
    32  	"sync"
    33  )
    34  
    35  // An unsigned integer x of the form
    36  //
    37  //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    38  //
    39  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    40  // with the digits x[i] as the slice elements.
    41  //
    42  // A number is normalized if the slice contains no leading 0 digits.
    43  // During arithmetic operations, denormalized values may occur but are
    44  // always normalized before returning the final result. The normalized
    45  // representation of 0 is the empty or nil slice (length = 0).
    46  //
    47  type nat []Word
    48  
    49  var (
    50  	natOne = nat{1}
    51  	natTwo = nat{2}
    52  	natTen = nat{10}
    53  )
    54  
    55  func (z nat) clear() {
    56  	for i := range z {
    57  		z[i] = 0
    58  	}
    59  }
    60  
    61  func (z nat) norm() nat {
    62  	i := len(z)
    63  	for i > 0 && z[i-1] == 0 {
    64  		i--
    65  	}
    66  	return z[0:i]
    67  }
    68  
    69  func (z nat) make(n int) nat {
    70  	if n <= cap(z) {
    71  		return z[:n] // reuse z
    72  	}
    73  	// Choosing a good value for e has significant performance impact
    74  	// because it increases the chance that a value can be reused.
    75  	const e = 4 // extra capacity
    76  	return make(nat, n, n+e)
    77  }
    78  
    79  func (z nat) setWord(x Word) nat {
    80  	if x == 0 {
    81  		return z[:0]
    82  	}
    83  	z = z.make(1)
    84  	z[0] = x
    85  	return z
    86  }
    87  
    88  func (z nat) setUint64(x uint64) nat {
    89  	// single-digit values
    90  	if w := Word(x); uint64(w) == x {
    91  		return z.setWord(w)
    92  	}
    93  
    94  	// compute number of words n required to represent x
    95  	n := 0
    96  	for t := x; t > 0; t >>= _W {
    97  		n++
    98  	}
    99  
   100  	// split x into n words
   101  	z = z.make(n)
   102  	for i := range z {
   103  		z[i] = Word(x & _M)
   104  		x >>= _W
   105  	}
   106  
   107  	return z
   108  }
   109  
   110  func (z nat) set(x nat) nat {
   111  	z = z.make(len(x))
   112  	copy(z, x)
   113  	return z
   114  }
   115  
   116  func (z nat) add(x, y nat) nat {
   117  	m := len(x)
   118  	n := len(y)
   119  
   120  	switch {
   121  	case m < n:
   122  		return z.add(y, x)
   123  	case m == 0:
   124  		// n == 0 because m >= n; result is 0
   125  		return z[:0]
   126  	case n == 0:
   127  		// result is x
   128  		return z.set(x)
   129  	}
   130  	// m > 0
   131  
   132  	z = z.make(m + 1)
   133  	c := addVV(z[0:n], x, y)
   134  	if m > n {
   135  		c = addVW(z[n:m], x[n:], c)
   136  	}
   137  	z[m] = c
   138  
   139  	return z.norm()
   140  }
   141  
   142  func (z nat) sub(x, y nat) nat {
   143  	m := len(x)
   144  	n := len(y)
   145  
   146  	switch {
   147  	case m < n:
   148  		panic("underflow")
   149  	case m == 0:
   150  		// n == 0 because m >= n; result is 0
   151  		return z[:0]
   152  	case n == 0:
   153  		// result is x
   154  		return z.set(x)
   155  	}
   156  	// m > 0
   157  
   158  	z = z.make(m)
   159  	c := subVV(z[0:n], x, y)
   160  	if m > n {
   161  		c = subVW(z[n:], x[n:], c)
   162  	}
   163  	if c != 0 {
   164  		panic("underflow")
   165  	}
   166  
   167  	return z.norm()
   168  }
   169  
   170  func (x nat) cmp(y nat) (r int) {
   171  	m := len(x)
   172  	n := len(y)
   173  	if m != n || m == 0 {
   174  		switch {
   175  		case m < n:
   176  			r = -1
   177  		case m > n:
   178  			r = 1
   179  		}
   180  		return
   181  	}
   182  
   183  	i := m - 1
   184  	for i > 0 && x[i] == y[i] {
   185  		i--
   186  	}
   187  
   188  	switch {
   189  	case x[i] < y[i]:
   190  		r = -1
   191  	case x[i] > y[i]:
   192  		r = 1
   193  	}
   194  	return
   195  }
   196  
   197  func (z nat) mulAddWW(x nat, y, r Word) nat {
   198  	m := len(x)
   199  	if m == 0 || y == 0 {
   200  		return z.setWord(r) // result is r
   201  	}
   202  	// m > 0
   203  
   204  	z = z.make(m + 1)
   205  	z[m] = mulAddVWW(z[0:m], x, y, r)
   206  
   207  	return z.norm()
   208  }
   209  
   210  // basicMul multiplies x and y and leaves the result in z.
   211  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   212  func basicMul(z, x, y nat) {
   213  	z[0 : len(x)+len(y)].clear() // initialize z
   214  	for i, d := range y {
   215  		if d != 0 {
   216  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   217  		}
   218  	}
   219  }
   220  
   221  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   222  // Factored out for readability - do not use outside karatsuba.
   223  func karatsubaAdd(z, x nat, n int) {
   224  	if c := addVV(z[0:n], z, x); c != 0 {
   225  		addVW(z[n:n+n>>1], z[n:], c)
   226  	}
   227  }
   228  
   229  // Like karatsubaAdd, but does subtract.
   230  func karatsubaSub(z, x nat, n int) {
   231  	if c := subVV(z[0:n], z, x); c != 0 {
   232  		subVW(z[n:n+n>>1], z[n:], c)
   233  	}
   234  }
   235  
   236  // Operands that are shorter than karatsubaThreshold are multiplied using
   237  // "grade school" multiplication; for longer operands the Karatsuba algorithm
   238  // is used.
   239  var karatsubaThreshold int = 40 // computed by calibrate.go
   240  
   241  // karatsuba multiplies x and y and leaves the result in z.
   242  // Both x and y must have the same length n and n must be a
   243  // power of 2. The result vector z must have len(z) >= 6*n.
   244  // The (non-normalized) result is placed in z[0 : 2*n].
   245  func karatsuba(z, x, y nat) {
   246  	n := len(y)
   247  
   248  	// Switch to basic multiplication if numbers are odd or small.
   249  	// (n is always even if karatsubaThreshold is even, but be
   250  	// conservative)
   251  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   252  		basicMul(z, x, y)
   253  		return
   254  	}
   255  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   256  
   257  	// Karatsuba multiplication is based on the observation that
   258  	// for two numbers x and y with:
   259  	//
   260  	//   x = x1*b + x0
   261  	//   y = y1*b + y0
   262  	//
   263  	// the product x*y can be obtained with 3 products z2, z1, z0
   264  	// instead of 4:
   265  	//
   266  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   267  	//       =    z2*b*b +              z1*b +    z0
   268  	//
   269  	// with:
   270  	//
   271  	//   xd = x1 - x0
   272  	//   yd = y0 - y1
   273  	//
   274  	//   z1 =      xd*yd                    + z2 + z0
   275  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
   276  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
   277  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
   278  	//      = x1*y0                 + x0*y1
   279  
   280  	// split x, y into "digits"
   281  	n2 := n >> 1              // n2 >= 1
   282  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   283  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   284  
   285  	// z is used for the result and temporary storage:
   286  	//
   287  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   288  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   289  	//
   290  	// For each recursive call of karatsuba, an unused slice of
   291  	// z is passed in that has (at least) half the length of the
   292  	// caller's z.
   293  
   294  	// compute z0 and z2 with the result "in place" in z
   295  	karatsuba(z, x0, y0)     // z0 = x0*y0
   296  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
   297  
   298  	// compute xd (or the negative value if underflow occurs)
   299  	s := 1 // sign of product xd*yd
   300  	xd := z[2*n : 2*n+n2]
   301  	if subVV(xd, x1, x0) != 0 { // x1-x0
   302  		s = -s
   303  		subVV(xd, x0, x1) // x0-x1
   304  	}
   305  
   306  	// compute yd (or the negative value if underflow occurs)
   307  	yd := z[2*n+n2 : 3*n]
   308  	if subVV(yd, y0, y1) != 0 { // y0-y1
   309  		s = -s
   310  		subVV(yd, y1, y0) // y1-y0
   311  	}
   312  
   313  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   314  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   315  	p := z[n*3:]
   316  	karatsuba(p, xd, yd)
   317  
   318  	// save original z2:z0
   319  	// (ok to use upper half of z since we're done recursing)
   320  	r := z[n*4:]
   321  	copy(r, z[:n*2])
   322  
   323  	// add up all partial products
   324  	//
   325  	//   2*n     n     0
   326  	// z = [ z2  | z0  ]
   327  	//   +    [ z0  ]
   328  	//   +    [ z2  ]
   329  	//   +    [  p  ]
   330  	//
   331  	karatsubaAdd(z[n2:], r, n)
   332  	karatsubaAdd(z[n2:], r[n:], n)
   333  	if s > 0 {
   334  		karatsubaAdd(z[n2:], p, n)
   335  	} else {
   336  		karatsubaSub(z[n2:], p, n)
   337  	}
   338  }
   339  
   340  // alias returns true if x and y share the same base array.
   341  func alias(x, y nat) bool {
   342  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   343  }
   344  
   345  // addAt implements z += x<<(_W*i); z must be long enough.
   346  // (we don't use nat.add because we need z to stay the same
   347  // slice, and we don't need to normalize z after each addition)
   348  func addAt(z, x nat, i int) {
   349  	if n := len(x); n > 0 {
   350  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   351  			j := i + n
   352  			if j < len(z) {
   353  				addVW(z[j:], z[j:], c)
   354  			}
   355  		}
   356  	}
   357  }
   358  
   359  func max(x, y int) int {
   360  	if x > y {
   361  		return x
   362  	}
   363  	return y
   364  }
   365  
   366  // karatsubaLen computes an approximation to the maximum k <= n such that
   367  // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   368  // result is the largest number that can be divided repeatedly by 2 before
   369  // becoming about the value of karatsubaThreshold.
   370  func karatsubaLen(n int) int {
   371  	i := uint(0)
   372  	for n > karatsubaThreshold {
   373  		n >>= 1
   374  		i++
   375  	}
   376  	return n << i
   377  }
   378  
   379  func (z nat) mul(x, y nat) nat {
   380  	m := len(x)
   381  	n := len(y)
   382  
   383  	switch {
   384  	case m < n:
   385  		return z.mul(y, x)
   386  	case m == 0 || n == 0:
   387  		return z[:0]
   388  	case n == 1:
   389  		return z.mulAddWW(x, y[0], 0)
   390  	}
   391  	// m >= n > 1
   392  
   393  	// determine if z can be reused
   394  	if alias(z, x) || alias(z, y) {
   395  		z = nil // z is an alias for x or y - cannot reuse
   396  	}
   397  
   398  	// use basic multiplication if the numbers are small
   399  	if n < karatsubaThreshold {
   400  		z = z.make(m + n)
   401  		basicMul(z, x, y)
   402  		return z.norm()
   403  	}
   404  	// m >= n && n >= karatsubaThreshold && n >= 2
   405  
   406  	// determine Karatsuba length k such that
   407  	//
   408  	//   x = xh*b + x0  (0 <= x0 < b)
   409  	//   y = yh*b + y0  (0 <= y0 < b)
   410  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   411  	//
   412  	k := karatsubaLen(n)
   413  	// k <= n
   414  
   415  	// multiply x0 and y0 via Karatsuba
   416  	x0 := x[0:k]              // x0 is not normalized
   417  	y0 := y[0:k]              // y0 is not normalized
   418  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   419  	karatsuba(z, x0, y0)
   420  	z = z[0 : m+n]  // z has final length but may be incomplete
   421  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
   422  
   423  	// If xh != 0 or yh != 0, add the missing terms to z. For
   424  	//
   425  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
   426  	//   yh =                         y1*b (0 <= y1 < b)
   427  	//
   428  	// the missing terms are
   429  	//
   430  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
   431  	//
   432  	// since all the yi for i > 1 are 0 by choice of k: If any of them
   433  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
   434  	// be a larger valid threshold contradicting the assumption about k.
   435  	//
   436  	if k < n || m != n {
   437  		var t nat
   438  
   439  		// add x0*y1*b
   440  		x0 := x0.norm()
   441  		y1 := y[k:]       // y1 is normalized because y is
   442  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
   443  		addAt(z, t, k)
   444  
   445  		// add xi*y0<<i, xi*y1*b<<(i+k)
   446  		y0 := y0.norm()
   447  		for i := k; i < len(x); i += k {
   448  			xi := x[i:]
   449  			if len(xi) > k {
   450  				xi = xi[:k]
   451  			}
   452  			xi = xi.norm()
   453  			t = t.mul(xi, y0)
   454  			addAt(z, t, i)
   455  			t = t.mul(xi, y1)
   456  			addAt(z, t, i+k)
   457  		}
   458  	}
   459  
   460  	return z.norm()
   461  }
   462  
   463  // mulRange computes the product of all the unsigned integers in the
   464  // range [a, b] inclusively. If a > b (empty range), the result is 1.
   465  func (z nat) mulRange(a, b uint64) nat {
   466  	switch {
   467  	case a == 0:
   468  		// cut long ranges short (optimization)
   469  		return z.setUint64(0)
   470  	case a > b:
   471  		return z.setUint64(1)
   472  	case a == b:
   473  		return z.setUint64(a)
   474  	case a+1 == b:
   475  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   476  	}
   477  	m := (a + b) / 2
   478  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   479  }
   480  
   481  // q = (x-r)/y, with 0 <= r < y
   482  func (z nat) divW(x nat, y Word) (q nat, r Word) {
   483  	m := len(x)
   484  	switch {
   485  	case y == 0:
   486  		panic("division by zero")
   487  	case y == 1:
   488  		q = z.set(x) // result is x
   489  		return
   490  	case m == 0:
   491  		q = z[:0] // result is 0
   492  		return
   493  	}
   494  	// m > 0
   495  	z = z.make(m)
   496  	r = divWVW(z, 0, x, y)
   497  	q = z.norm()
   498  	return
   499  }
   500  
   501  func (z nat) div(z2, u, v nat) (q, r nat) {
   502  	if len(v) == 0 {
   503  		panic("division by zero")
   504  	}
   505  
   506  	if u.cmp(v) < 0 {
   507  		q = z[:0]
   508  		r = z2.set(u)
   509  		return
   510  	}
   511  
   512  	if len(v) == 1 {
   513  		var r2 Word
   514  		q, r2 = z.divW(u, v[0])
   515  		r = z2.setWord(r2)
   516  		return
   517  	}
   518  
   519  	q, r = z.divLarge(z2, u, v)
   520  	return
   521  }
   522  
   523  // q = (uIn-r)/v, with 0 <= r < y
   524  // Uses z as storage for q, and u as storage for r if possible.
   525  // See Knuth, Volume 2, section 4.3.1, Algorithm D.
   526  // Preconditions:
   527  //    len(v) >= 2
   528  //    len(uIn) >= len(v)
   529  func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   530  	n := len(v)
   531  	m := len(uIn) - n
   532  
   533  	// determine if z can be reused
   534  	// TODO(gri) should find a better solution - this if statement
   535  	//           is very costly (see e.g. time pidigits -s -n 10000)
   536  	if alias(z, uIn) || alias(z, v) {
   537  		z = nil // z is an alias for uIn or v - cannot reuse
   538  	}
   539  	q = z.make(m + 1)
   540  
   541  	qhatv := make(nat, n+1)
   542  	if alias(u, uIn) || alias(u, v) {
   543  		u = nil // u is an alias for uIn or v - cannot reuse
   544  	}
   545  	u = u.make(len(uIn) + 1)
   546  	u.clear() // TODO(gri) no need to clear if we allocated a new u
   547  
   548  	// D1.
   549  	shift := leadingZeros(v[n-1])
   550  	if shift > 0 {
   551  		// do not modify v, it may be used by another goroutine simultaneously
   552  		v1 := make(nat, n)
   553  		shlVU(v1, v, shift)
   554  		v = v1
   555  	}
   556  	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   557  
   558  	// D2.
   559  	for j := m; j >= 0; j-- {
   560  		// D3.
   561  		qhat := Word(_M)
   562  		if u[j+n] != v[n-1] {
   563  			var rhat Word
   564  			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   565  
   566  			// x1 | x2 = q̂v_{n-2}
   567  			x1, x2 := mulWW(qhat, v[n-2])
   568  			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   569  			for greaterThan(x1, x2, rhat, u[j+n-2]) {
   570  				qhat--
   571  				prevRhat := rhat
   572  				rhat += v[n-1]
   573  				// v[n-1] >= 0, so this tests for overflow.
   574  				if rhat < prevRhat {
   575  					break
   576  				}
   577  				x1, x2 = mulWW(qhat, v[n-2])
   578  			}
   579  		}
   580  
   581  		// D4.
   582  		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   583  
   584  		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   585  		if c != 0 {
   586  			c := addVV(u[j:j+n], u[j:], v)
   587  			u[j+n] += c
   588  			qhat--
   589  		}
   590  
   591  		q[j] = qhat
   592  	}
   593  
   594  	q = q.norm()
   595  	shrVU(u, u, shift)
   596  	r = u.norm()
   597  
   598  	return q, r
   599  }
   600  
   601  // Length of x in bits. x must be normalized.
   602  func (x nat) bitLen() int {
   603  	if i := len(x) - 1; i >= 0 {
   604  		return i*_W + bitLen(x[i])
   605  	}
   606  	return 0
   607  }
   608  
   609  // MaxBase is the largest number base accepted for string conversions.
   610  const MaxBase = 'z' - 'a' + 10 + 1
   611  
   612  // maxPow returns (b**n, n) such that b**n is the largest power b**n <= _M.
   613  // For instance maxPow(10) == (1e19, 19) for 19 decimal digits in a 64bit Word.
   614  // In other words, at most n digits in base b fit into a Word.
   615  // TODO(gri) replace this with a table, generated at build time.
   616  func maxPow(b Word) (p Word, n int) {
   617  	p, n = b, 1 // assuming b <= _M
   618  	for max := _M / b; p <= max; {
   619  		// p == b**n && p <= max
   620  		p *= b
   621  		n++
   622  	}
   623  	// p == b**n && p <= _M
   624  	return
   625  }
   626  
   627  // pow returns x**n for n > 0, and 1 otherwise.
   628  func pow(x Word, n int) (p Word) {
   629  	// n == sum of bi * 2**i, for 0 <= i < imax, and bi is 0 or 1
   630  	// thus x**n == product of x**(2**i) for all i where bi == 1
   631  	// (Russian Peasant Method for exponentiation)
   632  	p = 1
   633  	for n > 0 {
   634  		if n&1 != 0 {
   635  			p *= x
   636  		}
   637  		x *= x
   638  		n >>= 1
   639  	}
   640  	return
   641  }
   642  
   643  // scan scans the number corresponding to the longest possible prefix
   644  // from r representing an unsigned number in a given conversion base.
   645  // It returns the corresponding natural number res, the actual base b,
   646  // a digit count, and an error err, if any.
   647  //
   648  //	number = [ prefix ] digits | digits "." [ digits ] | "." digits .
   649  //	prefix = "0" [ "x" | "X" | "b" | "B" ] .
   650  //	digits = digit { digit } .
   651  //	digit  = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
   652  //
   653  // The base argument must be a value between 0 and MaxBase (inclusive).
   654  // For base 0, the number prefix determines the actual base: A prefix of
   655  // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and
   656  // a ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base
   657  // is 10 and no prefix is permitted.
   658  //
   659  // Base argument 1 selects actual base 10 but also enables scanning a number
   660  // with a decimal point.
   661  //
   662  // A result digit count > 0 corresponds to the number of (non-prefix) digits
   663  // parsed. A digit count <= 0 indicates the presence of a decimal point (for
   664  // base == 1, only), and the number of fractional digits is -count. In this
   665  // case, the value of the scanned number is res * 10**count.
   666  //
   667  func (z nat) scan(r io.RuneScanner, base int) (res nat, b, count int, err error) {
   668  	// reject illegal bases
   669  	if base < 0 || base > MaxBase {
   670  		err = errors.New("illegal number base")
   671  		return
   672  	}
   673  
   674  	// one char look-ahead
   675  	ch, _, err := r.ReadRune()
   676  	if err != nil {
   677  		return
   678  	}
   679  
   680  	// determine actual base
   681  	switch base {
   682  	case 0:
   683  		// actual base is 10 unless there's a base prefix
   684  		b = 10
   685  		if ch == '0' {
   686  			switch ch, _, err = r.ReadRune(); err {
   687  			case nil:
   688  				// possibly one of 0x, 0X, 0b, 0B
   689  				b = 8
   690  				switch ch {
   691  				case 'x', 'X':
   692  					b = 16
   693  				case 'b', 'B':
   694  					b = 2
   695  				}
   696  				if b == 2 || b == 16 {
   697  					if ch, _, err = r.ReadRune(); err != nil {
   698  						// io.EOF is also an error in this case
   699  						return
   700  					}
   701  				}
   702  			case io.EOF:
   703  				// input is "0"
   704  				res = z[:0]
   705  				count = 1
   706  				err = nil
   707  				return
   708  			default:
   709  				// read error
   710  				return
   711  			}
   712  		}
   713  	case 1:
   714  		// actual base is 10 and decimal point is permitted
   715  		b = 10
   716  	default:
   717  		b = base
   718  	}
   719  
   720  	// convert string
   721  	// Algorithm: Collect digits in groups of at most n digits in di
   722  	// and then use mulAddWW for every such group to add them to the
   723  	// result.
   724  	z = z[:0]
   725  	b1 := Word(b)
   726  	bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
   727  	di := Word(0)       // 0 <= di < b1**i < bn
   728  	i := 0              // 0 <= i < n
   729  	dp := -1            // position of decimal point
   730  	for {
   731  		if base == 1 && ch == '.' {
   732  			base = 10 // no 2nd decimal point permitted
   733  			dp = count
   734  			// advance
   735  			if ch, _, err = r.ReadRune(); err != nil {
   736  				if err == io.EOF {
   737  					err = nil
   738  					break
   739  				}
   740  				return
   741  			}
   742  		}
   743  
   744  		// convert rune into digit value d1
   745  		var d1 Word
   746  		switch {
   747  		case '0' <= ch && ch <= '9':
   748  			d1 = Word(ch - '0')
   749  		case 'a' <= ch && ch <= 'z':
   750  			d1 = Word(ch - 'a' + 10)
   751  		case 'A' <= ch && ch <= 'Z':
   752  			d1 = Word(ch - 'A' + 10)
   753  		default:
   754  			d1 = MaxBase + 1
   755  		}
   756  		if d1 >= b1 {
   757  			r.UnreadRune() // ch does not belong to number anymore
   758  			break
   759  		}
   760  		count++
   761  
   762  		// collect d1 in di
   763  		di = di*b1 + d1
   764  		i++
   765  
   766  		// if di is "full", add it to the result
   767  		if i == n {
   768  			z = z.mulAddWW(z, bn, di)
   769  			di = 0
   770  			i = 0
   771  		}
   772  
   773  		// advance
   774  		if ch, _, err = r.ReadRune(); err != nil {
   775  			if err == io.EOF {
   776  				err = nil
   777  				break
   778  			}
   779  			return
   780  		}
   781  	}
   782  
   783  	if count == 0 {
   784  		// no digits found
   785  		switch {
   786  		case base == 0 && b == 8:
   787  			// there was only the octal prefix 0 (possibly followed by digits > 7);
   788  			// count as one digit and return base 10, not 8
   789  			count = 1
   790  			b = 10
   791  		case base != 0 || b != 8:
   792  			// there was neither a mantissa digit nor the octal prefix 0
   793  			err = errors.New("syntax error scanning number")
   794  		}
   795  		return
   796  	}
   797  	// count > 0
   798  
   799  	// add remaining digits to result
   800  	if i > 0 {
   801  		z = z.mulAddWW(z, pow(b1, i), di)
   802  	}
   803  	res = z.norm()
   804  
   805  	// adjust for fraction, if any
   806  	if dp >= 0 {
   807  		// 0 <= dp <= count > 0
   808  		count = dp - count
   809  	}
   810  
   811  	return
   812  }
   813  
   814  // Character sets for string conversion.
   815  const (
   816  	lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
   817  	uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   818  )
   819  
   820  // decimalString returns a decimal representation of x.
   821  // It calls x.string with the charset "0123456789".
   822  func (x nat) decimalString() string {
   823  	return x.string(lowercaseDigits[:10])
   824  }
   825  
   826  // hexString returns a hexadecimal representation of x.
   827  // It calls x.string with the charset "0123456789abcdef".
   828  func (x nat) hexString() string {
   829  	return x.string(lowercaseDigits[:16])
   830  }
   831  
   832  // string converts x to a string using digits from a charset; a digit with
   833  // value d is represented by charset[d]. The conversion base is determined
   834  // by len(charset), which must be >= 2 and <= 256.
   835  func (x nat) string(charset string) string {
   836  	b := Word(len(charset))
   837  
   838  	// special cases
   839  	switch {
   840  	case b < 2 || b > 256:
   841  		panic("invalid character set length")
   842  	case len(x) == 0:
   843  		return string(charset[0])
   844  	}
   845  
   846  	// allocate buffer for conversion
   847  	i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
   848  	s := make([]byte, i)
   849  
   850  	// convert power of two and non power of two bases separately
   851  	if b == b&-b {
   852  		// shift is base-b digit size in bits
   853  		shift := trailingZeroBits(b) // shift > 0 because b >= 2
   854  		mask := Word(1)<<shift - 1
   855  		w := x[0]
   856  		nbits := uint(_W) // number of unprocessed bits in w
   857  
   858  		// convert less-significant words
   859  		for k := 1; k < len(x); k++ {
   860  			// convert full digits
   861  			for nbits >= shift {
   862  				i--
   863  				s[i] = charset[w&mask]
   864  				w >>= shift
   865  				nbits -= shift
   866  			}
   867  
   868  			// convert any partial leading digit and advance to next word
   869  			if nbits == 0 {
   870  				// no partial digit remaining, just advance
   871  				w = x[k]
   872  				nbits = _W
   873  			} else {
   874  				// partial digit in current (k-1) and next (k) word
   875  				w |= x[k] << nbits
   876  				i--
   877  				s[i] = charset[w&mask]
   878  
   879  				// advance
   880  				w = x[k] >> (shift - nbits)
   881  				nbits = _W - (shift - nbits)
   882  			}
   883  		}
   884  
   885  		// convert digits of most-significant word (omit leading zeros)
   886  		for nbits >= 0 && w != 0 {
   887  			i--
   888  			s[i] = charset[w&mask]
   889  			w >>= shift
   890  			nbits -= shift
   891  		}
   892  
   893  	} else {
   894  		bb, ndigits := maxPow(Word(b))
   895  
   896  		// construct table of successive squares of bb*leafSize to use in subdivisions
   897  		// result (table != nil) <=> (len(x) > leafSize > 0)
   898  		table := divisors(len(x), b, ndigits, bb)
   899  
   900  		// preserve x, create local copy for use by convertWords
   901  		q := nat(nil).set(x)
   902  
   903  		// convert q to string s in base b
   904  		q.convertWords(s, charset, b, ndigits, bb, table)
   905  
   906  		// strip leading zeros
   907  		// (x != 0; thus s must contain at least one non-zero digit
   908  		// and the loop will terminate)
   909  		i = 0
   910  		for zero := charset[0]; s[i] == zero; {
   911  			i++
   912  		}
   913  	}
   914  
   915  	return string(s[i:])
   916  }
   917  
   918  // Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
   919  // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
   920  // repeated nat/Word division.
   921  //
   922  // The iterative method processes n Words by n divW() calls, each of which visits every Word in the
   923  // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
   924  // Recursive conversion divides q by its approximate square root, yielding two parts, each half
   925  // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
   926  // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
   927  // is made better by splitting the subblocks recursively. Best is to split blocks until one more
   928  // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
   929  // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
   930  // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
   931  // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
   932  // specific hardware.
   933  //
   934  func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
   935  	// split larger blocks recursively
   936  	if table != nil {
   937  		// len(q) > leafSize > 0
   938  		var r nat
   939  		index := len(table) - 1
   940  		for len(q) > leafSize {
   941  			// find divisor close to sqrt(q) if possible, but in any case < q
   942  			maxLength := q.bitLen()     // ~= log2 q, or at of least largest possible q of this bit length
   943  			minLength := maxLength >> 1 // ~= log2 sqrt(q)
   944  			for index > 0 && table[index-1].nbits > minLength {
   945  				index-- // desired
   946  			}
   947  			if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
   948  				index--
   949  				if index < 0 {
   950  					panic("internal inconsistency")
   951  				}
   952  			}
   953  
   954  			// split q into the two digit number (q'*bbb + r) to form independent subblocks
   955  			q, r = q.div(r, q, table[index].bbb)
   956  
   957  			// convert subblocks and collect results in s[:h] and s[h:]
   958  			h := len(s) - table[index].ndigits
   959  			r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
   960  			s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
   961  		}
   962  	}
   963  
   964  	// having split any large blocks now process the remaining (small) block iteratively
   965  	i := len(s)
   966  	var r Word
   967  	if b == 10 {
   968  		// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
   969  		for len(q) > 0 {
   970  			// extract least significant, base bb "digit"
   971  			q, r = q.divW(q, bb)
   972  			for j := 0; j < ndigits && i > 0; j++ {
   973  				i--
   974  				// avoid % computation since r%10 == r - int(r/10)*10;
   975  				// this appears to be faster for BenchmarkString10000Base10
   976  				// and smaller strings (but a bit slower for larger ones)
   977  				t := r / 10
   978  				s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
   979  				r = t
   980  			}
   981  		}
   982  	} else {
   983  		for len(q) > 0 {
   984  			// extract least significant, base bb "digit"
   985  			q, r = q.divW(q, bb)
   986  			for j := 0; j < ndigits && i > 0; j++ {
   987  				i--
   988  				s[i] = charset[r%b]
   989  				r /= b
   990  			}
   991  		}
   992  	}
   993  
   994  	// prepend high-order zeroes
   995  	zero := charset[0]
   996  	for i > 0 { // while need more leading zeroes
   997  		i--
   998  		s[i] = zero
   999  	}
  1000  }
  1001  
  1002  // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
  1003  // Benchmark and configure leafSize using: go test -bench="Leaf"
  1004  //   8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
  1005  //   8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
  1006  var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
  1007  
  1008  type divisor struct {
  1009  	bbb     nat // divisor
  1010  	nbits   int // bit length of divisor (discounting leading zeroes) ~= log2(bbb)
  1011  	ndigits int // digit length of divisor in terms of output base digits
  1012  }
  1013  
  1014  var cacheBase10 struct {
  1015  	sync.Mutex
  1016  	table [64]divisor // cached divisors for base 10
  1017  }
  1018  
  1019  // expWW computes x**y
  1020  func (z nat) expWW(x, y Word) nat {
  1021  	return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
  1022  }
  1023  
  1024  // construct table of powers of bb*leafSize to use in subdivisions
  1025  func divisors(m int, b Word, ndigits int, bb Word) []divisor {
  1026  	// only compute table when recursive conversion is enabled and x is large
  1027  	if leafSize == 0 || m <= leafSize {
  1028  		return nil
  1029  	}
  1030  
  1031  	// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
  1032  	k := 1
  1033  	for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
  1034  		k++
  1035  	}
  1036  
  1037  	// reuse and extend existing table of divisors or create new table as appropriate
  1038  	var table []divisor // for b == 10, table overlaps with cacheBase10.table
  1039  	if b == 10 {
  1040  		cacheBase10.Lock()
  1041  		table = cacheBase10.table[0:k] // reuse old table for this conversion
  1042  	} else {
  1043  		table = make([]divisor, k) // create new table for this conversion
  1044  	}
  1045  
  1046  	// extend table
  1047  	if table[k-1].ndigits == 0 {
  1048  		// add new entries as needed
  1049  		var larger nat
  1050  		for i := 0; i < k; i++ {
  1051  			if table[i].ndigits == 0 {
  1052  				if i == 0 {
  1053  					table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
  1054  					table[0].ndigits = ndigits * leafSize
  1055  				} else {
  1056  					table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
  1057  					table[i].ndigits = 2 * table[i-1].ndigits
  1058  				}
  1059  
  1060  				// optimization: exploit aggregated extra bits in macro blocks
  1061  				larger = nat(nil).set(table[i].bbb)
  1062  				for mulAddVWW(larger, larger, b, 0) == 0 {
  1063  					table[i].bbb = table[i].bbb.set(larger)
  1064  					table[i].ndigits++
  1065  				}
  1066  
  1067  				table[i].nbits = table[i].bbb.bitLen()
  1068  			}
  1069  		}
  1070  	}
  1071  
  1072  	if b == 10 {
  1073  		cacheBase10.Unlock()
  1074  	}
  1075  
  1076  	return table
  1077  }
  1078  
  1079  const deBruijn32 = 0x077CB531
  1080  
  1081  var deBruijn32Lookup = []byte{
  1082  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  1083  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
  1084  }
  1085  
  1086  const deBruijn64 = 0x03f79d71b4ca8b09
  1087  
  1088  var deBruijn64Lookup = []byte{
  1089  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
  1090  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
  1091  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
  1092  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
  1093  }
  1094  
  1095  // trailingZeroBits returns the number of consecutive least significant zero
  1096  // bits of x.
  1097  func trailingZeroBits(x Word) uint {
  1098  	// x & -x leaves only the right-most bit set in the word. Let k be the
  1099  	// index of that bit. Since only a single bit is set, the value is two
  1100  	// to the power of k. Multiplying by a power of two is equivalent to
  1101  	// left shifting, in this case by k bits.  The de Bruijn constant is
  1102  	// such that all six bit, consecutive substrings are distinct.
  1103  	// Therefore, if we have a left shifted version of this constant we can
  1104  	// find by how many bits it was shifted by looking at which six bit
  1105  	// substring ended up at the top of the word.
  1106  	// (Knuth, volume 4, section 7.3.1)
  1107  	switch _W {
  1108  	case 32:
  1109  		return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
  1110  	case 64:
  1111  		return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
  1112  	default:
  1113  		panic("unknown word size")
  1114  	}
  1115  }
  1116  
  1117  // trailingZeroBits returns the number of consecutive least significant zero
  1118  // bits of x.
  1119  func (x nat) trailingZeroBits() uint {
  1120  	if len(x) == 0 {
  1121  		return 0
  1122  	}
  1123  	var i uint
  1124  	for x[i] == 0 {
  1125  		i++
  1126  	}
  1127  	// x[i] != 0
  1128  	return i*_W + trailingZeroBits(x[i])
  1129  }
  1130  
  1131  // z = x << s
  1132  func (z nat) shl(x nat, s uint) nat {
  1133  	m := len(x)
  1134  	if m == 0 {
  1135  		return z[:0]
  1136  	}
  1137  	// m > 0
  1138  
  1139  	n := m + int(s/_W)
  1140  	z = z.make(n + 1)
  1141  	z[n] = shlVU(z[n-m:n], x, s%_W)
  1142  	z[0 : n-m].clear()
  1143  
  1144  	return z.norm()
  1145  }
  1146  
  1147  // z = x >> s
  1148  func (z nat) shr(x nat, s uint) nat {
  1149  	m := len(x)
  1150  	n := m - int(s/_W)
  1151  	if n <= 0 {
  1152  		return z[:0]
  1153  	}
  1154  	// n > 0
  1155  
  1156  	z = z.make(n)
  1157  	shrVU(z, x[m-n:], s%_W)
  1158  
  1159  	return z.norm()
  1160  }
  1161  
  1162  func (z nat) setBit(x nat, i uint, b uint) nat {
  1163  	j := int(i / _W)
  1164  	m := Word(1) << (i % _W)
  1165  	n := len(x)
  1166  	switch b {
  1167  	case 0:
  1168  		z = z.make(n)
  1169  		copy(z, x)
  1170  		if j >= n {
  1171  			// no need to grow
  1172  			return z
  1173  		}
  1174  		z[j] &^= m
  1175  		return z.norm()
  1176  	case 1:
  1177  		if j >= n {
  1178  			z = z.make(j + 1)
  1179  			z[n:].clear()
  1180  		} else {
  1181  			z = z.make(n)
  1182  		}
  1183  		copy(z, x)
  1184  		z[j] |= m
  1185  		// no need to normalize
  1186  		return z
  1187  	}
  1188  	panic("set bit is not 0 or 1")
  1189  }
  1190  
  1191  // bit returns the value of the i'th bit, with lsb == bit 0.
  1192  func (x nat) bit(i uint) uint {
  1193  	j := i / _W
  1194  	if j >= uint(len(x)) {
  1195  		return 0
  1196  	}
  1197  	// 0 <= j < len(x)
  1198  	return uint(x[j] >> (i % _W) & 1)
  1199  }
  1200  
  1201  func (z nat) and(x, y nat) nat {
  1202  	m := len(x)
  1203  	n := len(y)
  1204  	if m > n {
  1205  		m = n
  1206  	}
  1207  	// m <= n
  1208  
  1209  	z = z.make(m)
  1210  	for i := 0; i < m; i++ {
  1211  		z[i] = x[i] & y[i]
  1212  	}
  1213  
  1214  	return z.norm()
  1215  }
  1216  
  1217  func (z nat) andNot(x, y nat) nat {
  1218  	m := len(x)
  1219  	n := len(y)
  1220  	if n > m {
  1221  		n = m
  1222  	}
  1223  	// m >= n
  1224  
  1225  	z = z.make(m)
  1226  	for i := 0; i < n; i++ {
  1227  		z[i] = x[i] &^ y[i]
  1228  	}
  1229  	copy(z[n:m], x[n:m])
  1230  
  1231  	return z.norm()
  1232  }
  1233  
  1234  func (z nat) or(x, y nat) nat {
  1235  	m := len(x)
  1236  	n := len(y)
  1237  	s := x
  1238  	if m < n {
  1239  		n, m = m, n
  1240  		s = y
  1241  	}
  1242  	// m >= n
  1243  
  1244  	z = z.make(m)
  1245  	for i := 0; i < n; i++ {
  1246  		z[i] = x[i] | y[i]
  1247  	}
  1248  	copy(z[n:m], s[n:m])
  1249  
  1250  	return z.norm()
  1251  }
  1252  
  1253  func (z nat) xor(x, y nat) nat {
  1254  	m := len(x)
  1255  	n := len(y)
  1256  	s := x
  1257  	if m < n {
  1258  		n, m = m, n
  1259  		s = y
  1260  	}
  1261  	// m >= n
  1262  
  1263  	z = z.make(m)
  1264  	for i := 0; i < n; i++ {
  1265  		z[i] = x[i] ^ y[i]
  1266  	}
  1267  	copy(z[n:m], s[n:m])
  1268  
  1269  	return z.norm()
  1270  }
  1271  
  1272  // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
  1273  func greaterThan(x1, x2, y1, y2 Word) bool {
  1274  	return x1 > y1 || x1 == y1 && x2 > y2
  1275  }
  1276  
  1277  // modW returns x % d.
  1278  func (x nat) modW(d Word) (r Word) {
  1279  	// TODO(agl): we don't actually need to store the q value.
  1280  	var q nat
  1281  	q = q.make(len(x))
  1282  	return divWVW(q, 0, x, d)
  1283  }
  1284  
  1285  // random creates a random integer in [0..limit), using the space in z if
  1286  // possible. n is the bit length of limit.
  1287  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
  1288  	if alias(z, limit) {
  1289  		z = nil // z is an alias for limit - cannot reuse
  1290  	}
  1291  	z = z.make(len(limit))
  1292  
  1293  	bitLengthOfMSW := uint(n % _W)
  1294  	if bitLengthOfMSW == 0 {
  1295  		bitLengthOfMSW = _W
  1296  	}
  1297  	mask := Word((1 << bitLengthOfMSW) - 1)
  1298  
  1299  	for {
  1300  		switch _W {
  1301  		case 32:
  1302  			for i := range z {
  1303  				z[i] = Word(rand.Uint32())
  1304  			}
  1305  		case 64:
  1306  			for i := range z {
  1307  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
  1308  			}
  1309  		default:
  1310  			panic("unknown word size")
  1311  		}
  1312  		z[len(limit)-1] &= mask
  1313  		if z.cmp(limit) < 0 {
  1314  			break
  1315  		}
  1316  	}
  1317  
  1318  	return z.norm()
  1319  }
  1320  
  1321  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
  1322  // otherwise it sets z to x**y. The result is the value of z.
  1323  func (z nat) expNN(x, y, m nat) nat {
  1324  	if alias(z, x) || alias(z, y) {
  1325  		// We cannot allow in-place modification of x or y.
  1326  		z = nil
  1327  	}
  1328  
  1329  	// x**y mod 1 == 0
  1330  	if len(m) == 1 && m[0] == 1 {
  1331  		return z.setWord(0)
  1332  	}
  1333  	// m == 0 || m > 1
  1334  
  1335  	// x**0 == 1
  1336  	if len(y) == 0 {
  1337  		return z.setWord(1)
  1338  	}
  1339  	// y > 0
  1340  
  1341  	if len(m) != 0 {
  1342  		// We likely end up being as long as the modulus.
  1343  		z = z.make(len(m))
  1344  	}
  1345  	z = z.set(x)
  1346  
  1347  	// If the base is non-trivial and the exponent is large, we use
  1348  	// 4-bit, windowed exponentiation. This involves precomputing 14 values
  1349  	// (x^2...x^15) but then reduces the number of multiply-reduces by a
  1350  	// third. Even for a 32-bit exponent, this reduces the number of
  1351  	// operations.
  1352  	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
  1353  		return z.expNNWindowed(x, y, m)
  1354  	}
  1355  
  1356  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
  1357  	shift := leadingZeros(v) + 1
  1358  	v <<= shift
  1359  	var q nat
  1360  
  1361  	const mask = 1 << (_W - 1)
  1362  
  1363  	// We walk through the bits of the exponent one by one. Each time we
  1364  	// see a bit, we square, thus doubling the power. If the bit is a one,
  1365  	// we also multiply by x, thus adding one to the power.
  1366  
  1367  	w := _W - int(shift)
  1368  	// zz and r are used to avoid allocating in mul and div as
  1369  	// otherwise the arguments would alias.
  1370  	var zz, r nat
  1371  	for j := 0; j < w; j++ {
  1372  		zz = zz.mul(z, z)
  1373  		zz, z = z, zz
  1374  
  1375  		if v&mask != 0 {
  1376  			zz = zz.mul(z, x)
  1377  			zz, z = z, zz
  1378  		}
  1379  
  1380  		if len(m) != 0 {
  1381  			zz, r = zz.div(r, z, m)
  1382  			zz, r, q, z = q, z, zz, r
  1383  		}
  1384  
  1385  		v <<= 1
  1386  	}
  1387  
  1388  	for i := len(y) - 2; i >= 0; i-- {
  1389  		v = y[i]
  1390  
  1391  		for j := 0; j < _W; j++ {
  1392  			zz = zz.mul(z, z)
  1393  			zz, z = z, zz
  1394  
  1395  			if v&mask != 0 {
  1396  				zz = zz.mul(z, x)
  1397  				zz, z = z, zz
  1398  			}
  1399  
  1400  			if len(m) != 0 {
  1401  				zz, r = zz.div(r, z, m)
  1402  				zz, r, q, z = q, z, zz, r
  1403  			}
  1404  
  1405  			v <<= 1
  1406  		}
  1407  	}
  1408  
  1409  	return z.norm()
  1410  }
  1411  
  1412  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
  1413  func (z nat) expNNWindowed(x, y, m nat) nat {
  1414  	// zz and r are used to avoid allocating in mul and div as otherwise
  1415  	// the arguments would alias.
  1416  	var zz, r nat
  1417  
  1418  	const n = 4
  1419  	// powers[i] contains x^i.
  1420  	var powers [1 << n]nat
  1421  	powers[0] = natOne
  1422  	powers[1] = x
  1423  	for i := 2; i < 1<<n; i += 2 {
  1424  		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
  1425  		*p = p.mul(*p2, *p2)
  1426  		zz, r = zz.div(r, *p, m)
  1427  		*p, r = r, *p
  1428  		*p1 = p1.mul(*p, x)
  1429  		zz, r = zz.div(r, *p1, m)
  1430  		*p1, r = r, *p1
  1431  	}
  1432  
  1433  	z = z.setWord(1)
  1434  
  1435  	for i := len(y) - 1; i >= 0; i-- {
  1436  		yi := y[i]
  1437  		for j := 0; j < _W; j += n {
  1438  			if i != len(y)-1 || j != 0 {
  1439  				// Unrolled loop for significant performance
  1440  				// gain.  Use go test -bench=".*" in crypto/rsa
  1441  				// to check performance before making changes.
  1442  				zz = zz.mul(z, z)
  1443  				zz, z = z, zz
  1444  				zz, r = zz.div(r, z, m)
  1445  				z, r = r, z
  1446  
  1447  				zz = zz.mul(z, z)
  1448  				zz, z = z, zz
  1449  				zz, r = zz.div(r, z, m)
  1450  				z, r = r, z
  1451  
  1452  				zz = zz.mul(z, z)
  1453  				zz, z = z, zz
  1454  				zz, r = zz.div(r, z, m)
  1455  				z, r = r, z
  1456  
  1457  				zz = zz.mul(z, z)
  1458  				zz, z = z, zz
  1459  				zz, r = zz.div(r, z, m)
  1460  				z, r = r, z
  1461  			}
  1462  
  1463  			zz = zz.mul(z, powers[yi>>(_W-n)])
  1464  			zz, z = z, zz
  1465  			zz, r = zz.div(r, z, m)
  1466  			z, r = r, z
  1467  
  1468  			yi <<= n
  1469  		}
  1470  	}
  1471  
  1472  	return z.norm()
  1473  }
  1474  
  1475  // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
  1476  // If it returns true, n is prime with probability 1 - 1/4^reps.
  1477  // If it returns false, n is not prime.
  1478  func (n nat) probablyPrime(reps int) bool {
  1479  	if len(n) == 0 {
  1480  		return false
  1481  	}
  1482  
  1483  	if len(n) == 1 {
  1484  		if n[0] < 2 {
  1485  			return false
  1486  		}
  1487  
  1488  		if n[0]%2 == 0 {
  1489  			return n[0] == 2
  1490  		}
  1491  
  1492  		// We have to exclude these cases because we reject all
  1493  		// multiples of these numbers below.
  1494  		switch n[0] {
  1495  		case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1496  			return true
  1497  		}
  1498  	}
  1499  
  1500  	if n[0]&1 == 0 {
  1501  		return false // n is even
  1502  	}
  1503  
  1504  	const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1505  	const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1506  
  1507  	var r Word
  1508  	switch _W {
  1509  	case 32:
  1510  		r = n.modW(primesProduct32)
  1511  	case 64:
  1512  		r = n.modW(primesProduct64 & _M)
  1513  	default:
  1514  		panic("Unknown word size")
  1515  	}
  1516  
  1517  	if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1518  		r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1519  		return false
  1520  	}
  1521  
  1522  	if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1523  		r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1524  		return false
  1525  	}
  1526  
  1527  	nm1 := nat(nil).sub(n, natOne)
  1528  	// determine q, k such that nm1 = q << k
  1529  	k := nm1.trailingZeroBits()
  1530  	q := nat(nil).shr(nm1, k)
  1531  
  1532  	nm3 := nat(nil).sub(nm1, natTwo)
  1533  	rand := rand.New(rand.NewSource(int64(n[0])))
  1534  
  1535  	var x, y, quotient nat
  1536  	nm3Len := nm3.bitLen()
  1537  
  1538  NextRandom:
  1539  	for i := 0; i < reps; i++ {
  1540  		x = x.random(rand, nm3, nm3Len)
  1541  		x = x.add(x, natTwo)
  1542  		y = y.expNN(x, q, n)
  1543  		if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1544  			continue
  1545  		}
  1546  		for j := uint(1); j < k; j++ {
  1547  			y = y.mul(y, y)
  1548  			quotient, y = quotient.div(y, y, n)
  1549  			if y.cmp(nm1) == 0 {
  1550  				continue NextRandom
  1551  			}
  1552  			if y.cmp(natOne) == 0 {
  1553  				return false
  1554  			}
  1555  		}
  1556  		return false
  1557  	}
  1558  
  1559  	return true
  1560  }
  1561  
  1562  // bytes writes the value of z into buf using big-endian encoding.
  1563  // len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1564  // slice buf[i:]. The number i of unused bytes at the beginning of
  1565  // buf is returned as result.
  1566  func (z nat) bytes(buf []byte) (i int) {
  1567  	i = len(buf)
  1568  	for _, d := range z {
  1569  		for j := 0; j < _S; j++ {
  1570  			i--
  1571  			buf[i] = byte(d)
  1572  			d >>= 8
  1573  		}
  1574  	}
  1575  
  1576  	for i < len(buf) && buf[i] == 0 {
  1577  		i++
  1578  	}
  1579  
  1580  	return
  1581  }
  1582  
  1583  // setBytes interprets buf as the bytes of a big-endian unsigned
  1584  // integer, sets z to that value, and returns z.
  1585  func (z nat) setBytes(buf []byte) nat {
  1586  	z = z.make((len(buf) + _S - 1) / _S)
  1587  
  1588  	k := 0
  1589  	s := uint(0)
  1590  	var d Word
  1591  	for i := len(buf); i > 0; i-- {
  1592  		d |= Word(buf[i-1]) << s
  1593  		if s += 8; s == _S*8 {
  1594  			z[k] = d
  1595  			k++
  1596  			s = 0
  1597  			d = 0
  1598  		}
  1599  	}
  1600  	if k < len(z) {
  1601  		z[k] = d
  1602  	}
  1603  
  1604  	return z.norm()
  1605  }