github.com/peggyl/go@v0.0.0-20151008231540-ae315999c2d5/src/math/big/nat.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements unsigned multi-precision integers (natural
     6  // numbers). They are the building blocks for the implementation
     7  // of signed integers, rationals, and floating-point numbers.
     8  
     9  package big
    10  
    11  import "math/rand"
    12  
    13  // An unsigned integer x of the form
    14  //
    15  //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    16  //
    17  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    18  // with the digits x[i] as the slice elements.
    19  //
    20  // A number is normalized if the slice contains no leading 0 digits.
    21  // During arithmetic operations, denormalized values may occur but are
    22  // always normalized before returning the final result. The normalized
    23  // representation of 0 is the empty or nil slice (length = 0).
    24  //
    25  type nat []Word
    26  
    27  var (
    28  	natOne = nat{1}
    29  	natTwo = nat{2}
    30  	natTen = nat{10}
    31  )
    32  
    33  func (z nat) clear() {
    34  	for i := range z {
    35  		z[i] = 0
    36  	}
    37  }
    38  
    39  func (z nat) norm() nat {
    40  	i := len(z)
    41  	for i > 0 && z[i-1] == 0 {
    42  		i--
    43  	}
    44  	return z[0:i]
    45  }
    46  
    47  func (z nat) make(n int) nat {
    48  	if n <= cap(z) {
    49  		return z[:n] // reuse z
    50  	}
    51  	// Choosing a good value for e has significant performance impact
    52  	// because it increases the chance that a value can be reused.
    53  	const e = 4 // extra capacity
    54  	return make(nat, n, n+e)
    55  }
    56  
    57  func (z nat) setWord(x Word) nat {
    58  	if x == 0 {
    59  		return z[:0]
    60  	}
    61  	z = z.make(1)
    62  	z[0] = x
    63  	return z
    64  }
    65  
    66  func (z nat) setUint64(x uint64) nat {
    67  	// single-digit values
    68  	if w := Word(x); uint64(w) == x {
    69  		return z.setWord(w)
    70  	}
    71  
    72  	// compute number of words n required to represent x
    73  	n := 0
    74  	for t := x; t > 0; t >>= _W {
    75  		n++
    76  	}
    77  
    78  	// split x into n words
    79  	z = z.make(n)
    80  	for i := range z {
    81  		z[i] = Word(x & _M)
    82  		x >>= _W
    83  	}
    84  
    85  	return z
    86  }
    87  
    88  func (z nat) set(x nat) nat {
    89  	z = z.make(len(x))
    90  	copy(z, x)
    91  	return z
    92  }
    93  
    94  func (z nat) add(x, y nat) nat {
    95  	m := len(x)
    96  	n := len(y)
    97  
    98  	switch {
    99  	case m < n:
   100  		return z.add(y, x)
   101  	case m == 0:
   102  		// n == 0 because m >= n; result is 0
   103  		return z[:0]
   104  	case n == 0:
   105  		// result is x
   106  		return z.set(x)
   107  	}
   108  	// m > 0
   109  
   110  	z = z.make(m + 1)
   111  	c := addVV(z[0:n], x, y)
   112  	if m > n {
   113  		c = addVW(z[n:m], x[n:], c)
   114  	}
   115  	z[m] = c
   116  
   117  	return z.norm()
   118  }
   119  
   120  func (z nat) sub(x, y nat) nat {
   121  	m := len(x)
   122  	n := len(y)
   123  
   124  	switch {
   125  	case m < n:
   126  		panic("underflow")
   127  	case m == 0:
   128  		// n == 0 because m >= n; result is 0
   129  		return z[:0]
   130  	case n == 0:
   131  		// result is x
   132  		return z.set(x)
   133  	}
   134  	// m > 0
   135  
   136  	z = z.make(m)
   137  	c := subVV(z[0:n], x, y)
   138  	if m > n {
   139  		c = subVW(z[n:], x[n:], c)
   140  	}
   141  	if c != 0 {
   142  		panic("underflow")
   143  	}
   144  
   145  	return z.norm()
   146  }
   147  
   148  func (x nat) cmp(y nat) (r int) {
   149  	m := len(x)
   150  	n := len(y)
   151  	if m != n || m == 0 {
   152  		switch {
   153  		case m < n:
   154  			r = -1
   155  		case m > n:
   156  			r = 1
   157  		}
   158  		return
   159  	}
   160  
   161  	i := m - 1
   162  	for i > 0 && x[i] == y[i] {
   163  		i--
   164  	}
   165  
   166  	switch {
   167  	case x[i] < y[i]:
   168  		r = -1
   169  	case x[i] > y[i]:
   170  		r = 1
   171  	}
   172  	return
   173  }
   174  
   175  func (z nat) mulAddWW(x nat, y, r Word) nat {
   176  	m := len(x)
   177  	if m == 0 || y == 0 {
   178  		return z.setWord(r) // result is r
   179  	}
   180  	// m > 0
   181  
   182  	z = z.make(m + 1)
   183  	z[m] = mulAddVWW(z[0:m], x, y, r)
   184  
   185  	return z.norm()
   186  }
   187  
   188  // basicMul multiplies x and y and leaves the result in z.
   189  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   190  func basicMul(z, x, y nat) {
   191  	z[0 : len(x)+len(y)].clear() // initialize z
   192  	for i, d := range y {
   193  		if d != 0 {
   194  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   195  		}
   196  	}
   197  }
   198  
   199  // montgomery computes x*y*2^(-n*_W) mod m,
   200  // assuming k = -1/m mod 2^_W.
   201  // z is used for storing the result which is returned;
   202  // z must not alias x, y or m.
   203  func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
   204  	var c1, c2 Word
   205  	z = z.make(n)
   206  	z.clear()
   207  	for i := 0; i < n; i++ {
   208  		d := y[i]
   209  		c1 += addMulVVW(z, x, d)
   210  		t := z[0] * k
   211  		c2 = addMulVVW(z, m, t)
   212  
   213  		copy(z, z[1:])
   214  		z[n-1] = c1 + c2
   215  		if z[n-1] < c1 {
   216  			c1 = 1
   217  		} else {
   218  			c1 = 0
   219  		}
   220  	}
   221  	if c1 != 0 {
   222  		subVV(z, z, m)
   223  	}
   224  	return z
   225  }
   226  
   227  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   228  // Factored out for readability - do not use outside karatsuba.
   229  func karatsubaAdd(z, x nat, n int) {
   230  	if c := addVV(z[0:n], z, x); c != 0 {
   231  		addVW(z[n:n+n>>1], z[n:], c)
   232  	}
   233  }
   234  
   235  // Like karatsubaAdd, but does subtract.
   236  func karatsubaSub(z, x nat, n int) {
   237  	if c := subVV(z[0:n], z, x); c != 0 {
   238  		subVW(z[n:n+n>>1], z[n:], c)
   239  	}
   240  }
   241  
   242  // Operands that are shorter than karatsubaThreshold are multiplied using
   243  // "grade school" multiplication; for longer operands the Karatsuba algorithm
   244  // is used.
   245  var karatsubaThreshold int = 40 // computed by calibrate.go
   246  
   247  // karatsuba multiplies x and y and leaves the result in z.
   248  // Both x and y must have the same length n and n must be a
   249  // power of 2. The result vector z must have len(z) >= 6*n.
   250  // The (non-normalized) result is placed in z[0 : 2*n].
   251  func karatsuba(z, x, y nat) {
   252  	n := len(y)
   253  
   254  	// Switch to basic multiplication if numbers are odd or small.
   255  	// (n is always even if karatsubaThreshold is even, but be
   256  	// conservative)
   257  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   258  		basicMul(z, x, y)
   259  		return
   260  	}
   261  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   262  
   263  	// Karatsuba multiplication is based on the observation that
   264  	// for two numbers x and y with:
   265  	//
   266  	//   x = x1*b + x0
   267  	//   y = y1*b + y0
   268  	//
   269  	// the product x*y can be obtained with 3 products z2, z1, z0
   270  	// instead of 4:
   271  	//
   272  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   273  	//       =    z2*b*b +              z1*b +    z0
   274  	//
   275  	// with:
   276  	//
   277  	//   xd = x1 - x0
   278  	//   yd = y0 - y1
   279  	//
   280  	//   z1 =      xd*yd                    + z2 + z0
   281  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
   282  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
   283  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
   284  	//      = x1*y0                 + x0*y1
   285  
   286  	// split x, y into "digits"
   287  	n2 := n >> 1              // n2 >= 1
   288  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   289  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   290  
   291  	// z is used for the result and temporary storage:
   292  	//
   293  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   294  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   295  	//
   296  	// For each recursive call of karatsuba, an unused slice of
   297  	// z is passed in that has (at least) half the length of the
   298  	// caller's z.
   299  
   300  	// compute z0 and z2 with the result "in place" in z
   301  	karatsuba(z, x0, y0)     // z0 = x0*y0
   302  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
   303  
   304  	// compute xd (or the negative value if underflow occurs)
   305  	s := 1 // sign of product xd*yd
   306  	xd := z[2*n : 2*n+n2]
   307  	if subVV(xd, x1, x0) != 0 { // x1-x0
   308  		s = -s
   309  		subVV(xd, x0, x1) // x0-x1
   310  	}
   311  
   312  	// compute yd (or the negative value if underflow occurs)
   313  	yd := z[2*n+n2 : 3*n]
   314  	if subVV(yd, y0, y1) != 0 { // y0-y1
   315  		s = -s
   316  		subVV(yd, y1, y0) // y1-y0
   317  	}
   318  
   319  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   320  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   321  	p := z[n*3:]
   322  	karatsuba(p, xd, yd)
   323  
   324  	// save original z2:z0
   325  	// (ok to use upper half of z since we're done recursing)
   326  	r := z[n*4:]
   327  	copy(r, z[:n*2])
   328  
   329  	// add up all partial products
   330  	//
   331  	//   2*n     n     0
   332  	// z = [ z2  | z0  ]
   333  	//   +    [ z0  ]
   334  	//   +    [ z2  ]
   335  	//   +    [  p  ]
   336  	//
   337  	karatsubaAdd(z[n2:], r, n)
   338  	karatsubaAdd(z[n2:], r[n:], n)
   339  	if s > 0 {
   340  		karatsubaAdd(z[n2:], p, n)
   341  	} else {
   342  		karatsubaSub(z[n2:], p, n)
   343  	}
   344  }
   345  
   346  // alias reports whether x and y share the same base array.
   347  func alias(x, y nat) bool {
   348  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   349  }
   350  
   351  // addAt implements z += x<<(_W*i); z must be long enough.
   352  // (we don't use nat.add because we need z to stay the same
   353  // slice, and we don't need to normalize z after each addition)
   354  func addAt(z, x nat, i int) {
   355  	if n := len(x); n > 0 {
   356  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   357  			j := i + n
   358  			if j < len(z) {
   359  				addVW(z[j:], z[j:], c)
   360  			}
   361  		}
   362  	}
   363  }
   364  
   365  func max(x, y int) int {
   366  	if x > y {
   367  		return x
   368  	}
   369  	return y
   370  }
   371  
   372  // karatsubaLen computes an approximation to the maximum k <= n such that
   373  // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   374  // result is the largest number that can be divided repeatedly by 2 before
   375  // becoming about the value of karatsubaThreshold.
   376  func karatsubaLen(n int) int {
   377  	i := uint(0)
   378  	for n > karatsubaThreshold {
   379  		n >>= 1
   380  		i++
   381  	}
   382  	return n << i
   383  }
   384  
   385  func (z nat) mul(x, y nat) nat {
   386  	m := len(x)
   387  	n := len(y)
   388  
   389  	switch {
   390  	case m < n:
   391  		return z.mul(y, x)
   392  	case m == 0 || n == 0:
   393  		return z[:0]
   394  	case n == 1:
   395  		return z.mulAddWW(x, y[0], 0)
   396  	}
   397  	// m >= n > 1
   398  
   399  	// determine if z can be reused
   400  	if alias(z, x) || alias(z, y) {
   401  		z = nil // z is an alias for x or y - cannot reuse
   402  	}
   403  
   404  	// use basic multiplication if the numbers are small
   405  	if n < karatsubaThreshold {
   406  		z = z.make(m + n)
   407  		basicMul(z, x, y)
   408  		return z.norm()
   409  	}
   410  	// m >= n && n >= karatsubaThreshold && n >= 2
   411  
   412  	// determine Karatsuba length k such that
   413  	//
   414  	//   x = xh*b + x0  (0 <= x0 < b)
   415  	//   y = yh*b + y0  (0 <= y0 < b)
   416  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   417  	//
   418  	k := karatsubaLen(n)
   419  	// k <= n
   420  
   421  	// multiply x0 and y0 via Karatsuba
   422  	x0 := x[0:k]              // x0 is not normalized
   423  	y0 := y[0:k]              // y0 is not normalized
   424  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   425  	karatsuba(z, x0, y0)
   426  	z = z[0 : m+n]  // z has final length but may be incomplete
   427  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
   428  
   429  	// If xh != 0 or yh != 0, add the missing terms to z. For
   430  	//
   431  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
   432  	//   yh =                         y1*b (0 <= y1 < b)
   433  	//
   434  	// the missing terms are
   435  	//
   436  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
   437  	//
   438  	// since all the yi for i > 1 are 0 by choice of k: If any of them
   439  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
   440  	// be a larger valid threshold contradicting the assumption about k.
   441  	//
   442  	if k < n || m != n {
   443  		var t nat
   444  
   445  		// add x0*y1*b
   446  		x0 := x0.norm()
   447  		y1 := y[k:]       // y1 is normalized because y is
   448  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
   449  		addAt(z, t, k)
   450  
   451  		// add xi*y0<<i, xi*y1*b<<(i+k)
   452  		y0 := y0.norm()
   453  		for i := k; i < len(x); i += k {
   454  			xi := x[i:]
   455  			if len(xi) > k {
   456  				xi = xi[:k]
   457  			}
   458  			xi = xi.norm()
   459  			t = t.mul(xi, y0)
   460  			addAt(z, t, i)
   461  			t = t.mul(xi, y1)
   462  			addAt(z, t, i+k)
   463  		}
   464  	}
   465  
   466  	return z.norm()
   467  }
   468  
   469  // mulRange computes the product of all the unsigned integers in the
   470  // range [a, b] inclusively. If a > b (empty range), the result is 1.
   471  func (z nat) mulRange(a, b uint64) nat {
   472  	switch {
   473  	case a == 0:
   474  		// cut long ranges short (optimization)
   475  		return z.setUint64(0)
   476  	case a > b:
   477  		return z.setUint64(1)
   478  	case a == b:
   479  		return z.setUint64(a)
   480  	case a+1 == b:
   481  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   482  	}
   483  	m := (a + b) / 2
   484  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   485  }
   486  
   487  // q = (x-r)/y, with 0 <= r < y
   488  func (z nat) divW(x nat, y Word) (q nat, r Word) {
   489  	m := len(x)
   490  	switch {
   491  	case y == 0:
   492  		panic("division by zero")
   493  	case y == 1:
   494  		q = z.set(x) // result is x
   495  		return
   496  	case m == 0:
   497  		q = z[:0] // result is 0
   498  		return
   499  	}
   500  	// m > 0
   501  	z = z.make(m)
   502  	r = divWVW(z, 0, x, y)
   503  	q = z.norm()
   504  	return
   505  }
   506  
   507  func (z nat) div(z2, u, v nat) (q, r nat) {
   508  	if len(v) == 0 {
   509  		panic("division by zero")
   510  	}
   511  
   512  	if u.cmp(v) < 0 {
   513  		q = z[:0]
   514  		r = z2.set(u)
   515  		return
   516  	}
   517  
   518  	if len(v) == 1 {
   519  		var r2 Word
   520  		q, r2 = z.divW(u, v[0])
   521  		r = z2.setWord(r2)
   522  		return
   523  	}
   524  
   525  	q, r = z.divLarge(z2, u, v)
   526  	return
   527  }
   528  
   529  // q = (uIn-r)/v, with 0 <= r < y
   530  // Uses z as storage for q, and u as storage for r if possible.
   531  // See Knuth, Volume 2, section 4.3.1, Algorithm D.
   532  // Preconditions:
   533  //    len(v) >= 2
   534  //    len(uIn) >= len(v)
   535  func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   536  	n := len(v)
   537  	m := len(uIn) - n
   538  
   539  	// determine if z can be reused
   540  	// TODO(gri) should find a better solution - this if statement
   541  	//           is very costly (see e.g. time pidigits -s -n 10000)
   542  	if alias(z, uIn) || alias(z, v) {
   543  		z = nil // z is an alias for uIn or v - cannot reuse
   544  	}
   545  	q = z.make(m + 1)
   546  
   547  	qhatv := make(nat, n+1)
   548  	if alias(u, uIn) || alias(u, v) {
   549  		u = nil // u is an alias for uIn or v - cannot reuse
   550  	}
   551  	u = u.make(len(uIn) + 1)
   552  	u.clear() // TODO(gri) no need to clear if we allocated a new u
   553  
   554  	// D1.
   555  	shift := nlz(v[n-1])
   556  	if shift > 0 {
   557  		// do not modify v, it may be used by another goroutine simultaneously
   558  		v1 := make(nat, n)
   559  		shlVU(v1, v, shift)
   560  		v = v1
   561  	}
   562  	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   563  
   564  	// D2.
   565  	for j := m; j >= 0; j-- {
   566  		// D3.
   567  		qhat := Word(_M)
   568  		if u[j+n] != v[n-1] {
   569  			var rhat Word
   570  			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   571  
   572  			// x1 | x2 = q̂v_{n-2}
   573  			x1, x2 := mulWW(qhat, v[n-2])
   574  			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   575  			for greaterThan(x1, x2, rhat, u[j+n-2]) {
   576  				qhat--
   577  				prevRhat := rhat
   578  				rhat += v[n-1]
   579  				// v[n-1] >= 0, so this tests for overflow.
   580  				if rhat < prevRhat {
   581  					break
   582  				}
   583  				x1, x2 = mulWW(qhat, v[n-2])
   584  			}
   585  		}
   586  
   587  		// D4.
   588  		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   589  
   590  		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   591  		if c != 0 {
   592  			c := addVV(u[j:j+n], u[j:], v)
   593  			u[j+n] += c
   594  			qhat--
   595  		}
   596  
   597  		q[j] = qhat
   598  	}
   599  
   600  	q = q.norm()
   601  	shrVU(u, u, shift)
   602  	r = u.norm()
   603  
   604  	return q, r
   605  }
   606  
   607  // Length of x in bits. x must be normalized.
   608  func (x nat) bitLen() int {
   609  	if i := len(x) - 1; i >= 0 {
   610  		return i*_W + bitLen(x[i])
   611  	}
   612  	return 0
   613  }
   614  
   615  const deBruijn32 = 0x077CB531
   616  
   617  var deBruijn32Lookup = []byte{
   618  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
   619  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
   620  }
   621  
   622  const deBruijn64 = 0x03f79d71b4ca8b09
   623  
   624  var deBruijn64Lookup = []byte{
   625  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   626  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
   627  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
   628  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
   629  }
   630  
   631  // trailingZeroBits returns the number of consecutive least significant zero
   632  // bits of x.
   633  func trailingZeroBits(x Word) uint {
   634  	// x & -x leaves only the right-most bit set in the word. Let k be the
   635  	// index of that bit. Since only a single bit is set, the value is two
   636  	// to the power of k. Multiplying by a power of two is equivalent to
   637  	// left shifting, in this case by k bits.  The de Bruijn constant is
   638  	// such that all six bit, consecutive substrings are distinct.
   639  	// Therefore, if we have a left shifted version of this constant we can
   640  	// find by how many bits it was shifted by looking at which six bit
   641  	// substring ended up at the top of the word.
   642  	// (Knuth, volume 4, section 7.3.1)
   643  	switch _W {
   644  	case 32:
   645  		return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
   646  	case 64:
   647  		return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
   648  	default:
   649  		panic("unknown word size")
   650  	}
   651  }
   652  
   653  // trailingZeroBits returns the number of consecutive least significant zero
   654  // bits of x.
   655  func (x nat) trailingZeroBits() uint {
   656  	if len(x) == 0 {
   657  		return 0
   658  	}
   659  	var i uint
   660  	for x[i] == 0 {
   661  		i++
   662  	}
   663  	// x[i] != 0
   664  	return i*_W + trailingZeroBits(x[i])
   665  }
   666  
   667  // z = x << s
   668  func (z nat) shl(x nat, s uint) nat {
   669  	m := len(x)
   670  	if m == 0 {
   671  		return z[:0]
   672  	}
   673  	// m > 0
   674  
   675  	n := m + int(s/_W)
   676  	z = z.make(n + 1)
   677  	z[n] = shlVU(z[n-m:n], x, s%_W)
   678  	z[0 : n-m].clear()
   679  
   680  	return z.norm()
   681  }
   682  
   683  // z = x >> s
   684  func (z nat) shr(x nat, s uint) nat {
   685  	m := len(x)
   686  	n := m - int(s/_W)
   687  	if n <= 0 {
   688  		return z[:0]
   689  	}
   690  	// n > 0
   691  
   692  	z = z.make(n)
   693  	shrVU(z, x[m-n:], s%_W)
   694  
   695  	return z.norm()
   696  }
   697  
   698  func (z nat) setBit(x nat, i uint, b uint) nat {
   699  	j := int(i / _W)
   700  	m := Word(1) << (i % _W)
   701  	n := len(x)
   702  	switch b {
   703  	case 0:
   704  		z = z.make(n)
   705  		copy(z, x)
   706  		if j >= n {
   707  			// no need to grow
   708  			return z
   709  		}
   710  		z[j] &^= m
   711  		return z.norm()
   712  	case 1:
   713  		if j >= n {
   714  			z = z.make(j + 1)
   715  			z[n:].clear()
   716  		} else {
   717  			z = z.make(n)
   718  		}
   719  		copy(z, x)
   720  		z[j] |= m
   721  		// no need to normalize
   722  		return z
   723  	}
   724  	panic("set bit is not 0 or 1")
   725  }
   726  
   727  // bit returns the value of the i'th bit, with lsb == bit 0.
   728  func (x nat) bit(i uint) uint {
   729  	j := i / _W
   730  	if j >= uint(len(x)) {
   731  		return 0
   732  	}
   733  	// 0 <= j < len(x)
   734  	return uint(x[j] >> (i % _W) & 1)
   735  }
   736  
   737  // sticky returns 1 if there's a 1 bit within the
   738  // i least significant bits, otherwise it returns 0.
   739  func (x nat) sticky(i uint) uint {
   740  	j := i / _W
   741  	if j >= uint(len(x)) {
   742  		if len(x) == 0 {
   743  			return 0
   744  		}
   745  		return 1
   746  	}
   747  	// 0 <= j < len(x)
   748  	for _, x := range x[:j] {
   749  		if x != 0 {
   750  			return 1
   751  		}
   752  	}
   753  	if x[j]<<(_W-i%_W) != 0 {
   754  		return 1
   755  	}
   756  	return 0
   757  }
   758  
   759  func (z nat) and(x, y nat) nat {
   760  	m := len(x)
   761  	n := len(y)
   762  	if m > n {
   763  		m = n
   764  	}
   765  	// m <= n
   766  
   767  	z = z.make(m)
   768  	for i := 0; i < m; i++ {
   769  		z[i] = x[i] & y[i]
   770  	}
   771  
   772  	return z.norm()
   773  }
   774  
   775  func (z nat) andNot(x, y nat) nat {
   776  	m := len(x)
   777  	n := len(y)
   778  	if n > m {
   779  		n = m
   780  	}
   781  	// m >= n
   782  
   783  	z = z.make(m)
   784  	for i := 0; i < n; i++ {
   785  		z[i] = x[i] &^ y[i]
   786  	}
   787  	copy(z[n:m], x[n:m])
   788  
   789  	return z.norm()
   790  }
   791  
   792  func (z nat) or(x, y nat) nat {
   793  	m := len(x)
   794  	n := len(y)
   795  	s := x
   796  	if m < n {
   797  		n, m = m, n
   798  		s = y
   799  	}
   800  	// m >= n
   801  
   802  	z = z.make(m)
   803  	for i := 0; i < n; i++ {
   804  		z[i] = x[i] | y[i]
   805  	}
   806  	copy(z[n:m], s[n:m])
   807  
   808  	return z.norm()
   809  }
   810  
   811  func (z nat) xor(x, y nat) nat {
   812  	m := len(x)
   813  	n := len(y)
   814  	s := x
   815  	if m < n {
   816  		n, m = m, n
   817  		s = y
   818  	}
   819  	// m >= n
   820  
   821  	z = z.make(m)
   822  	for i := 0; i < n; i++ {
   823  		z[i] = x[i] ^ y[i]
   824  	}
   825  	copy(z[n:m], s[n:m])
   826  
   827  	return z.norm()
   828  }
   829  
   830  // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2)
   831  func greaterThan(x1, x2, y1, y2 Word) bool {
   832  	return x1 > y1 || x1 == y1 && x2 > y2
   833  }
   834  
   835  // modW returns x % d.
   836  func (x nat) modW(d Word) (r Word) {
   837  	// TODO(agl): we don't actually need to store the q value.
   838  	var q nat
   839  	q = q.make(len(x))
   840  	return divWVW(q, 0, x, d)
   841  }
   842  
   843  // random creates a random integer in [0..limit), using the space in z if
   844  // possible. n is the bit length of limit.
   845  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
   846  	if alias(z, limit) {
   847  		z = nil // z is an alias for limit - cannot reuse
   848  	}
   849  	z = z.make(len(limit))
   850  
   851  	bitLengthOfMSW := uint(n % _W)
   852  	if bitLengthOfMSW == 0 {
   853  		bitLengthOfMSW = _W
   854  	}
   855  	mask := Word((1 << bitLengthOfMSW) - 1)
   856  
   857  	for {
   858  		switch _W {
   859  		case 32:
   860  			for i := range z {
   861  				z[i] = Word(rand.Uint32())
   862  			}
   863  		case 64:
   864  			for i := range z {
   865  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
   866  			}
   867  		default:
   868  			panic("unknown word size")
   869  		}
   870  		z[len(limit)-1] &= mask
   871  		if z.cmp(limit) < 0 {
   872  			break
   873  		}
   874  	}
   875  
   876  	return z.norm()
   877  }
   878  
   879  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
   880  // otherwise it sets z to x**y. The result is the value of z.
   881  func (z nat) expNN(x, y, m nat) nat {
   882  	if alias(z, x) || alias(z, y) {
   883  		// We cannot allow in-place modification of x or y.
   884  		z = nil
   885  	}
   886  
   887  	// x**y mod 1 == 0
   888  	if len(m) == 1 && m[0] == 1 {
   889  		return z.setWord(0)
   890  	}
   891  	// m == 0 || m > 1
   892  
   893  	// x**0 == 1
   894  	if len(y) == 0 {
   895  		return z.setWord(1)
   896  	}
   897  	// y > 0
   898  
   899  	// x**1 mod m == x mod m
   900  	if len(y) == 1 && y[0] == 1 && len(m) != 0 {
   901  		_, z = z.div(z, x, m)
   902  		return z
   903  	}
   904  	// y > 1
   905  
   906  	if len(m) != 0 {
   907  		// We likely end up being as long as the modulus.
   908  		z = z.make(len(m))
   909  	}
   910  	z = z.set(x)
   911  
   912  	// If the base is non-trivial and the exponent is large, we use
   913  	// 4-bit, windowed exponentiation. This involves precomputing 14 values
   914  	// (x^2...x^15) but then reduces the number of multiply-reduces by a
   915  	// third. Even for a 32-bit exponent, this reduces the number of
   916  	// operations. Uses Montgomery method for odd moduli.
   917  	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
   918  		if m[0]&1 == 1 {
   919  			return z.expNNMontgomery(x, y, m)
   920  		}
   921  		return z.expNNWindowed(x, y, m)
   922  	}
   923  
   924  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
   925  	shift := nlz(v) + 1
   926  	v <<= shift
   927  	var q nat
   928  
   929  	const mask = 1 << (_W - 1)
   930  
   931  	// We walk through the bits of the exponent one by one. Each time we
   932  	// see a bit, we square, thus doubling the power. If the bit is a one,
   933  	// we also multiply by x, thus adding one to the power.
   934  
   935  	w := _W - int(shift)
   936  	// zz and r are used to avoid allocating in mul and div as
   937  	// otherwise the arguments would alias.
   938  	var zz, r nat
   939  	for j := 0; j < w; j++ {
   940  		zz = zz.mul(z, z)
   941  		zz, z = z, zz
   942  
   943  		if v&mask != 0 {
   944  			zz = zz.mul(z, x)
   945  			zz, z = z, zz
   946  		}
   947  
   948  		if len(m) != 0 {
   949  			zz, r = zz.div(r, z, m)
   950  			zz, r, q, z = q, z, zz, r
   951  		}
   952  
   953  		v <<= 1
   954  	}
   955  
   956  	for i := len(y) - 2; i >= 0; i-- {
   957  		v = y[i]
   958  
   959  		for j := 0; j < _W; j++ {
   960  			zz = zz.mul(z, z)
   961  			zz, z = z, zz
   962  
   963  			if v&mask != 0 {
   964  				zz = zz.mul(z, x)
   965  				zz, z = z, zz
   966  			}
   967  
   968  			if len(m) != 0 {
   969  				zz, r = zz.div(r, z, m)
   970  				zz, r, q, z = q, z, zz, r
   971  			}
   972  
   973  			v <<= 1
   974  		}
   975  	}
   976  
   977  	return z.norm()
   978  }
   979  
   980  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
   981  func (z nat) expNNWindowed(x, y, m nat) nat {
   982  	// zz and r are used to avoid allocating in mul and div as otherwise
   983  	// the arguments would alias.
   984  	var zz, r nat
   985  
   986  	const n = 4
   987  	// powers[i] contains x^i.
   988  	var powers [1 << n]nat
   989  	powers[0] = natOne
   990  	powers[1] = x
   991  	for i := 2; i < 1<<n; i += 2 {
   992  		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
   993  		*p = p.mul(*p2, *p2)
   994  		zz, r = zz.div(r, *p, m)
   995  		*p, r = r, *p
   996  		*p1 = p1.mul(*p, x)
   997  		zz, r = zz.div(r, *p1, m)
   998  		*p1, r = r, *p1
   999  	}
  1000  
  1001  	z = z.setWord(1)
  1002  
  1003  	for i := len(y) - 1; i >= 0; i-- {
  1004  		yi := y[i]
  1005  		for j := 0; j < _W; j += n {
  1006  			if i != len(y)-1 || j != 0 {
  1007  				// Unrolled loop for significant performance
  1008  				// gain.  Use go test -bench=".*" in crypto/rsa
  1009  				// to check performance before making changes.
  1010  				zz = zz.mul(z, z)
  1011  				zz, z = z, zz
  1012  				zz, r = zz.div(r, z, m)
  1013  				z, r = r, z
  1014  
  1015  				zz = zz.mul(z, z)
  1016  				zz, z = z, zz
  1017  				zz, r = zz.div(r, z, m)
  1018  				z, r = r, z
  1019  
  1020  				zz = zz.mul(z, z)
  1021  				zz, z = z, zz
  1022  				zz, r = zz.div(r, z, m)
  1023  				z, r = r, z
  1024  
  1025  				zz = zz.mul(z, z)
  1026  				zz, z = z, zz
  1027  				zz, r = zz.div(r, z, m)
  1028  				z, r = r, z
  1029  			}
  1030  
  1031  			zz = zz.mul(z, powers[yi>>(_W-n)])
  1032  			zz, z = z, zz
  1033  			zz, r = zz.div(r, z, m)
  1034  			z, r = r, z
  1035  
  1036  			yi <<= n
  1037  		}
  1038  	}
  1039  
  1040  	return z.norm()
  1041  }
  1042  
  1043  // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
  1044  // Uses Montgomery representation.
  1045  func (z nat) expNNMontgomery(x, y, m nat) nat {
  1046  	var zz, one, rr, RR nat
  1047  
  1048  	numWords := len(m)
  1049  
  1050  	// We want the lengths of x and m to be equal.
  1051  	if len(x) > numWords {
  1052  		_, rr = rr.div(rr, x, m)
  1053  	} else if len(x) < numWords {
  1054  		rr = rr.make(numWords)
  1055  		rr.clear()
  1056  		for i := range x {
  1057  			rr[i] = x[i]
  1058  		}
  1059  	} else {
  1060  		rr = x
  1061  	}
  1062  	x = rr
  1063  
  1064  	// Ideally the precomputations would be performed outside, and reused
  1065  	// k0 = -mˆ-1 mod 2ˆ_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
  1066  	// Iteration for Multiplicative Inverses Modulo Prime Powers".
  1067  	k0 := 2 - m[0]
  1068  	t := m[0] - 1
  1069  	for i := 1; i < _W; i <<= 1 {
  1070  		t *= t
  1071  		k0 *= (t + 1)
  1072  	}
  1073  	k0 = -k0
  1074  
  1075  	// RR = 2ˆ(2*_W*len(m)) mod m
  1076  	RR = RR.setWord(1)
  1077  	zz = zz.shl(RR, uint(2*numWords*_W))
  1078  	_, RR = RR.div(RR, zz, m)
  1079  	if len(RR) < numWords {
  1080  		zz = zz.make(numWords)
  1081  		copy(zz, RR)
  1082  		RR = zz
  1083  	}
  1084  	// one = 1, with equal length to that of m
  1085  	one = one.make(numWords)
  1086  	one.clear()
  1087  	one[0] = 1
  1088  
  1089  	const n = 4
  1090  	// powers[i] contains x^i
  1091  	var powers [1 << n]nat
  1092  	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
  1093  	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
  1094  	for i := 2; i < 1<<n; i++ {
  1095  		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
  1096  	}
  1097  
  1098  	// initialize z = 1 (Montgomery 1)
  1099  	z = z.make(numWords)
  1100  	copy(z, powers[0])
  1101  
  1102  	zz = zz.make(numWords)
  1103  
  1104  	// same windowed exponent, but with Montgomery multiplications
  1105  	for i := len(y) - 1; i >= 0; i-- {
  1106  		yi := y[i]
  1107  		for j := 0; j < _W; j += n {
  1108  			if i != len(y)-1 || j != 0 {
  1109  				zz = zz.montgomery(z, z, m, k0, numWords)
  1110  				z = z.montgomery(zz, zz, m, k0, numWords)
  1111  				zz = zz.montgomery(z, z, m, k0, numWords)
  1112  				z = z.montgomery(zz, zz, m, k0, numWords)
  1113  			}
  1114  			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
  1115  			z, zz = zz, z
  1116  			yi <<= n
  1117  		}
  1118  	}
  1119  	// convert to regular number
  1120  	zz = zz.montgomery(z, one, m, k0, numWords)
  1121  	return zz.norm()
  1122  }
  1123  
  1124  // probablyPrime performs n Miller-Rabin tests to check whether x is prime.
  1125  // If x is prime, it returns true.
  1126  // If x is not prime, it returns false with probability at least 1 - ¼ⁿ.
  1127  //
  1128  // It is not suitable for judging primes that an adversary may have crafted
  1129  // to fool this test.
  1130  func (n nat) probablyPrime(reps int) bool {
  1131  	if len(n) == 0 {
  1132  		return false
  1133  	}
  1134  
  1135  	if len(n) == 1 {
  1136  		if n[0] < 2 {
  1137  			return false
  1138  		}
  1139  
  1140  		if n[0]%2 == 0 {
  1141  			return n[0] == 2
  1142  		}
  1143  
  1144  		// We have to exclude these cases because we reject all
  1145  		// multiples of these numbers below.
  1146  		switch n[0] {
  1147  		case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1148  			return true
  1149  		}
  1150  	}
  1151  
  1152  	if n[0]&1 == 0 {
  1153  		return false // n is even
  1154  	}
  1155  
  1156  	const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1157  	const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1158  
  1159  	var r Word
  1160  	switch _W {
  1161  	case 32:
  1162  		r = n.modW(primesProduct32)
  1163  	case 64:
  1164  		r = n.modW(primesProduct64 & _M)
  1165  	default:
  1166  		panic("Unknown word size")
  1167  	}
  1168  
  1169  	if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1170  		r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1171  		return false
  1172  	}
  1173  
  1174  	if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1175  		r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1176  		return false
  1177  	}
  1178  
  1179  	nm1 := nat(nil).sub(n, natOne)
  1180  	// determine q, k such that nm1 = q << k
  1181  	k := nm1.trailingZeroBits()
  1182  	q := nat(nil).shr(nm1, k)
  1183  
  1184  	nm3 := nat(nil).sub(nm1, natTwo)
  1185  	rand := rand.New(rand.NewSource(int64(n[0])))
  1186  
  1187  	var x, y, quotient nat
  1188  	nm3Len := nm3.bitLen()
  1189  
  1190  NextRandom:
  1191  	for i := 0; i < reps; i++ {
  1192  		x = x.random(rand, nm3, nm3Len)
  1193  		x = x.add(x, natTwo)
  1194  		y = y.expNN(x, q, n)
  1195  		if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1196  			continue
  1197  		}
  1198  		for j := uint(1); j < k; j++ {
  1199  			y = y.mul(y, y)
  1200  			quotient, y = quotient.div(y, y, n)
  1201  			if y.cmp(nm1) == 0 {
  1202  				continue NextRandom
  1203  			}
  1204  			if y.cmp(natOne) == 0 {
  1205  				return false
  1206  			}
  1207  		}
  1208  		return false
  1209  	}
  1210  
  1211  	return true
  1212  }
  1213  
  1214  // bytes writes the value of z into buf using big-endian encoding.
  1215  // len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1216  // slice buf[i:]. The number i of unused bytes at the beginning of
  1217  // buf is returned as result.
  1218  func (z nat) bytes(buf []byte) (i int) {
  1219  	i = len(buf)
  1220  	for _, d := range z {
  1221  		for j := 0; j < _S; j++ {
  1222  			i--
  1223  			buf[i] = byte(d)
  1224  			d >>= 8
  1225  		}
  1226  	}
  1227  
  1228  	for i < len(buf) && buf[i] == 0 {
  1229  		i++
  1230  	}
  1231  
  1232  	return
  1233  }
  1234  
  1235  // setBytes interprets buf as the bytes of a big-endian unsigned
  1236  // integer, sets z to that value, and returns z.
  1237  func (z nat) setBytes(buf []byte) nat {
  1238  	z = z.make((len(buf) + _S - 1) / _S)
  1239  
  1240  	k := 0
  1241  	s := uint(0)
  1242  	var d Word
  1243  	for i := len(buf); i > 0; i-- {
  1244  		d |= Word(buf[i-1]) << s
  1245  		if s += 8; s == _S*8 {
  1246  			z[k] = d
  1247  			k++
  1248  			s = 0
  1249  			d = 0
  1250  		}
  1251  	}
  1252  	if k < len(z) {
  1253  		z[k] = d
  1254  	}
  1255  
  1256  	return z.norm()
  1257  }