github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/math/big/nat.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements unsigned multi-precision integers (natural
     6  // numbers). They are the building blocks for the implementation
     7  // of signed integers, rationals, and floating-point numbers.
     8  
     9  package big
    10  
    11  import (
    12  	"math/rand"
    13  	"sync"
    14  )
    15  
    16  // An unsigned integer x of the form
    17  //
    18  //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    19  //
    20  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    21  // with the digits x[i] as the slice elements.
    22  //
    23  // A number is normalized if the slice contains no leading 0 digits.
    24  // During arithmetic operations, denormalized values may occur but are
    25  // always normalized before returning the final result. The normalized
    26  // representation of 0 is the empty or nil slice (length = 0).
    27  //
    28  type nat []Word
    29  
    30  var (
    31  	natOne = nat{1}
    32  	natTwo = nat{2}
    33  	natTen = nat{10}
    34  )
    35  
    36  func (z nat) clear() {
    37  	for i := range z {
    38  		z[i] = 0
    39  	}
    40  }
    41  
    42  func (z nat) norm() nat {
    43  	i := len(z)
    44  	for i > 0 && z[i-1] == 0 {
    45  		i--
    46  	}
    47  	return z[0:i]
    48  }
    49  
    50  func (z nat) make(n int) nat {
    51  	if n <= cap(z) {
    52  		return z[:n] // reuse z
    53  	}
    54  	// Choosing a good value for e has significant performance impact
    55  	// because it increases the chance that a value can be reused.
    56  	const e = 4 // extra capacity
    57  	return make(nat, n, n+e)
    58  }
    59  
    60  func (z nat) setWord(x Word) nat {
    61  	if x == 0 {
    62  		return z[:0]
    63  	}
    64  	z = z.make(1)
    65  	z[0] = x
    66  	return z
    67  }
    68  
    69  func (z nat) setUint64(x uint64) nat {
    70  	// single-digit values
    71  	if w := Word(x); uint64(w) == x {
    72  		return z.setWord(w)
    73  	}
    74  
    75  	// compute number of words n required to represent x
    76  	n := 0
    77  	for t := x; t > 0; t >>= _W {
    78  		n++
    79  	}
    80  
    81  	// split x into n words
    82  	z = z.make(n)
    83  	for i := range z {
    84  		z[i] = Word(x & _M)
    85  		x >>= _W
    86  	}
    87  
    88  	return z
    89  }
    90  
    91  func (z nat) set(x nat) nat {
    92  	z = z.make(len(x))
    93  	copy(z, x)
    94  	return z
    95  }
    96  
    97  func (z nat) add(x, y nat) nat {
    98  	m := len(x)
    99  	n := len(y)
   100  
   101  	switch {
   102  	case m < n:
   103  		return z.add(y, x)
   104  	case m == 0:
   105  		// n == 0 because m >= n; result is 0
   106  		return z[:0]
   107  	case n == 0:
   108  		// result is x
   109  		return z.set(x)
   110  	}
   111  	// m > 0
   112  
   113  	z = z.make(m + 1)
   114  	c := addVV(z[0:n], x, y)
   115  	if m > n {
   116  		c = addVW(z[n:m], x[n:], c)
   117  	}
   118  	z[m] = c
   119  
   120  	return z.norm()
   121  }
   122  
   123  func (z nat) sub(x, y nat) nat {
   124  	m := len(x)
   125  	n := len(y)
   126  
   127  	switch {
   128  	case m < n:
   129  		panic("underflow")
   130  	case m == 0:
   131  		// n == 0 because m >= n; result is 0
   132  		return z[:0]
   133  	case n == 0:
   134  		// result is x
   135  		return z.set(x)
   136  	}
   137  	// m > 0
   138  
   139  	z = z.make(m)
   140  	c := subVV(z[0:n], x, y)
   141  	if m > n {
   142  		c = subVW(z[n:], x[n:], c)
   143  	}
   144  	if c != 0 {
   145  		panic("underflow")
   146  	}
   147  
   148  	return z.norm()
   149  }
   150  
   151  func (x nat) cmp(y nat) (r int) {
   152  	m := len(x)
   153  	n := len(y)
   154  	if m != n || m == 0 {
   155  		switch {
   156  		case m < n:
   157  			r = -1
   158  		case m > n:
   159  			r = 1
   160  		}
   161  		return
   162  	}
   163  
   164  	i := m - 1
   165  	for i > 0 && x[i] == y[i] {
   166  		i--
   167  	}
   168  
   169  	switch {
   170  	case x[i] < y[i]:
   171  		r = -1
   172  	case x[i] > y[i]:
   173  		r = 1
   174  	}
   175  	return
   176  }
   177  
   178  func (z nat) mulAddWW(x nat, y, r Word) nat {
   179  	m := len(x)
   180  	if m == 0 || y == 0 {
   181  		return z.setWord(r) // result is r
   182  	}
   183  	// m > 0
   184  
   185  	z = z.make(m + 1)
   186  	z[m] = mulAddVWW(z[0:m], x, y, r)
   187  
   188  	return z.norm()
   189  }
   190  
   191  // basicMul multiplies x and y and leaves the result in z.
   192  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   193  func basicMul(z, x, y nat) {
   194  	z[0 : len(x)+len(y)].clear() // initialize z
   195  	for i, d := range y {
   196  		if d != 0 {
   197  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   198  		}
   199  	}
   200  }
   201  
   202  // montgomery computes z mod m = x*y*2**(-n*_W) mod m,
   203  // assuming k = -1/m mod 2**_W.
   204  // z is used for storing the result which is returned;
   205  // z must not alias x, y or m.
   206  // See Gueron, "Efficient Software Implementations of Modular Exponentiation".
   207  // https://eprint.iacr.org/2011/239.pdf
   208  // In the terminology of that paper, this is an "Almost Montgomery Multiplication":
   209  // x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
   210  // z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
   211  func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
   212  	// This code assumes x, y, m are all the same length, n.
   213  	// (required by addMulVVW and the for loop).
   214  	// It also assumes that x, y are already reduced mod m,
   215  	// or else the result will not be properly reduced.
   216  	if len(x) != n || len(y) != n || len(m) != n {
   217  		panic("math/big: mismatched montgomery number lengths")
   218  	}
   219  	z = z.make(n)
   220  	z.clear()
   221  	var c Word
   222  	for i := 0; i < n; i++ {
   223  		d := y[i]
   224  		c2 := addMulVVW(z, x, d)
   225  		t := z[0] * k
   226  		c3 := addMulVVW(z, m, t)
   227  		copy(z, z[1:])
   228  		cx := c + c2
   229  		cy := cx + c3
   230  		z[n-1] = cy
   231  		if cx < c2 || cy < c3 {
   232  			c = 1
   233  		} else {
   234  			c = 0
   235  		}
   236  	}
   237  	if c != 0 {
   238  		subVV(z, z, m)
   239  	}
   240  	return z
   241  }
   242  
   243  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   244  // Factored out for readability - do not use outside karatsuba.
   245  func karatsubaAdd(z, x nat, n int) {
   246  	if c := addVV(z[0:n], z, x); c != 0 {
   247  		addVW(z[n:n+n>>1], z[n:], c)
   248  	}
   249  }
   250  
   251  // Like karatsubaAdd, but does subtract.
   252  func karatsubaSub(z, x nat, n int) {
   253  	if c := subVV(z[0:n], z, x); c != 0 {
   254  		subVW(z[n:n+n>>1], z[n:], c)
   255  	}
   256  }
   257  
   258  // Operands that are shorter than karatsubaThreshold are multiplied using
   259  // "grade school" multiplication; for longer operands the Karatsuba algorithm
   260  // is used.
   261  var karatsubaThreshold int = 40 // computed by calibrate.go
   262  
   263  // karatsuba multiplies x and y and leaves the result in z.
   264  // Both x and y must have the same length n and n must be a
   265  // power of 2. The result vector z must have len(z) >= 6*n.
   266  // The (non-normalized) result is placed in z[0 : 2*n].
   267  func karatsuba(z, x, y nat) {
   268  	n := len(y)
   269  
   270  	// Switch to basic multiplication if numbers are odd or small.
   271  	// (n is always even if karatsubaThreshold is even, but be
   272  	// conservative)
   273  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   274  		basicMul(z, x, y)
   275  		return
   276  	}
   277  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   278  
   279  	// Karatsuba multiplication is based on the observation that
   280  	// for two numbers x and y with:
   281  	//
   282  	//   x = x1*b + x0
   283  	//   y = y1*b + y0
   284  	//
   285  	// the product x*y can be obtained with 3 products z2, z1, z0
   286  	// instead of 4:
   287  	//
   288  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   289  	//       =    z2*b*b +              z1*b +    z0
   290  	//
   291  	// with:
   292  	//
   293  	//   xd = x1 - x0
   294  	//   yd = y0 - y1
   295  	//
   296  	//   z1 =      xd*yd                    + z2 + z0
   297  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
   298  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
   299  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
   300  	//      = x1*y0                 + x0*y1
   301  
   302  	// split x, y into "digits"
   303  	n2 := n >> 1              // n2 >= 1
   304  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   305  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   306  
   307  	// z is used for the result and temporary storage:
   308  	//
   309  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   310  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   311  	//
   312  	// For each recursive call of karatsuba, an unused slice of
   313  	// z is passed in that has (at least) half the length of the
   314  	// caller's z.
   315  
   316  	// compute z0 and z2 with the result "in place" in z
   317  	karatsuba(z, x0, y0)     // z0 = x0*y0
   318  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
   319  
   320  	// compute xd (or the negative value if underflow occurs)
   321  	s := 1 // sign of product xd*yd
   322  	xd := z[2*n : 2*n+n2]
   323  	if subVV(xd, x1, x0) != 0 { // x1-x0
   324  		s = -s
   325  		subVV(xd, x0, x1) // x0-x1
   326  	}
   327  
   328  	// compute yd (or the negative value if underflow occurs)
   329  	yd := z[2*n+n2 : 3*n]
   330  	if subVV(yd, y0, y1) != 0 { // y0-y1
   331  		s = -s
   332  		subVV(yd, y1, y0) // y1-y0
   333  	}
   334  
   335  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   336  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   337  	p := z[n*3:]
   338  	karatsuba(p, xd, yd)
   339  
   340  	// save original z2:z0
   341  	// (ok to use upper half of z since we're done recursing)
   342  	r := z[n*4:]
   343  	copy(r, z[:n*2])
   344  
   345  	// add up all partial products
   346  	//
   347  	//   2*n     n     0
   348  	// z = [ z2  | z0  ]
   349  	//   +    [ z0  ]
   350  	//   +    [ z2  ]
   351  	//   +    [  p  ]
   352  	//
   353  	karatsubaAdd(z[n2:], r, n)
   354  	karatsubaAdd(z[n2:], r[n:], n)
   355  	if s > 0 {
   356  		karatsubaAdd(z[n2:], p, n)
   357  	} else {
   358  		karatsubaSub(z[n2:], p, n)
   359  	}
   360  }
   361  
   362  // alias reports whether x and y share the same base array.
   363  func alias(x, y nat) bool {
   364  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   365  }
   366  
   367  // addAt implements z += x<<(_W*i); z must be long enough.
   368  // (we don't use nat.add because we need z to stay the same
   369  // slice, and we don't need to normalize z after each addition)
   370  func addAt(z, x nat, i int) {
   371  	if n := len(x); n > 0 {
   372  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   373  			j := i + n
   374  			if j < len(z) {
   375  				addVW(z[j:], z[j:], c)
   376  			}
   377  		}
   378  	}
   379  }
   380  
   381  func max(x, y int) int {
   382  	if x > y {
   383  		return x
   384  	}
   385  	return y
   386  }
   387  
   388  // karatsubaLen computes an approximation to the maximum k <= n such that
   389  // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   390  // result is the largest number that can be divided repeatedly by 2 before
   391  // becoming about the value of karatsubaThreshold.
   392  func karatsubaLen(n int) int {
   393  	i := uint(0)
   394  	for n > karatsubaThreshold {
   395  		n >>= 1
   396  		i++
   397  	}
   398  	return n << i
   399  }
   400  
   401  func (z nat) mul(x, y nat) nat {
   402  	m := len(x)
   403  	n := len(y)
   404  
   405  	switch {
   406  	case m < n:
   407  		return z.mul(y, x)
   408  	case m == 0 || n == 0:
   409  		return z[:0]
   410  	case n == 1:
   411  		return z.mulAddWW(x, y[0], 0)
   412  	}
   413  	// m >= n > 1
   414  
   415  	// determine if z can be reused
   416  	if alias(z, x) || alias(z, y) {
   417  		z = nil // z is an alias for x or y - cannot reuse
   418  	}
   419  
   420  	// use basic multiplication if the numbers are small
   421  	if n < karatsubaThreshold {
   422  		z = z.make(m + n)
   423  		basicMul(z, x, y)
   424  		return z.norm()
   425  	}
   426  	// m >= n && n >= karatsubaThreshold && n >= 2
   427  
   428  	// determine Karatsuba length k such that
   429  	//
   430  	//   x = xh*b + x0  (0 <= x0 < b)
   431  	//   y = yh*b + y0  (0 <= y0 < b)
   432  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   433  	//
   434  	k := karatsubaLen(n)
   435  	// k <= n
   436  
   437  	// multiply x0 and y0 via Karatsuba
   438  	x0 := x[0:k]              // x0 is not normalized
   439  	y0 := y[0:k]              // y0 is not normalized
   440  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   441  	karatsuba(z, x0, y0)
   442  	z = z[0 : m+n]  // z has final length but may be incomplete
   443  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
   444  
   445  	// If xh != 0 or yh != 0, add the missing terms to z. For
   446  	//
   447  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
   448  	//   yh =                         y1*b (0 <= y1 < b)
   449  	//
   450  	// the missing terms are
   451  	//
   452  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
   453  	//
   454  	// since all the yi for i > 1 are 0 by choice of k: If any of them
   455  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
   456  	// be a larger valid threshold contradicting the assumption about k.
   457  	//
   458  	if k < n || m != n {
   459  		var t nat
   460  
   461  		// add x0*y1*b
   462  		x0 := x0.norm()
   463  		y1 := y[k:]       // y1 is normalized because y is
   464  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
   465  		addAt(z, t, k)
   466  
   467  		// add xi*y0<<i, xi*y1*b<<(i+k)
   468  		y0 := y0.norm()
   469  		for i := k; i < len(x); i += k {
   470  			xi := x[i:]
   471  			if len(xi) > k {
   472  				xi = xi[:k]
   473  			}
   474  			xi = xi.norm()
   475  			t = t.mul(xi, y0)
   476  			addAt(z, t, i)
   477  			t = t.mul(xi, y1)
   478  			addAt(z, t, i+k)
   479  		}
   480  	}
   481  
   482  	return z.norm()
   483  }
   484  
   485  // mulRange computes the product of all the unsigned integers in the
   486  // range [a, b] inclusively. If a > b (empty range), the result is 1.
   487  func (z nat) mulRange(a, b uint64) nat {
   488  	switch {
   489  	case a == 0:
   490  		// cut long ranges short (optimization)
   491  		return z.setUint64(0)
   492  	case a > b:
   493  		return z.setUint64(1)
   494  	case a == b:
   495  		return z.setUint64(a)
   496  	case a+1 == b:
   497  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   498  	}
   499  	m := (a + b) / 2
   500  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   501  }
   502  
   503  // q = (x-r)/y, with 0 <= r < y
   504  func (z nat) divW(x nat, y Word) (q nat, r Word) {
   505  	m := len(x)
   506  	switch {
   507  	case y == 0:
   508  		panic("division by zero")
   509  	case y == 1:
   510  		q = z.set(x) // result is x
   511  		return
   512  	case m == 0:
   513  		q = z[:0] // result is 0
   514  		return
   515  	}
   516  	// m > 0
   517  	z = z.make(m)
   518  	r = divWVW(z, 0, x, y)
   519  	q = z.norm()
   520  	return
   521  }
   522  
   523  func (z nat) div(z2, u, v nat) (q, r nat) {
   524  	if len(v) == 0 {
   525  		panic("division by zero")
   526  	}
   527  
   528  	if u.cmp(v) < 0 {
   529  		q = z[:0]
   530  		r = z2.set(u)
   531  		return
   532  	}
   533  
   534  	if len(v) == 1 {
   535  		var r2 Word
   536  		q, r2 = z.divW(u, v[0])
   537  		r = z2.setWord(r2)
   538  		return
   539  	}
   540  
   541  	q, r = z.divLarge(z2, u, v)
   542  	return
   543  }
   544  
   545  // getNat returns a nat of len n. The contents may not be zero.
   546  func getNat(n int) nat {
   547  	var z nat
   548  	if v := natPool.Get(); v != nil {
   549  		z = v.(nat)
   550  	}
   551  	return z.make(n)
   552  }
   553  
   554  func putNat(x nat) {
   555  	natPool.Put(x)
   556  }
   557  
   558  var natPool sync.Pool
   559  
   560  // q = (uIn-r)/v, with 0 <= r < y
   561  // Uses z as storage for q, and u as storage for r if possible.
   562  // See Knuth, Volume 2, section 4.3.1, Algorithm D.
   563  // Preconditions:
   564  //    len(v) >= 2
   565  //    len(uIn) >= len(v)
   566  func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   567  	n := len(v)
   568  	m := len(uIn) - n
   569  
   570  	// determine if z can be reused
   571  	// TODO(gri) should find a better solution - this if statement
   572  	//           is very costly (see e.g. time pidigits -s -n 10000)
   573  	if alias(z, uIn) || alias(z, v) {
   574  		z = nil // z is an alias for uIn or v - cannot reuse
   575  	}
   576  	q = z.make(m + 1)
   577  
   578  	qhatv := getNat(n + 1)
   579  	if alias(u, uIn) || alias(u, v) {
   580  		u = nil // u is an alias for uIn or v - cannot reuse
   581  	}
   582  	u = u.make(len(uIn) + 1)
   583  	u.clear() // TODO(gri) no need to clear if we allocated a new u
   584  
   585  	// D1.
   586  	var v1 nat
   587  	shift := nlz(v[n-1])
   588  	if shift > 0 {
   589  		// do not modify v, it may be used by another goroutine simultaneously
   590  		v1 = getNat(n)
   591  		shlVU(v1, v, shift)
   592  		v = v1
   593  	}
   594  	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   595  
   596  	// D2.
   597  	for j := m; j >= 0; j-- {
   598  		// D3.
   599  		qhat := Word(_M)
   600  		if u[j+n] != v[n-1] {
   601  			var rhat Word
   602  			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   603  
   604  			// x1 | x2 = q̂v_{n-2}
   605  			x1, x2 := mulWW(qhat, v[n-2])
   606  			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   607  			for greaterThan(x1, x2, rhat, u[j+n-2]) {
   608  				qhat--
   609  				prevRhat := rhat
   610  				rhat += v[n-1]
   611  				// v[n-1] >= 0, so this tests for overflow.
   612  				if rhat < prevRhat {
   613  					break
   614  				}
   615  				x1, x2 = mulWW(qhat, v[n-2])
   616  			}
   617  		}
   618  
   619  		// D4.
   620  		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   621  
   622  		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   623  		if c != 0 {
   624  			c := addVV(u[j:j+n], u[j:], v)
   625  			u[j+n] += c
   626  			qhat--
   627  		}
   628  
   629  		q[j] = qhat
   630  	}
   631  	if v1 != nil {
   632  		putNat(v1)
   633  	}
   634  	putNat(qhatv)
   635  
   636  	q = q.norm()
   637  	shrVU(u, u, shift)
   638  	r = u.norm()
   639  
   640  	return q, r
   641  }
   642  
   643  // Length of x in bits. x must be normalized.
   644  func (x nat) bitLen() int {
   645  	if i := len(x) - 1; i >= 0 {
   646  		return i*_W + bitLen(x[i])
   647  	}
   648  	return 0
   649  }
   650  
   651  const deBruijn32 = 0x077CB531
   652  
   653  var deBruijn32Lookup = [...]byte{
   654  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
   655  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
   656  }
   657  
   658  const deBruijn64 = 0x03f79d71b4ca8b09
   659  
   660  var deBruijn64Lookup = [...]byte{
   661  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   662  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
   663  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
   664  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
   665  }
   666  
   667  // trailingZeroBits returns the number of consecutive least significant zero
   668  // bits of x.
   669  func trailingZeroBits(x Word) uint {
   670  	// x & -x leaves only the right-most bit set in the word. Let k be the
   671  	// index of that bit. Since only a single bit is set, the value is two
   672  	// to the power of k. Multiplying by a power of two is equivalent to
   673  	// left shifting, in this case by k bits. The de Bruijn constant is
   674  	// such that all six bit, consecutive substrings are distinct.
   675  	// Therefore, if we have a left shifted version of this constant we can
   676  	// find by how many bits it was shifted by looking at which six bit
   677  	// substring ended up at the top of the word.
   678  	// (Knuth, volume 4, section 7.3.1)
   679  	switch _W {
   680  	case 32:
   681  		return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
   682  	case 64:
   683  		return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
   684  	default:
   685  		panic("unknown word size")
   686  	}
   687  }
   688  
   689  // trailingZeroBits returns the number of consecutive least significant zero
   690  // bits of x.
   691  func (x nat) trailingZeroBits() uint {
   692  	if len(x) == 0 {
   693  		return 0
   694  	}
   695  	var i uint
   696  	for x[i] == 0 {
   697  		i++
   698  	}
   699  	// x[i] != 0
   700  	return i*_W + trailingZeroBits(x[i])
   701  }
   702  
   703  // z = x << s
   704  func (z nat) shl(x nat, s uint) nat {
   705  	m := len(x)
   706  	if m == 0 {
   707  		return z[:0]
   708  	}
   709  	// m > 0
   710  
   711  	n := m + int(s/_W)
   712  	z = z.make(n + 1)
   713  	z[n] = shlVU(z[n-m:n], x, s%_W)
   714  	z[0 : n-m].clear()
   715  
   716  	return z.norm()
   717  }
   718  
   719  // z = x >> s
   720  func (z nat) shr(x nat, s uint) nat {
   721  	m := len(x)
   722  	n := m - int(s/_W)
   723  	if n <= 0 {
   724  		return z[:0]
   725  	}
   726  	// n > 0
   727  
   728  	z = z.make(n)
   729  	shrVU(z, x[m-n:], s%_W)
   730  
   731  	return z.norm()
   732  }
   733  
   734  func (z nat) setBit(x nat, i uint, b uint) nat {
   735  	j := int(i / _W)
   736  	m := Word(1) << (i % _W)
   737  	n := len(x)
   738  	switch b {
   739  	case 0:
   740  		z = z.make(n)
   741  		copy(z, x)
   742  		if j >= n {
   743  			// no need to grow
   744  			return z
   745  		}
   746  		z[j] &^= m
   747  		return z.norm()
   748  	case 1:
   749  		if j >= n {
   750  			z = z.make(j + 1)
   751  			z[n:].clear()
   752  		} else {
   753  			z = z.make(n)
   754  		}
   755  		copy(z, x)
   756  		z[j] |= m
   757  		// no need to normalize
   758  		return z
   759  	}
   760  	panic("set bit is not 0 or 1")
   761  }
   762  
   763  // bit returns the value of the i'th bit, with lsb == bit 0.
   764  func (x nat) bit(i uint) uint {
   765  	j := i / _W
   766  	if j >= uint(len(x)) {
   767  		return 0
   768  	}
   769  	// 0 <= j < len(x)
   770  	return uint(x[j] >> (i % _W) & 1)
   771  }
   772  
   773  // sticky returns 1 if there's a 1 bit within the
   774  // i least significant bits, otherwise it returns 0.
   775  func (x nat) sticky(i uint) uint {
   776  	j := i / _W
   777  	if j >= uint(len(x)) {
   778  		if len(x) == 0 {
   779  			return 0
   780  		}
   781  		return 1
   782  	}
   783  	// 0 <= j < len(x)
   784  	for _, x := range x[:j] {
   785  		if x != 0 {
   786  			return 1
   787  		}
   788  	}
   789  	if x[j]<<(_W-i%_W) != 0 {
   790  		return 1
   791  	}
   792  	return 0
   793  }
   794  
   795  func (z nat) and(x, y nat) nat {
   796  	m := len(x)
   797  	n := len(y)
   798  	if m > n {
   799  		m = n
   800  	}
   801  	// m <= n
   802  
   803  	z = z.make(m)
   804  	for i := 0; i < m; i++ {
   805  		z[i] = x[i] & y[i]
   806  	}
   807  
   808  	return z.norm()
   809  }
   810  
   811  func (z nat) andNot(x, y nat) nat {
   812  	m := len(x)
   813  	n := len(y)
   814  	if n > m {
   815  		n = m
   816  	}
   817  	// m >= n
   818  
   819  	z = z.make(m)
   820  	for i := 0; i < n; i++ {
   821  		z[i] = x[i] &^ y[i]
   822  	}
   823  	copy(z[n:m], x[n:m])
   824  
   825  	return z.norm()
   826  }
   827  
   828  func (z nat) or(x, y nat) nat {
   829  	m := len(x)
   830  	n := len(y)
   831  	s := x
   832  	if m < n {
   833  		n, m = m, n
   834  		s = y
   835  	}
   836  	// m >= n
   837  
   838  	z = z.make(m)
   839  	for i := 0; i < n; i++ {
   840  		z[i] = x[i] | y[i]
   841  	}
   842  	copy(z[n:m], s[n:m])
   843  
   844  	return z.norm()
   845  }
   846  
   847  func (z nat) xor(x, y nat) nat {
   848  	m := len(x)
   849  	n := len(y)
   850  	s := x
   851  	if m < n {
   852  		n, m = m, n
   853  		s = y
   854  	}
   855  	// m >= n
   856  
   857  	z = z.make(m)
   858  	for i := 0; i < n; i++ {
   859  		z[i] = x[i] ^ y[i]
   860  	}
   861  	copy(z[n:m], s[n:m])
   862  
   863  	return z.norm()
   864  }
   865  
   866  // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2)
   867  func greaterThan(x1, x2, y1, y2 Word) bool {
   868  	return x1 > y1 || x1 == y1 && x2 > y2
   869  }
   870  
   871  // modW returns x % d.
   872  func (x nat) modW(d Word) (r Word) {
   873  	// TODO(agl): we don't actually need to store the q value.
   874  	var q nat
   875  	q = q.make(len(x))
   876  	return divWVW(q, 0, x, d)
   877  }
   878  
   879  // random creates a random integer in [0..limit), using the space in z if
   880  // possible. n is the bit length of limit.
   881  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
   882  	if alias(z, limit) {
   883  		z = nil // z is an alias for limit - cannot reuse
   884  	}
   885  	z = z.make(len(limit))
   886  
   887  	bitLengthOfMSW := uint(n % _W)
   888  	if bitLengthOfMSW == 0 {
   889  		bitLengthOfMSW = _W
   890  	}
   891  	mask := Word((1 << bitLengthOfMSW) - 1)
   892  
   893  	for {
   894  		switch _W {
   895  		case 32:
   896  			for i := range z {
   897  				z[i] = Word(rand.Uint32())
   898  			}
   899  		case 64:
   900  			for i := range z {
   901  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
   902  			}
   903  		default:
   904  			panic("unknown word size")
   905  		}
   906  		z[len(limit)-1] &= mask
   907  		if z.cmp(limit) < 0 {
   908  			break
   909  		}
   910  	}
   911  
   912  	return z.norm()
   913  }
   914  
   915  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
   916  // otherwise it sets z to x**y. The result is the value of z.
   917  func (z nat) expNN(x, y, m nat) nat {
   918  	if alias(z, x) || alias(z, y) {
   919  		// We cannot allow in-place modification of x or y.
   920  		z = nil
   921  	}
   922  
   923  	// x**y mod 1 == 0
   924  	if len(m) == 1 && m[0] == 1 {
   925  		return z.setWord(0)
   926  	}
   927  	// m == 0 || m > 1
   928  
   929  	// x**0 == 1
   930  	if len(y) == 0 {
   931  		return z.setWord(1)
   932  	}
   933  	// y > 0
   934  
   935  	// x**1 mod m == x mod m
   936  	if len(y) == 1 && y[0] == 1 && len(m) != 0 {
   937  		_, z = z.div(z, x, m)
   938  		return z
   939  	}
   940  	// y > 1
   941  
   942  	if len(m) != 0 {
   943  		// We likely end up being as long as the modulus.
   944  		z = z.make(len(m))
   945  	}
   946  	z = z.set(x)
   947  
   948  	// If the base is non-trivial and the exponent is large, we use
   949  	// 4-bit, windowed exponentiation. This involves precomputing 14 values
   950  	// (x^2...x^15) but then reduces the number of multiply-reduces by a
   951  	// third. Even for a 32-bit exponent, this reduces the number of
   952  	// operations. Uses Montgomery method for odd moduli.
   953  	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
   954  		if m[0]&1 == 1 {
   955  			return z.expNNMontgomery(x, y, m)
   956  		}
   957  		return z.expNNWindowed(x, y, m)
   958  	}
   959  
   960  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
   961  	shift := nlz(v) + 1
   962  	v <<= shift
   963  	var q nat
   964  
   965  	const mask = 1 << (_W - 1)
   966  
   967  	// We walk through the bits of the exponent one by one. Each time we
   968  	// see a bit, we square, thus doubling the power. If the bit is a one,
   969  	// we also multiply by x, thus adding one to the power.
   970  
   971  	w := _W - int(shift)
   972  	// zz and r are used to avoid allocating in mul and div as
   973  	// otherwise the arguments would alias.
   974  	var zz, r nat
   975  	for j := 0; j < w; j++ {
   976  		zz = zz.mul(z, z)
   977  		zz, z = z, zz
   978  
   979  		if v&mask != 0 {
   980  			zz = zz.mul(z, x)
   981  			zz, z = z, zz
   982  		}
   983  
   984  		if len(m) != 0 {
   985  			zz, r = zz.div(r, z, m)
   986  			zz, r, q, z = q, z, zz, r
   987  		}
   988  
   989  		v <<= 1
   990  	}
   991  
   992  	for i := len(y) - 2; i >= 0; i-- {
   993  		v = y[i]
   994  
   995  		for j := 0; j < _W; j++ {
   996  			zz = zz.mul(z, z)
   997  			zz, z = z, zz
   998  
   999  			if v&mask != 0 {
  1000  				zz = zz.mul(z, x)
  1001  				zz, z = z, zz
  1002  			}
  1003  
  1004  			if len(m) != 0 {
  1005  				zz, r = zz.div(r, z, m)
  1006  				zz, r, q, z = q, z, zz, r
  1007  			}
  1008  
  1009  			v <<= 1
  1010  		}
  1011  	}
  1012  
  1013  	return z.norm()
  1014  }
  1015  
  1016  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
  1017  func (z nat) expNNWindowed(x, y, m nat) nat {
  1018  	// zz and r are used to avoid allocating in mul and div as otherwise
  1019  	// the arguments would alias.
  1020  	var zz, r nat
  1021  
  1022  	const n = 4
  1023  	// powers[i] contains x^i.
  1024  	var powers [1 << n]nat
  1025  	powers[0] = natOne
  1026  	powers[1] = x
  1027  	for i := 2; i < 1<<n; i += 2 {
  1028  		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
  1029  		*p = p.mul(*p2, *p2)
  1030  		zz, r = zz.div(r, *p, m)
  1031  		*p, r = r, *p
  1032  		*p1 = p1.mul(*p, x)
  1033  		zz, r = zz.div(r, *p1, m)
  1034  		*p1, r = r, *p1
  1035  	}
  1036  
  1037  	z = z.setWord(1)
  1038  
  1039  	for i := len(y) - 1; i >= 0; i-- {
  1040  		yi := y[i]
  1041  		for j := 0; j < _W; j += n {
  1042  			if i != len(y)-1 || j != 0 {
  1043  				// Unrolled loop for significant performance
  1044  				// gain. Use go test -bench=".*" in crypto/rsa
  1045  				// to check performance before making changes.
  1046  				zz = zz.mul(z, z)
  1047  				zz, z = z, zz
  1048  				zz, r = zz.div(r, z, m)
  1049  				z, r = r, z
  1050  
  1051  				zz = zz.mul(z, z)
  1052  				zz, z = z, zz
  1053  				zz, r = zz.div(r, z, m)
  1054  				z, r = r, z
  1055  
  1056  				zz = zz.mul(z, z)
  1057  				zz, z = z, zz
  1058  				zz, r = zz.div(r, z, m)
  1059  				z, r = r, z
  1060  
  1061  				zz = zz.mul(z, z)
  1062  				zz, z = z, zz
  1063  				zz, r = zz.div(r, z, m)
  1064  				z, r = r, z
  1065  			}
  1066  
  1067  			zz = zz.mul(z, powers[yi>>(_W-n)])
  1068  			zz, z = z, zz
  1069  			zz, r = zz.div(r, z, m)
  1070  			z, r = r, z
  1071  
  1072  			yi <<= n
  1073  		}
  1074  	}
  1075  
  1076  	return z.norm()
  1077  }
  1078  
  1079  // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
  1080  // Uses Montgomery representation.
  1081  func (z nat) expNNMontgomery(x, y, m nat) nat {
  1082  	numWords := len(m)
  1083  
  1084  	// We want the lengths of x and m to be equal.
  1085  	// It is OK if x >= m as long as len(x) == len(m).
  1086  	if len(x) > numWords {
  1087  		_, x = nat(nil).div(nil, x, m)
  1088  		// Note: now len(x) <= numWords, not guaranteed ==.
  1089  	}
  1090  	if len(x) < numWords {
  1091  		rr := make(nat, numWords)
  1092  		copy(rr, x)
  1093  		x = rr
  1094  	}
  1095  
  1096  	// Ideally the precomputations would be performed outside, and reused
  1097  	// k0 = -m**-1 mod 2**_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
  1098  	// Iteration for Multiplicative Inverses Modulo Prime Powers".
  1099  	k0 := 2 - m[0]
  1100  	t := m[0] - 1
  1101  	for i := 1; i < _W; i <<= 1 {
  1102  		t *= t
  1103  		k0 *= (t + 1)
  1104  	}
  1105  	k0 = -k0
  1106  
  1107  	// RR = 2**(2*_W*len(m)) mod m
  1108  	RR := nat(nil).setWord(1)
  1109  	zz := nat(nil).shl(RR, uint(2*numWords*_W))
  1110  	_, RR = RR.div(RR, zz, m)
  1111  	if len(RR) < numWords {
  1112  		zz = zz.make(numWords)
  1113  		copy(zz, RR)
  1114  		RR = zz
  1115  	}
  1116  	// one = 1, with equal length to that of m
  1117  	one := make(nat, numWords)
  1118  	one[0] = 1
  1119  
  1120  	const n = 4
  1121  	// powers[i] contains x^i
  1122  	var powers [1 << n]nat
  1123  	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
  1124  	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
  1125  	for i := 2; i < 1<<n; i++ {
  1126  		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
  1127  	}
  1128  
  1129  	// initialize z = 1 (Montgomery 1)
  1130  	z = z.make(numWords)
  1131  	copy(z, powers[0])
  1132  
  1133  	zz = zz.make(numWords)
  1134  
  1135  	// same windowed exponent, but with Montgomery multiplications
  1136  	for i := len(y) - 1; i >= 0; i-- {
  1137  		yi := y[i]
  1138  		for j := 0; j < _W; j += n {
  1139  			if i != len(y)-1 || j != 0 {
  1140  				zz = zz.montgomery(z, z, m, k0, numWords)
  1141  				z = z.montgomery(zz, zz, m, k0, numWords)
  1142  				zz = zz.montgomery(z, z, m, k0, numWords)
  1143  				z = z.montgomery(zz, zz, m, k0, numWords)
  1144  			}
  1145  			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
  1146  			z, zz = zz, z
  1147  			yi <<= n
  1148  		}
  1149  	}
  1150  	// convert to regular number
  1151  	zz = zz.montgomery(z, one, m, k0, numWords)
  1152  
  1153  	// One last reduction, just in case.
  1154  	// See golang.org/issue/13907.
  1155  	if zz.cmp(m) >= 0 {
  1156  		// Common case is m has high bit set; in that case,
  1157  		// since zz is the same length as m, there can be just
  1158  		// one multiple of m to remove. Just subtract.
  1159  		// We think that the subtract should be sufficient in general,
  1160  		// so do that unconditionally, but double-check,
  1161  		// in case our beliefs are wrong.
  1162  		// The div is not expected to be reached.
  1163  		zz = zz.sub(zz, m)
  1164  		if zz.cmp(m) >= 0 {
  1165  			_, zz = nat(nil).div(nil, zz, m)
  1166  		}
  1167  	}
  1168  
  1169  	return zz.norm()
  1170  }
  1171  
  1172  // probablyPrime performs n Miller-Rabin tests to check whether x is prime.
  1173  // If x is prime, it returns true.
  1174  // If x is not prime, it returns false with probability at least 1 - ¼ⁿ.
  1175  //
  1176  // It is not suitable for judging primes that an adversary may have crafted
  1177  // to fool this test.
  1178  func (n nat) probablyPrime(reps int) bool {
  1179  	if len(n) == 0 {
  1180  		return false
  1181  	}
  1182  
  1183  	if len(n) == 1 {
  1184  		if n[0] < 2 {
  1185  			return false
  1186  		}
  1187  
  1188  		if n[0]%2 == 0 {
  1189  			return n[0] == 2
  1190  		}
  1191  
  1192  		// We have to exclude these cases because we reject all
  1193  		// multiples of these numbers below.
  1194  		switch n[0] {
  1195  		case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1196  			return true
  1197  		}
  1198  	}
  1199  
  1200  	if n[0]&1 == 0 {
  1201  		return false // n is even
  1202  	}
  1203  
  1204  	const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1205  	const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1206  
  1207  	var r Word
  1208  	switch _W {
  1209  	case 32:
  1210  		r = n.modW(primesProduct32)
  1211  	case 64:
  1212  		r = n.modW(primesProduct64 & _M)
  1213  	default:
  1214  		panic("Unknown word size")
  1215  	}
  1216  
  1217  	if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1218  		r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1219  		return false
  1220  	}
  1221  
  1222  	if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1223  		r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1224  		return false
  1225  	}
  1226  
  1227  	nm1 := nat(nil).sub(n, natOne)
  1228  	// determine q, k such that nm1 = q << k
  1229  	k := nm1.trailingZeroBits()
  1230  	q := nat(nil).shr(nm1, k)
  1231  
  1232  	nm3 := nat(nil).sub(nm1, natTwo)
  1233  	rand := rand.New(rand.NewSource(int64(n[0])))
  1234  
  1235  	var x, y, quotient nat
  1236  	nm3Len := nm3.bitLen()
  1237  
  1238  NextRandom:
  1239  	for i := 0; i < reps; i++ {
  1240  		x = x.random(rand, nm3, nm3Len)
  1241  		x = x.add(x, natTwo)
  1242  		y = y.expNN(x, q, n)
  1243  		if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1244  			continue
  1245  		}
  1246  		for j := uint(1); j < k; j++ {
  1247  			y = y.mul(y, y)
  1248  			quotient, y = quotient.div(y, y, n)
  1249  			if y.cmp(nm1) == 0 {
  1250  				continue NextRandom
  1251  			}
  1252  			if y.cmp(natOne) == 0 {
  1253  				return false
  1254  			}
  1255  		}
  1256  		return false
  1257  	}
  1258  
  1259  	return true
  1260  }
  1261  
  1262  // bytes writes the value of z into buf using big-endian encoding.
  1263  // len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1264  // slice buf[i:]. The number i of unused bytes at the beginning of
  1265  // buf is returned as result.
  1266  func (z nat) bytes(buf []byte) (i int) {
  1267  	i = len(buf)
  1268  	for _, d := range z {
  1269  		for j := 0; j < _S; j++ {
  1270  			i--
  1271  			buf[i] = byte(d)
  1272  			d >>= 8
  1273  		}
  1274  	}
  1275  
  1276  	for i < len(buf) && buf[i] == 0 {
  1277  		i++
  1278  	}
  1279  
  1280  	return
  1281  }
  1282  
  1283  // setBytes interprets buf as the bytes of a big-endian unsigned
  1284  // integer, sets z to that value, and returns z.
  1285  func (z nat) setBytes(buf []byte) nat {
  1286  	z = z.make((len(buf) + _S - 1) / _S)
  1287  
  1288  	k := 0
  1289  	s := uint(0)
  1290  	var d Word
  1291  	for i := len(buf); i > 0; i-- {
  1292  		d |= Word(buf[i-1]) << s
  1293  		if s += 8; s == _S*8 {
  1294  			z[k] = d
  1295  			k++
  1296  			s = 0
  1297  			d = 0
  1298  		}
  1299  	}
  1300  	if k < len(z) {
  1301  		z[k] = d
  1302  	}
  1303  
  1304  	return z.norm()
  1305  }