github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/big/nat.go

github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/big/nat.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package big implements multi-precision arithmetic (big numbers).
     6  // The following numeric types are supported:
     7  //
     8  //   Int    signed integers
     9  //   Rat    rational numbers
    10  //   Float  floating-point numbers
    11  //
    12  // Methods are typically of the form:
    13  //
    14  //   func (z *T) Unary(x *T) *T        // z = op x
    15  //   func (z *T) Binary(x, y *T) *T    // z = x op y
    16  //   func (x *T) M() T1                // v = x.M()
    17  //
    18  // with T one of Int, Rat, or Float. For unary and binary operations, the
    19  // result is the receiver (usually named z in that case); if it is one of
    20  // the operands x or y it may be overwritten (and its memory reused).
    21  // To enable chaining of operations, the result is also returned. Methods
    22  // returning a result other than *Int, *Rat, or *Float take an operand as
    23  // the receiver (usually named x in that case).
    24  //
    25  package big
    26  
    27  // This file contains operations on unsigned multi-precision integers.
    28  // These are the building blocks for the operations on signed integers
    29  // and rationals.
    30  
    31  import "math/rand"
    32  
    33  // An unsigned integer x of the form
    34  //
    35  //   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    36  //
    37  // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    38  // with the digits x[i] as the slice elements.
    39  //
    40  // A number is normalized if the slice contains no leading 0 digits.
    41  // During arithmetic operations, denormalized values may occur but are
    42  // always normalized before returning the final result. The normalized
    43  // representation of 0 is the empty or nil slice (length = 0).
    44  //
    45  type nat []Word
    46  
    47  var (
    48  	natOne = nat{1}
    49  	natTwo = nat{2}
    50  	natTen = nat{10}
    51  )
    52  
    53  func (z nat) clear() {
    54  	for i := range z {
    55  		z[i] = 0
    56  	}
    57  }
    58  
    59  func (z nat) norm() nat {
    60  	i := len(z)
    61  	for i > 0 && z[i-1] == 0 {
    62  		i--
    63  	}
    64  	return z[0:i]
    65  }
    66  
    67  func (z nat) make(n int) nat {
    68  	if n <= cap(z) {
    69  		return z[:n] // reuse z
    70  	}
    71  	// Choosing a good value for e has significant performance impact
    72  	// because it increases the chance that a value can be reused.
    73  	const e = 4 // extra capacity
    74  	return make(nat, n, n+e)
    75  }
    76  
    77  func (z nat) setWord(x Word) nat {
    78  	if x == 0 {
    79  		return z[:0]
    80  	}
    81  	z = z.make(1)
    82  	z[0] = x
    83  	return z
    84  }
    85  
    86  func (z nat) setUint64(x uint64) nat {
    87  	// single-digit values
    88  	if w := Word(x); uint64(w) == x {
    89  		return z.setWord(w)
    90  	}
    91  
    92  	// compute number of words n required to represent x
    93  	n := 0
    94  	for t := x; t > 0; t >>= _W {
    95  		n++
    96  	}
    97  
    98  	// split x into n words
    99  	z = z.make(n)
   100  	for i := range z {
   101  		z[i] = Word(x & _M)
   102  		x >>= _W
   103  	}
   104  
   105  	return z
   106  }
   107  
   108  func (z nat) set(x nat) nat {
   109  	z = z.make(len(x))
   110  	copy(z, x)
   111  	return z
   112  }
   113  
   114  func (z nat) add(x, y nat) nat {
   115  	m := len(x)
   116  	n := len(y)
   117  
   118  	switch {
   119  	case m < n:
   120  		return z.add(y, x)
   121  	case m == 0:
   122  		// n == 0 because m >= n; result is 0
   123  		return z[:0]
   124  	case n == 0:
   125  		// result is x
   126  		return z.set(x)
   127  	}
   128  	// m > 0
   129  
   130  	z = z.make(m + 1)
   131  	c := addVV(z[0:n], x, y)
   132  	if m > n {
   133  		c = addVW(z[n:m], x[n:], c)
   134  	}
   135  	z[m] = c
   136  
   137  	return z.norm()
   138  }
   139  
   140  func (z nat) sub(x, y nat) nat {
   141  	m := len(x)
   142  	n := len(y)
   143  
   144  	switch {
   145  	case m < n:
   146  		panic("underflow")
   147  	case m == 0:
   148  		// n == 0 because m >= n; result is 0
   149  		return z[:0]
   150  	case n == 0:
   151  		// result is x
   152  		return z.set(x)
   153  	}
   154  	// m > 0
   155  
   156  	z = z.make(m)
   157  	c := subVV(z[0:n], x, y)
   158  	if m > n {
   159  		c = subVW(z[n:], x[n:], c)
   160  	}
   161  	if c != 0 {
   162  		panic("underflow")
   163  	}
   164  
   165  	return z.norm()
   166  }
   167  
   168  func (x nat) cmp(y nat) (r int) {
   169  	m := len(x)
   170  	n := len(y)
   171  	if m != n || m == 0 {
   172  		switch {
   173  		case m < n:
   174  			r = -1
   175  		case m > n:
   176  			r = 1
   177  		}
   178  		return
   179  	}
   180  
   181  	i := m - 1
   182  	for i > 0 && x[i] == y[i] {
   183  		i--
   184  	}
   185  
   186  	switch {
   187  	case x[i] < y[i]:
   188  		r = -1
   189  	case x[i] > y[i]:
   190  		r = 1
   191  	}
   192  	return
   193  }
   194  
   195  func (z nat) mulAddWW(x nat, y, r Word) nat {
   196  	m := len(x)
   197  	if m == 0 || y == 0 {
   198  		return z.setWord(r) // result is r
   199  	}
   200  	// m > 0
   201  
   202  	z = z.make(m + 1)
   203  	z[m] = mulAddVWW(z[0:m], x, y, r)
   204  
   205  	return z.norm()
   206  }
   207  
   208  // basicMul multiplies x and y and leaves the result in z.
   209  // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   210  func basicMul(z, x, y nat) {
   211  	z[0 : len(x)+len(y)].clear() // initialize z
   212  	for i, d := range y {
   213  		if d != 0 {
   214  			z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   215  		}
   216  	}
   217  }
   218  
   219  // montgomery computes x*y*2^(-n*_W) mod m,
   220  // assuming k = -1/m mod 2^_W.
   221  // z is used for storing the result which is returned;
   222  // z must not alias x, y or m.
   223  func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
   224  	var c1, c2 Word
   225  	z = z.make(n)
   226  	z.clear()
   227  	for i := 0; i < n; i++ {
   228  		d := y[i]
   229  		c1 += addMulVVW(z, x, d)
   230  		t := z[0] * k
   231  		c2 = addMulVVW(z, m, t)
   232  
   233  		copy(z, z[1:])
   234  		z[n-1] = c1 + c2
   235  		if z[n-1] < c1 {
   236  			c1 = 1
   237  		} else {
   238  			c1 = 0
   239  		}
   240  	}
   241  	if c1 != 0 {
   242  		subVV(z, z, m)
   243  	}
   244  	return z
   245  }
   246  
   247  // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   248  // Factored out for readability - do not use outside karatsuba.
   249  func karatsubaAdd(z, x nat, n int) {
   250  	if c := addVV(z[0:n], z, x); c != 0 {
   251  		addVW(z[n:n+n>>1], z[n:], c)
   252  	}
   253  }
   254  
   255  // Like karatsubaAdd, but does subtract.
   256  func karatsubaSub(z, x nat, n int) {
   257  	if c := subVV(z[0:n], z, x); c != 0 {
   258  		subVW(z[n:n+n>>1], z[n:], c)
   259  	}
   260  }
   261  
   262  // Operands that are shorter than karatsubaThreshold are multiplied using
   263  // "grade school" multiplication; for longer operands the Karatsuba algorithm
   264  // is used.
   265  var karatsubaThreshold int = 40 // computed by calibrate.go
   266  
   267  // karatsuba multiplies x and y and leaves the result in z.
   268  // Both x and y must have the same length n and n must be a
   269  // power of 2. The result vector z must have len(z) >= 6*n.
   270  // The (non-normalized) result is placed in z[0 : 2*n].
   271  func karatsuba(z, x, y nat) {
   272  	n := len(y)
   273  
   274  	// Switch to basic multiplication if numbers are odd or small.
   275  	// (n is always even if karatsubaThreshold is even, but be
   276  	// conservative)
   277  	if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   278  		basicMul(z, x, y)
   279  		return
   280  	}
   281  	// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   282  
   283  	// Karatsuba multiplication is based on the observation that
   284  	// for two numbers x and y with:
   285  	//
   286  	//   x = x1*b + x0
   287  	//   y = y1*b + y0
   288  	//
   289  	// the product x*y can be obtained with 3 products z2, z1, z0
   290  	// instead of 4:
   291  	//
   292  	//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   293  	//       =    z2*b*b +              z1*b +    z0
   294  	//
   295  	// with:
   296  	//
   297  	//   xd = x1 - x0
   298  	//   yd = y0 - y1
   299  	//
   300  	//   z1 =      xd*yd                    + z2 + z0
   301  	//      = (x1-x0)*(y0 - y1)             + z2 + z0
   302  	//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0
   303  	//      = x1*y0 -    z2 -    z0 + x0*y1 + z2 + z0
   304  	//      = x1*y0                 + x0*y1
   305  
   306  	// split x, y into "digits"
   307  	n2 := n >> 1              // n2 >= 1
   308  	x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   309  	y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   310  
   311  	// z is used for the result and temporary storage:
   312  	//
   313  	//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   314  	// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   315  	//
   316  	// For each recursive call of karatsuba, an unused slice of
   317  	// z is passed in that has (at least) half the length of the
   318  	// caller's z.
   319  
   320  	// compute z0 and z2 with the result "in place" in z
   321  	karatsuba(z, x0, y0)     // z0 = x0*y0
   322  	karatsuba(z[n:], x1, y1) // z2 = x1*y1
   323  
   324  	// compute xd (or the negative value if underflow occurs)
   325  	s := 1 // sign of product xd*yd
   326  	xd := z[2*n : 2*n+n2]
   327  	if subVV(xd, x1, x0) != 0 { // x1-x0
   328  		s = -s
   329  		subVV(xd, x0, x1) // x0-x1
   330  	}
   331  
   332  	// compute yd (or the negative value if underflow occurs)
   333  	yd := z[2*n+n2 : 3*n]
   334  	if subVV(yd, y0, y1) != 0 { // y0-y1
   335  		s = -s
   336  		subVV(yd, y1, y0) // y1-y0
   337  	}
   338  
   339  	// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   340  	// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   341  	p := z[n*3:]
   342  	karatsuba(p, xd, yd)
   343  
   344  	// save original z2:z0
   345  	// (ok to use upper half of z since we're done recursing)
   346  	r := z[n*4:]
   347  	copy(r, z[:n*2])
   348  
   349  	// add up all partial products
   350  	//
   351  	//   2*n     n     0
   352  	// z = [ z2  | z0  ]
   353  	//   +    [ z0  ]
   354  	//   +    [ z2  ]
   355  	//   +    [  p  ]
   356  	//
   357  	karatsubaAdd(z[n2:], r, n)
   358  	karatsubaAdd(z[n2:], r[n:], n)
   359  	if s > 0 {
   360  		karatsubaAdd(z[n2:], p, n)
   361  	} else {
   362  		karatsubaSub(z[n2:], p, n)
   363  	}
   364  }
   365  
   366  // alias reports whether x and y share the same base array.
   367  func alias(x, y nat) bool {
   368  	return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   369  }
   370  
   371  // addAt implements z += x<<(_W*i); z must be long enough.
   372  // (we don't use nat.add because we need z to stay the same
   373  // slice, and we don't need to normalize z after each addition)
   374  func addAt(z, x nat, i int) {
   375  	if n := len(x); n > 0 {
   376  		if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   377  			j := i + n
   378  			if j < len(z) {
   379  				addVW(z[j:], z[j:], c)
   380  			}
   381  		}
   382  	}
   383  }
   384  
   385  func max(x, y int) int {
   386  	if x > y {
   387  		return x
   388  	}
   389  	return y
   390  }
   391  
   392  // karatsubaLen computes an approximation to the maximum k <= n such that
   393  // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   394  // result is the largest number that can be divided repeatedly by 2 before
   395  // becoming about the value of karatsubaThreshold.
   396  func karatsubaLen(n int) int {
   397  	i := uint(0)
   398  	for n > karatsubaThreshold {
   399  		n >>= 1
   400  		i++
   401  	}
   402  	return n << i
   403  }
   404  
   405  func (z nat) mul(x, y nat) nat {
   406  	m := len(x)
   407  	n := len(y)
   408  
   409  	switch {
   410  	case m < n:
   411  		return z.mul(y, x)
   412  	case m == 0 || n == 0:
   413  		return z[:0]
   414  	case n == 1:
   415  		return z.mulAddWW(x, y[0], 0)
   416  	}
   417  	// m >= n > 1
   418  
   419  	// determine if z can be reused
   420  	if alias(z, x) || alias(z, y) {
   421  		z = nil // z is an alias for x or y - cannot reuse
   422  	}
   423  
   424  	// use basic multiplication if the numbers are small
   425  	if n < karatsubaThreshold {
   426  		z = z.make(m + n)
   427  		basicMul(z, x, y)
   428  		return z.norm()
   429  	}
   430  	// m >= n && n >= karatsubaThreshold && n >= 2
   431  
   432  	// determine Karatsuba length k such that
   433  	//
   434  	//   x = xh*b + x0  (0 <= x0 < b)
   435  	//   y = yh*b + y0  (0 <= y0 < b)
   436  	//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   437  	//
   438  	k := karatsubaLen(n)
   439  	// k <= n
   440  
   441  	// multiply x0 and y0 via Karatsuba
   442  	x0 := x[0:k]              // x0 is not normalized
   443  	y0 := y[0:k]              // y0 is not normalized
   444  	z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   445  	karatsuba(z, x0, y0)
   446  	z = z[0 : m+n]  // z has final length but may be incomplete
   447  	z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
   448  
   449  	// If xh != 0 or yh != 0, add the missing terms to z. For
   450  	//
   451  	//   xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
   452  	//   yh =                         y1*b (0 <= y1 < b)
   453  	//
   454  	// the missing terms are
   455  	//
   456  	//   x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
   457  	//
   458  	// since all the yi for i > 1 are 0 by choice of k: If any of them
   459  	// were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
   460  	// be a larger valid threshold contradicting the assumption about k.
   461  	//
   462  	if k < n || m != n {
   463  		var t nat
   464  
   465  		// add x0*y1*b
   466  		x0 := x0.norm()
   467  		y1 := y[k:]       // y1 is normalized because y is
   468  		t = t.mul(x0, y1) // update t so we don't lose t's underlying array
   469  		addAt(z, t, k)
   470  
   471  		// add xi*y0<<i, xi*y1*b<<(i+k)
   472  		y0 := y0.norm()
   473  		for i := k; i < len(x); i += k {
   474  			xi := x[i:]
   475  			if len(xi) > k {
   476  				xi = xi[:k]
   477  			}
   478  			xi = xi.norm()
   479  			t = t.mul(xi, y0)
   480  			addAt(z, t, i)
   481  			t = t.mul(xi, y1)
   482  			addAt(z, t, i+k)
   483  		}
   484  	}
   485  
   486  	return z.norm()
   487  }
   488  
   489  // mulRange computes the product of all the unsigned integers in the
   490  // range [a, b] inclusively. If a > b (empty range), the result is 1.
   491  func (z nat) mulRange(a, b uint64) nat {
   492  	switch {
   493  	case a == 0:
   494  		// cut long ranges short (optimization)
   495  		return z.setUint64(0)
   496  	case a > b:
   497  		return z.setUint64(1)
   498  	case a == b:
   499  		return z.setUint64(a)
   500  	case a+1 == b:
   501  		return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   502  	}
   503  	m := (a + b) / 2
   504  	return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   505  }
   506  
   507  // q = (x-r)/y, with 0 <= r < y
   508  func (z nat) divW(x nat, y Word) (q nat, r Word) {
   509  	m := len(x)
   510  	switch {
   511  	case y == 0:
   512  		panic("division by zero")
   513  	case y == 1:
   514  		q = z.set(x) // result is x
   515  		return
   516  	case m == 0:
   517  		q = z[:0] // result is 0
   518  		return
   519  	}
   520  	// m > 0
   521  	z = z.make(m)
   522  	r = divWVW(z, 0, x, y)
   523  	q = z.norm()
   524  	return
   525  }
   526  
   527  func (z nat) div(z2, u, v nat) (q, r nat) {
   528  	if len(v) == 0 {
   529  		panic("division by zero")
   530  	}
   531  
   532  	if u.cmp(v) < 0 {
   533  		q = z[:0]
   534  		r = z2.set(u)
   535  		return
   536  	}
   537  
   538  	if len(v) == 1 {
   539  		var r2 Word
   540  		q, r2 = z.divW(u, v[0])
   541  		r = z2.setWord(r2)
   542  		return
   543  	}
   544  
   545  	q, r = z.divLarge(z2, u, v)
   546  	return
   547  }
   548  
   549  // q = (uIn-r)/v, with 0 <= r < y
   550  // Uses z as storage for q, and u as storage for r if possible.
   551  // See Knuth, Volume 2, section 4.3.1, Algorithm D.
   552  // Preconditions:
   553  //    len(v) >= 2
   554  //    len(uIn) >= len(v)
   555  func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   556  	n := len(v)
   557  	m := len(uIn) - n
   558  
   559  	// determine if z can be reused
   560  	// TODO(gri) should find a better solution - this if statement
   561  	//           is very costly (see e.g. time pidigits -s -n 10000)
   562  	if alias(z, uIn) || alias(z, v) {
   563  		z = nil // z is an alias for uIn or v - cannot reuse
   564  	}
   565  	q = z.make(m + 1)
   566  
   567  	qhatv := make(nat, n+1)
   568  	if alias(u, uIn) || alias(u, v) {
   569  		u = nil // u is an alias for uIn or v - cannot reuse
   570  	}
   571  	u = u.make(len(uIn) + 1)
   572  	u.clear() // TODO(gri) no need to clear if we allocated a new u
   573  
   574  	// D1.
   575  	shift := nlz(v[n-1])
   576  	if shift > 0 {
   577  		// do not modify v, it may be used by another goroutine simultaneously
   578  		v1 := make(nat, n)
   579  		shlVU(v1, v, shift)
   580  		v = v1
   581  	}
   582  	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   583  
   584  	// D2.
   585  	for j := m; j >= 0; j-- {
   586  		// D3.
   587  		qhat := Word(_M)
   588  		if u[j+n] != v[n-1] {
   589  			var rhat Word
   590  			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   591  
   592  			// x1 | x2 = q̂v_{n-2}
   593  			x1, x2 := mulWW(qhat, v[n-2])
   594  			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   595  			for greaterThan(x1, x2, rhat, u[j+n-2]) {
   596  				qhat--
   597  				prevRhat := rhat
   598  				rhat += v[n-1]
   599  				// v[n-1] >= 0, so this tests for overflow.
   600  				if rhat < prevRhat {
   601  					break
   602  				}
   603  				x1, x2 = mulWW(qhat, v[n-2])
   604  			}
   605  		}
   606  
   607  		// D4.
   608  		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   609  
   610  		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   611  		if c != 0 {
   612  			c := addVV(u[j:j+n], u[j:], v)
   613  			u[j+n] += c
   614  			qhat--
   615  		}
   616  
   617  		q[j] = qhat
   618  	}
   619  
   620  	q = q.norm()
   621  	shrVU(u, u, shift)
   622  	r = u.norm()
   623  
   624  	return q, r
   625  }
   626  
   627  // Length of x in bits. x must be normalized.
   628  func (x nat) bitLen() int {
   629  	if i := len(x) - 1; i >= 0 {
   630  		return i*_W + bitLen(x[i])
   631  	}
   632  	return 0
   633  }
   634  
   635  const deBruijn32 = 0x077CB531
   636  
   637  var deBruijn32Lookup = []byte{
   638  	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
   639  	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
   640  }
   641  
   642  const deBruijn64 = 0x03f79d71b4ca8b09
   643  
   644  var deBruijn64Lookup = []byte{
   645  	0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   646  	62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
   647  	63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
   648  	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
   649  }
   650  
   651  // trailingZeroBits returns the number of consecutive least significant zero
   652  // bits of x.
   653  func trailingZeroBits(x Word) uint {
   654  	// x & -x leaves only the right-most bit set in the word. Let k be the
   655  	// index of that bit. Since only a single bit is set, the value is two
   656  	// to the power of k. Multiplying by a power of two is equivalent to
   657  	// left shifting, in this case by k bits.  The de Bruijn constant is
   658  	// such that all six bit, consecutive substrings are distinct.
   659  	// Therefore, if we have a left shifted version of this constant we can
   660  	// find by how many bits it was shifted by looking at which six bit
   661  	// substring ended up at the top of the word.
   662  	// (Knuth, volume 4, section 7.3.1)
   663  	switch _W {
   664  	case 32:
   665  		return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
   666  	case 64:
   667  		return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
   668  	default:
   669  		panic("unknown word size")
   670  	}
   671  }
   672  
   673  // trailingZeroBits returns the number of consecutive least significant zero
   674  // bits of x.
   675  func (x nat) trailingZeroBits() uint {
   676  	if len(x) == 0 {
   677  		return 0
   678  	}
   679  	var i uint
   680  	for x[i] == 0 {
   681  		i++
   682  	}
   683  	// x[i] != 0
   684  	return i*_W + trailingZeroBits(x[i])
   685  }
   686  
   687  // z = x << s
   688  func (z nat) shl(x nat, s uint) nat {
   689  	m := len(x)
   690  	if m == 0 {
   691  		return z[:0]
   692  	}
   693  	// m > 0
   694  
   695  	n := m + int(s/_W)
   696  	z = z.make(n + 1)
   697  	z[n] = shlVU(z[n-m:n], x, s%_W)
   698  	z[0 : n-m].clear()
   699  
   700  	return z.norm()
   701  }
   702  
   703  // z = x >> s
   704  func (z nat) shr(x nat, s uint) nat {
   705  	m := len(x)
   706  	n := m - int(s/_W)
   707  	if n <= 0 {
   708  		return z[:0]
   709  	}
   710  	// n > 0
   711  
   712  	z = z.make(n)
   713  	shrVU(z, x[m-n:], s%_W)
   714  
   715  	return z.norm()
   716  }
   717  
   718  func (z nat) setBit(x nat, i uint, b uint) nat {
   719  	j := int(i / _W)
   720  	m := Word(1) << (i % _W)
   721  	n := len(x)
   722  	switch b {
   723  	case 0:
   724  		z = z.make(n)
   725  		copy(z, x)
   726  		if j >= n {
   727  			// no need to grow
   728  			return z
   729  		}
   730  		z[j] &^= m
   731  		return z.norm()
   732  	case 1:
   733  		if j >= n {
   734  			z = z.make(j + 1)
   735  			z[n:].clear()
   736  		} else {
   737  			z = z.make(n)
   738  		}
   739  		copy(z, x)
   740  		z[j] |= m
   741  		// no need to normalize
   742  		return z
   743  	}
   744  	panic("set bit is not 0 or 1")
   745  }
   746  
   747  // bit returns the value of the i'th bit, with lsb == bit 0.
   748  func (x nat) bit(i uint) uint {
   749  	j := i / _W
   750  	if j >= uint(len(x)) {
   751  		return 0
   752  	}
   753  	// 0 <= j < len(x)
   754  	return uint(x[j] >> (i % _W) & 1)
   755  }
   756  
   757  // sticky returns 1 if there's a 1 bit within the
   758  // i least significant bits, otherwise it returns 0.
   759  func (x nat) sticky(i uint) uint {
   760  	j := i / _W
   761  	if j >= uint(len(x)) {
   762  		if len(x) == 0 {
   763  			return 0
   764  		}
   765  		return 1
   766  	}
   767  	// 0 <= j < len(x)
   768  	for _, x := range x[:j] {
   769  		if x != 0 {
   770  			return 1
   771  		}
   772  	}
   773  	if x[j]<<(_W-i%_W) != 0 {
   774  		return 1
   775  	}
   776  	return 0
   777  }
   778  
   779  func (z nat) and(x, y nat) nat {
   780  	m := len(x)
   781  	n := len(y)
   782  	if m > n {
   783  		m = n
   784  	}
   785  	// m <= n
   786  
   787  	z = z.make(m)
   788  	for i := 0; i < m; i++ {
   789  		z[i] = x[i] & y[i]
   790  	}
   791  
   792  	return z.norm()
   793  }
   794  
   795  func (z nat) andNot(x, y nat) nat {
   796  	m := len(x)
   797  	n := len(y)
   798  	if n > m {
   799  		n = m
   800  	}
   801  	// m >= n
   802  
   803  	z = z.make(m)
   804  	for i := 0; i < n; i++ {
   805  		z[i] = x[i] &^ y[i]
   806  	}
   807  	copy(z[n:m], x[n:m])
   808  
   809  	return z.norm()
   810  }
   811  
   812  func (z nat) or(x, y nat) nat {
   813  	m := len(x)
   814  	n := len(y)
   815  	s := x
   816  	if m < n {
   817  		n, m = m, n
   818  		s = y
   819  	}
   820  	// m >= n
   821  
   822  	z = z.make(m)
   823  	for i := 0; i < n; i++ {
   824  		z[i] = x[i] | y[i]
   825  	}
   826  	copy(z[n:m], s[n:m])
   827  
   828  	return z.norm()
   829  }
   830  
   831  func (z nat) xor(x, y nat) nat {
   832  	m := len(x)
   833  	n := len(y)
   834  	s := x
   835  	if m < n {
   836  		n, m = m, n
   837  		s = y
   838  	}
   839  	// m >= n
   840  
   841  	z = z.make(m)
   842  	for i := 0; i < n; i++ {
   843  		z[i] = x[i] ^ y[i]
   844  	}
   845  	copy(z[n:m], s[n:m])
   846  
   847  	return z.norm()
   848  }
   849  
   850  // greaterThan reports whether (x1<<_W + x2) > (y1<<_W + y2)
   851  func greaterThan(x1, x2, y1, y2 Word) bool {
   852  	return x1 > y1 || x1 == y1 && x2 > y2
   853  }
   854  
   855  // modW returns x % d.
   856  func (x nat) modW(d Word) (r Word) {
   857  	// TODO(agl): we don't actually need to store the q value.
   858  	var q nat
   859  	q = q.make(len(x))
   860  	return divWVW(q, 0, x, d)
   861  }
   862  
   863  // random creates a random integer in [0..limit), using the space in z if
   864  // possible. n is the bit length of limit.
   865  func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
   866  	if alias(z, limit) {
   867  		z = nil // z is an alias for limit - cannot reuse
   868  	}
   869  	z = z.make(len(limit))
   870  
   871  	bitLengthOfMSW := uint(n % _W)
   872  	if bitLengthOfMSW == 0 {
   873  		bitLengthOfMSW = _W
   874  	}
   875  	mask := Word((1 << bitLengthOfMSW) - 1)
   876  
   877  	for {
   878  		switch _W {
   879  		case 32:
   880  			for i := range z {
   881  				z[i] = Word(rand.Uint32())
   882  			}
   883  		case 64:
   884  			for i := range z {
   885  				z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
   886  			}
   887  		default:
   888  			panic("unknown word size")
   889  		}
   890  		z[len(limit)-1] &= mask
   891  		if z.cmp(limit) < 0 {
   892  			break
   893  		}
   894  	}
   895  
   896  	return z.norm()
   897  }
   898  
   899  // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
   900  // otherwise it sets z to x**y. The result is the value of z.
   901  func (z nat) expNN(x, y, m nat) nat {
   902  	if alias(z, x) || alias(z, y) {
   903  		// We cannot allow in-place modification of x or y.
   904  		z = nil
   905  	}
   906  
   907  	// x**y mod 1 == 0
   908  	if len(m) == 1 && m[0] == 1 {
   909  		return z.setWord(0)
   910  	}
   911  	// m == 0 || m > 1
   912  
   913  	// x**0 == 1
   914  	if len(y) == 0 {
   915  		return z.setWord(1)
   916  	}
   917  	// y > 0
   918  
   919  	// x**1 mod m == x mod m
   920  	if len(y) == 1 && y[0] == 1 && len(m) != 0 {
   921  		_, z = z.div(z, x, m)
   922  		return z
   923  	}
   924  	// y > 1
   925  
   926  	if len(m) != 0 {
   927  		// We likely end up being as long as the modulus.
   928  		z = z.make(len(m))
   929  	}
   930  	z = z.set(x)
   931  
   932  	// If the base is non-trivial and the exponent is large, we use
   933  	// 4-bit, windowed exponentiation. This involves precomputing 14 values
   934  	// (x^2...x^15) but then reduces the number of multiply-reduces by a
   935  	// third. Even for a 32-bit exponent, this reduces the number of
   936  	// operations. Uses Montgomery method for odd moduli.
   937  	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
   938  		if m[0]&1 == 1 {
   939  			return z.expNNMontgomery(x, y, m)
   940  		}
   941  		return z.expNNWindowed(x, y, m)
   942  	}
   943  
   944  	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
   945  	shift := nlz(v) + 1
   946  	v <<= shift
   947  	var q nat
   948  
   949  	const mask = 1 << (_W - 1)
   950  
   951  	// We walk through the bits of the exponent one by one. Each time we
   952  	// see a bit, we square, thus doubling the power. If the bit is a one,
   953  	// we also multiply by x, thus adding one to the power.
   954  
   955  	w := _W - int(shift)
   956  	// zz and r are used to avoid allocating in mul and div as
   957  	// otherwise the arguments would alias.
   958  	var zz, r nat
   959  	for j := 0; j < w; j++ {
   960  		zz = zz.mul(z, z)
   961  		zz, z = z, zz
   962  
   963  		if v&mask != 0 {
   964  			zz = zz.mul(z, x)
   965  			zz, z = z, zz
   966  		}
   967  
   968  		if len(m) != 0 {
   969  			zz, r = zz.div(r, z, m)
   970  			zz, r, q, z = q, z, zz, r
   971  		}
   972  
   973  		v <<= 1
   974  	}
   975  
   976  	for i := len(y) - 2; i >= 0; i-- {
   977  		v = y[i]
   978  
   979  		for j := 0; j < _W; j++ {
   980  			zz = zz.mul(z, z)
   981  			zz, z = z, zz
   982  
   983  			if v&mask != 0 {
   984  				zz = zz.mul(z, x)
   985  				zz, z = z, zz
   986  			}
   987  
   988  			if len(m) != 0 {
   989  				zz, r = zz.div(r, z, m)
   990  				zz, r, q, z = q, z, zz, r
   991  			}
   992  
   993  			v <<= 1
   994  		}
   995  	}
   996  
   997  	return z.norm()
   998  }
   999  
  1000  // expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
  1001  func (z nat) expNNWindowed(x, y, m nat) nat {
  1002  	// zz and r are used to avoid allocating in mul and div as otherwise
  1003  	// the arguments would alias.
  1004  	var zz, r nat
  1005  
  1006  	const n = 4
  1007  	// powers[i] contains x^i.
  1008  	var powers [1 << n]nat
  1009  	powers[0] = natOne
  1010  	powers[1] = x
  1011  	for i := 2; i < 1<<n; i += 2 {
  1012  		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
  1013  		*p = p.mul(*p2, *p2)
  1014  		zz, r = zz.div(r, *p, m)
  1015  		*p, r = r, *p
  1016  		*p1 = p1.mul(*p, x)
  1017  		zz, r = zz.div(r, *p1, m)
  1018  		*p1, r = r, *p1
  1019  	}
  1020  
  1021  	z = z.setWord(1)
  1022  
  1023  	for i := len(y) - 1; i >= 0; i-- {
  1024  		yi := y[i]
  1025  		for j := 0; j < _W; j += n {
  1026  			if i != len(y)-1 || j != 0 {
  1027  				// Unrolled loop for significant performance
  1028  				// gain.  Use go test -bench=".*" in crypto/rsa
  1029  				// to check performance before making changes.
  1030  				zz = zz.mul(z, z)
  1031  				zz, z = z, zz
  1032  				zz, r = zz.div(r, z, m)
  1033  				z, r = r, z
  1034  
  1035  				zz = zz.mul(z, z)
  1036  				zz, z = z, zz
  1037  				zz, r = zz.div(r, z, m)
  1038  				z, r = r, z
  1039  
  1040  				zz = zz.mul(z, z)
  1041  				zz, z = z, zz
  1042  				zz, r = zz.div(r, z, m)
  1043  				z, r = r, z
  1044  
  1045  				zz = zz.mul(z, z)
  1046  				zz, z = z, zz
  1047  				zz, r = zz.div(r, z, m)
  1048  				z, r = r, z
  1049  			}
  1050  
  1051  			zz = zz.mul(z, powers[yi>>(_W-n)])
  1052  			zz, z = z, zz
  1053  			zz, r = zz.div(r, z, m)
  1054  			z, r = r, z
  1055  
  1056  			yi <<= n
  1057  		}
  1058  	}
  1059  
  1060  	return z.norm()
  1061  }
  1062  
  1063  // expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
  1064  // Uses Montgomery representation.
  1065  func (z nat) expNNMontgomery(x, y, m nat) nat {
  1066  	var zz, one, rr, RR nat
  1067  
  1068  	numWords := len(m)
  1069  
  1070  	// We want the lengths of x and m to be equal.
  1071  	if len(x) > numWords {
  1072  		_, rr = rr.div(rr, x, m)
  1073  	} else if len(x) < numWords {
  1074  		rr = rr.make(numWords)
  1075  		rr.clear()
  1076  		for i := range x {
  1077  			rr[i] = x[i]
  1078  		}
  1079  	} else {
  1080  		rr = x
  1081  	}
  1082  	x = rr
  1083  
  1084  	// Ideally the precomputations would be performed outside, and reused
  1085  	// k0 = -mˆ-1 mod 2ˆ_W. Algorithm from: Dumas, J.G. "On Newton–Raphson
  1086  	// Iteration for Multiplicative Inverses Modulo Prime Powers".
  1087  	k0 := 2 - m[0]
  1088  	t := m[0] - 1
  1089  	for i := 1; i < _W; i <<= 1 {
  1090  		t *= t
  1091  		k0 *= (t + 1)
  1092  	}
  1093  	k0 = -k0
  1094  
  1095  	// RR = 2ˆ(2*_W*len(m)) mod m
  1096  	RR = RR.setWord(1)
  1097  	zz = zz.shl(RR, uint(2*numWords*_W))
  1098  	_, RR = RR.div(RR, zz, m)
  1099  	if len(RR) < numWords {
  1100  		zz = zz.make(numWords)
  1101  		copy(zz, RR)
  1102  		RR = zz
  1103  	}
  1104  	// one = 1, with equal length to that of m
  1105  	one = one.make(numWords)
  1106  	one.clear()
  1107  	one[0] = 1
  1108  
  1109  	const n = 4
  1110  	// powers[i] contains x^i
  1111  	var powers [1 << n]nat
  1112  	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
  1113  	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
  1114  	for i := 2; i < 1<<n; i++ {
  1115  		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
  1116  	}
  1117  
  1118  	// initialize z = 1 (Montgomery 1)
  1119  	z = z.make(numWords)
  1120  	copy(z, powers[0])
  1121  
  1122  	zz = zz.make(numWords)
  1123  
  1124  	// same windowed exponent, but with Montgomery multiplications
  1125  	for i := len(y) - 1; i >= 0; i-- {
  1126  		yi := y[i]
  1127  		for j := 0; j < _W; j += n {
  1128  			if i != len(y)-1 || j != 0 {
  1129  				zz = zz.montgomery(z, z, m, k0, numWords)
  1130  				z = z.montgomery(zz, zz, m, k0, numWords)
  1131  				zz = zz.montgomery(z, z, m, k0, numWords)
  1132  				z = z.montgomery(zz, zz, m, k0, numWords)
  1133  			}
  1134  			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
  1135  			z, zz = zz, z
  1136  			yi <<= n
  1137  		}
  1138  	}
  1139  	// convert to regular number
  1140  	zz = zz.montgomery(z, one, m, k0, numWords)
  1141  	return zz.norm()
  1142  }
  1143  
  1144  // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
  1145  // If it returns true, n is prime with probability 1 - 1/4^reps.
  1146  // If it returns false, n is not prime.
  1147  func (n nat) probablyPrime(reps int) bool {
  1148  	if len(n) == 0 {
  1149  		return false
  1150  	}
  1151  
  1152  	if len(n) == 1 {
  1153  		if n[0] < 2 {
  1154  			return false
  1155  		}
  1156  
  1157  		if n[0]%2 == 0 {
  1158  			return n[0] == 2
  1159  		}
  1160  
  1161  		// We have to exclude these cases because we reject all
  1162  		// multiples of these numbers below.
  1163  		switch n[0] {
  1164  		case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1165  			return true
  1166  		}
  1167  	}
  1168  
  1169  	if n[0]&1 == 0 {
  1170  		return false // n is even
  1171  	}
  1172  
  1173  	const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1174  	const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1175  
  1176  	var r Word
  1177  	switch _W {
  1178  	case 32:
  1179  		r = n.modW(primesProduct32)
  1180  	case 64:
  1181  		r = n.modW(primesProduct64 & _M)
  1182  	default:
  1183  		panic("Unknown word size")
  1184  	}
  1185  
  1186  	if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1187  		r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1188  		return false
  1189  	}
  1190  
  1191  	if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1192  		r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1193  		return false
  1194  	}
  1195  
  1196  	nm1 := nat(nil).sub(n, natOne)
  1197  	// determine q, k such that nm1 = q << k
  1198  	k := nm1.trailingZeroBits()
  1199  	q := nat(nil).shr(nm1, k)
  1200  
  1201  	nm3 := nat(nil).sub(nm1, natTwo)
  1202  	rand := rand.New(rand.NewSource(int64(n[0])))
  1203  
  1204  	var x, y, quotient nat
  1205  	nm3Len := nm3.bitLen()
  1206  
  1207  NextRandom:
  1208  	for i := 0; i < reps; i++ {
  1209  		x = x.random(rand, nm3, nm3Len)
  1210  		x = x.add(x, natTwo)
  1211  		y = y.expNN(x, q, n)
  1212  		if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1213  			continue
  1214  		}
  1215  		for j := uint(1); j < k; j++ {
  1216  			y = y.mul(y, y)
  1217  			quotient, y = quotient.div(y, y, n)
  1218  			if y.cmp(nm1) == 0 {
  1219  				continue NextRandom
  1220  			}
  1221  			if y.cmp(natOne) == 0 {
  1222  				return false
  1223  			}
  1224  		}
  1225  		return false
  1226  	}
  1227  
  1228  	return true
  1229  }
  1230  
  1231  // bytes writes the value of z into buf using big-endian encoding.
  1232  // len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1233  // slice buf[i:]. The number i of unused bytes at the beginning of
  1234  // buf is returned as result.
  1235  func (z nat) bytes(buf []byte) (i int) {
  1236  	i = len(buf)
  1237  	for _, d := range z {
  1238  		for j := 0; j < _S; j++ {
  1239  			i--
  1240  			buf[i] = byte(d)
  1241  			d >>= 8
  1242  		}
  1243  	}
  1244  
  1245  	for i < len(buf) && buf[i] == 0 {
  1246  		i++
  1247  	}
  1248  
  1249  	return
  1250  }
  1251  
  1252  // setBytes interprets buf as the bytes of a big-endian unsigned
  1253  // integer, sets z to that value, and returns z.
  1254  func (z nat) setBytes(buf []byte) nat {
  1255  	z = z.make((len(buf) + _S - 1) / _S)
  1256  
  1257  	k := 0
  1258  	s := uint(0)
  1259  	var d Word
  1260  	for i := len(buf); i > 0; i-- {
  1261  		d |= Word(buf[i-1]) << s
  1262  		if s += 8; s == _S*8 {
  1263  			z[k] = d
  1264  			k++
  1265  			s = 0
  1266  			d = 0
  1267  		}
  1268  	}
  1269  	if k < len(z) {
  1270  		z[k] = d
  1271  	}
  1272  
  1273  	return z.norm()
  1274  }