github.com/dashpay/godash@v0.0.0-20160726055534-e038a21e0e3d/btcec/field.go

github.com/dashpay/godash@v0.0.0-20160726055534-e038a21e0e3d/btcec/field.go (about)

     1  // Copyright (c) 2013-2014 The btcsuite developers
     2  // Copyright (c) 2016 The Dash developers
     3  // Copyright (c) 2013-2014 Dave Collins
     4  // Use of this source code is governed by an ISC
     5  // license that can be found in the LICENSE file.
     6  
     7  package btcec
     8  
     9  // References:
    10  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
    11  //     http://cacr.uwaterloo.ca/hac/
    12  
    13  // All elliptic curve operations for secp256k1 are done in a finite field
    14  // characterized by a 256-bit prime.  Given this precision is larger than the
    15  // biggest available native type, obviously some form of bignum math is needed.
    16  // This package implements specialized fixed-precision field arithmetic rather
    17  // than relying on an arbitrary-precision arithmetic package such as math/big
    18  // for dealing with the field math since the size is known.  As a result, rather
    19  // large performance gains are achieved by taking advantage of many
    20  // optimizations not available to arbitrary-precision arithmetic and generic
    21  // modular arithmetic algorithms.
    22  //
    23  // There are various ways to internally represent each finite field element.
    24  // For example, the most obvious representation would be to use an array of 4
    25  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
    26  // a couple of issues.  First, there is no native Go type large enough to handle
    27  // the intermediate results while adding or multiplying two 64-bit numbers, and
    28  // second there is no space left for overflows when performing the intermediate
    29  // arithmetic between each array element which would lead to expensive carry
    30  // propagation.
    31  //
// Given the above, this implementation represents the field elements as
    33  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
    34  // chosen for the following reasons:
    35  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
    36  //    registers available for specialized purposes such as MMX) so the
    37  //    intermediate results can typically be done using a native register (and
    38  //    using uint64s to avoid the need for additional half-word arithmetic)
    39  // 2) In order to allow addition of the internal words without having to
//    propagate the carry, the max normalized value for each register must
    41  //    be less than the number of bits available in the register
    42  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
    43  //    reasonable choice for #2
    44  // 4) Given the need for 256-bits of precision and the properties stated in #1,
    45  //    #2, and #3, the representation which best accommodates this is 10 uint32s
    46  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
    47  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
    48  //    overflow
    49  //
    50  // Since it is so important that the field arithmetic is extremely fast for
    51  // high performance crypto, this package does not perform any validation where
    52  // it ordinarily would.  For example, some functions only give the correct
// result if the field is normalized and there is no checking to ensure it is.
    54  // While I typically prefer to ensure all state and input is valid for most
    55  // packages, this code is really only used internally and every extra check
    56  // counts.
    57  
    58  import (
    59  	"encoding/hex"
    60  )
    61  
// Constants used to make the code more readable.  Each one is a bitmask that
// selects the stated number of low-order bits of a value.
const (
	twoBitsMask   = 0x3  // low 2 bits
	fourBitsMask  = 0xf  // low 4 bits
	sixBitsMask   = 0x3f // low 6 bits
	eightBitsMask = 0xff // low 8 bits
)
    69  
// Constants related to the field representation.
const (
	// fieldWords is the number of words used to internally represent the
	// 256-bit value.
	fieldWords = 10

	// fieldBase is the exponent used to form the numeric base of each word.
	// 2^(fieldBase*i) where i is the word position.
	fieldBase = 26

	// fieldOverflowBits is the minimum number of "overflow" bits for each
	// word in the field value.  With 32-bit words and a 26-bit base this
	// evaluates to 6.
	fieldOverflowBits = 32 - fieldBase

	// fieldBaseMask is the mask for the bits in each word needed to
	// represent the numeric base of each word (except the most significant
	// word).  It evaluates to 0x3ffffff (the low 26 bits).
	fieldBaseMask = (1 << fieldBase) - 1

	// fieldMSBBits is the number of bits in the most significant word used
	// to represent the value.  It evaluates to 256 - 26*9 = 22.
	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))

	// fieldMSBMask is the mask for the bits in the most significant word
	// needed to represent the value.  It evaluates to 0x3fffff (the low 22
	// bits).
	fieldMSBMask = (1 << fieldMSBBits) - 1

	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.  It equals 2^26 - 977.
	fieldPrimeWordZero = 0x3fffc2f

	// fieldPrimeWordOne is word one of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.  It equals 2^26 - 65.
	fieldPrimeWordOne = 0x3ffffbf
)
   107  
// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field.  This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
// represents each 256-bit value as 10 32-bit integers in base 2^26.  This
// provides 6 bits of overflow in each word (10 bits in the most significant
// word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
// the arithmetic needed for elliptic curve operations.
//
// The following depicts the internal representation:
// 	 -----------------------------------------------------------------
// 	|        n[9]       |        n[8]       | ... |        n[0]       |
// 	| 32 bits available | 32 bits available | ... | 32 bits available |
// 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
// 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
// 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
// 	 -----------------------------------------------------------------
//
// For example, consider the number 2^49 + 1.  It would be represented as:
// 	n[0] = 1
// 	n[1] = 2^23
// 	n[2..9] = 0
//
// The full 256-bit value is then calculated by looping i from 9..0 and
// doing sum(n[i] * 2^(26i)) like so:
// 	n[9] * 2^(26*9) = 0    * 2^234 = 0
// 	n[8] * 2^(26*8) = 0    * 2^208 = 0
// 	...
// 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
// 	n[0] * 2^(26*0) = 1    * 2^0   = 1
// 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
type fieldVal struct {
	// n holds the ten base-2^26 words of the value, with n[0] being the
	// least significant word and n[9] the most significant one.
	n [10]uint32
}
   141  
   142  // String returns the field value as a human-readable hex string.
   143  func (f fieldVal) String() string {
   144  	t := new(fieldVal).Set(&f).Normalize()
   145  	return hex.EncodeToString(t.Bytes()[:])
   146  }
   147  
   148  // Zero sets the field value to zero.  A newly created field value is already
   149  // set to zero.  This function can be useful to clear an existing field value
   150  // for reuse.
   151  func (f *fieldVal) Zero() {
   152  	f.n[0] = 0
   153  	f.n[1] = 0
   154  	f.n[2] = 0
   155  	f.n[3] = 0
   156  	f.n[4] = 0
   157  	f.n[5] = 0
   158  	f.n[6] = 0
   159  	f.n[7] = 0
   160  	f.n[8] = 0
   161  	f.n[9] = 0
   162  }
   163  
   164  // Set sets the field value equal to the passed value.
   165  //
   166  // The field value is returned to support chaining.  This enables syntax like:
   167  // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
   168  // modified.
   169  func (f *fieldVal) Set(val *fieldVal) *fieldVal {
   170  	*f = *val
   171  	return f
   172  }
   173  
   174  // SetInt sets the field value to the passed integer.  This is a convenience
   175  // function since it is fairly common to perform some arithemetic with small
   176  // native integers.
   177  //
   178  // The field value is returned to support chaining.  This enables syntax such
   179  // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
   180  func (f *fieldVal) SetInt(ui uint) *fieldVal {
   181  	f.Zero()
   182  	f.n[0] = uint32(ui)
   183  	return f
   184  }
   185  
   186  // SetBytes packs the passed 32-byte big-endian value into the internal field
   187  // value representation.
   188  //
   189  // The field value is returned to support chaining.  This enables syntax like:
   190  // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
   191  func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
   192  	// Pack the 256 total bits across the 10 uint32 words with a max of
   193  	// 26-bits per word.  This could be done with a couple of for loops,
   194  	// but this unrolled version is significantly faster.  Benchmarks show
   195  	// this is about 34 times faster than the variant which uses loops.
   196  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
   197  		(uint32(b[28])&twoBitsMask)<<24
   198  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
   199  		(uint32(b[25])&fourBitsMask)<<22
   200  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
   201  		(uint32(b[22])&sixBitsMask)<<20
   202  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
   203  		uint32(b[19])<<18
   204  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
   205  		(uint32(b[15])&twoBitsMask)<<24
   206  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
   207  		(uint32(b[12])&fourBitsMask)<<22
   208  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
   209  		(uint32(b[9])&sixBitsMask)<<20
   210  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
   211  		uint32(b[6])<<18
   212  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
   213  		(uint32(b[2])&twoBitsMask)<<24
   214  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
   215  	return f
   216  }
   217  
   218  // SetByteSlice packs the passed big-endian value into the internal field value
   219  // representation.  Only the first 32-bytes are used.  As a result, it is up to
   220  // the caller to ensure numbers of the appropriate size are used or the value
   221  // will be truncated.
   222  //
   223  // The field value is returned to support chaining.  This enables syntax like:
   224  // f := new(fieldVal).SetByteSlice(byteSlice)
   225  func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
   226  	var b32 [32]byte
   227  	for i := 0; i < len(b); i++ {
   228  		if i < 32 {
   229  			b32[i+(32-len(b))] = b[i]
   230  		}
   231  	}
   232  	return f.SetBytes(&b32)
   233  }
   234  
   235  // SetHex decodes the passed big-endian hex string into the internal field value
   236  // representation.  Only the first 32-bytes are used.
   237  //
   238  // The field value is returned to support chaining.  This enables syntax like:
   239  // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
   240  func (f *fieldVal) SetHex(hexString string) *fieldVal {
   241  	if len(hexString)%2 != 0 {
   242  		hexString = "0" + hexString
   243  	}
   244  	bytes, _ := hex.DecodeString(hexString)
   245  	return f.SetByteSlice(bytes)
   246  }
   247  
   248  // Normalize normalizes the internal field words into the desired range and
   249  // performs fast modular reduction over the secp256k1 prime by making use of the
   250  // special form of the prime.
   251  func (f *fieldVal) Normalize() *fieldVal {
   252  	// The field representation leaves 6 bits of overflow in each
   253  	// word so intermediate calculations can be performed without needing
   254  	// to propagate the carry to each higher word during the calculations.
   255  	// In order to normalize, first we need to "compact" the full 256-bit
   256  	// value to the right and treat the additional 64 leftmost bits as
   257  	// the magnitude.
   258  	m := f.n[0]
   259  	t0 := m & fieldBaseMask
   260  	m = (m >> fieldBase) + f.n[1]
   261  	t1 := m & fieldBaseMask
   262  	m = (m >> fieldBase) + f.n[2]
   263  	t2 := m & fieldBaseMask
   264  	m = (m >> fieldBase) + f.n[3]
   265  	t3 := m & fieldBaseMask
   266  	m = (m >> fieldBase) + f.n[4]
   267  	t4 := m & fieldBaseMask
   268  	m = (m >> fieldBase) + f.n[5]
   269  	t5 := m & fieldBaseMask
   270  	m = (m >> fieldBase) + f.n[6]
   271  	t6 := m & fieldBaseMask
   272  	m = (m >> fieldBase) + f.n[7]
   273  	t7 := m & fieldBaseMask
   274  	m = (m >> fieldBase) + f.n[8]
   275  	t8 := m & fieldBaseMask
   276  	m = (m >> fieldBase) + f.n[9]
   277  	t9 := m & fieldMSBMask
   278  	m = m >> fieldMSBBits
   279  
   280  	// At this point, if the magnitude is greater than 0, the overall value
   281  	// is greater than the max possible 256-bit value.  In particular, it is
   282  	// "how many times larger" than the max value it is.  Since this field
   283  	// is doing arithmetic modulo the secp256k1 prime, we need to perform
   284  	// modular reduction over the prime.
   285  	//
   286  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   287  	// when the modulus is of the special form m = b^t - c, highly efficient
   288  	// reduction can be achieved.
   289  	//
   290  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   291  	// this criteria.
   292  	//
   293  	// 4294968273 in field representation (base 2^26) is:
   294  	// n[0] = 977
   295  	// n[1] = 64
   296  	// That is to say (2^26 * 64) + 977 = 4294968273
   297  	//
   298  	// The algorithm presented in the referenced section typically repeats
   299  	// until the quotient is zero.  However, due to our field representation
   300  	// we already know at least how many times we would need to repeat as
   301  	// it's the value currently in m.  Thus we can simply multiply the
   302  	// magnitude by the field representation of the prime and do a single
   303  	// iteration.  Notice that nothing will be changed when the magnitude is
   304  	// zero, so we could skip this in that case, however always running
   305  	// regardless allows it to run in constant time.
   306  	r := t0 + m*977
   307  	t0 = r & fieldBaseMask
   308  	r = (r >> fieldBase) + t1 + m*64
   309  	t1 = r & fieldBaseMask
   310  	r = (r >> fieldBase) + t2
   311  	t2 = r & fieldBaseMask
   312  	r = (r >> fieldBase) + t3
   313  	t3 = r & fieldBaseMask
   314  	r = (r >> fieldBase) + t4
   315  	t4 = r & fieldBaseMask
   316  	r = (r >> fieldBase) + t5
   317  	t5 = r & fieldBaseMask
   318  	r = (r >> fieldBase) + t6
   319  	t6 = r & fieldBaseMask
   320  	r = (r >> fieldBase) + t7
   321  	t7 = r & fieldBaseMask
   322  	r = (r >> fieldBase) + t8
   323  	t8 = r & fieldBaseMask
   324  	r = (r >> fieldBase) + t9
   325  	t9 = r & fieldMSBMask
   326  
   327  	// At this point, the result will be in the range 0 <= result <=
   328  	// prime + (2^64 - c).  Therefore, one more subtraction of the prime
   329  	// might be needed if the current result is greater than or equal to the
   330  	// prime.  The following does the final reduction in constant time.
   331  	// Note that the if/else here intentionally does the bitwise OR with
   332  	// zero even though it won't change the value to ensure constant time
   333  	// between the branches.
   334  	var mask int32
   335  	if t0 < fieldPrimeWordZero {
   336  		mask |= -1
   337  	} else {
   338  		mask |= 0
   339  	}
   340  	if t1 < fieldPrimeWordOne {
   341  		mask |= -1
   342  	} else {
   343  		mask |= 0
   344  	}
   345  	if t2 < fieldBaseMask {
   346  		mask |= -1
   347  	} else {
   348  		mask |= 0
   349  	}
   350  	if t3 < fieldBaseMask {
   351  		mask |= -1
   352  	} else {
   353  		mask |= 0
   354  	}
   355  	if t4 < fieldBaseMask {
   356  		mask |= -1
   357  	} else {
   358  		mask |= 0
   359  	}
   360  	if t5 < fieldBaseMask {
   361  		mask |= -1
   362  	} else {
   363  		mask |= 0
   364  	}
   365  	if t6 < fieldBaseMask {
   366  		mask |= -1
   367  	} else {
   368  		mask |= 0
   369  	}
   370  	if t7 < fieldBaseMask {
   371  		mask |= -1
   372  	} else {
   373  		mask |= 0
   374  	}
   375  	if t8 < fieldBaseMask {
   376  		mask |= -1
   377  	} else {
   378  		mask |= 0
   379  	}
   380  	if t9 < fieldMSBMask {
   381  		mask |= -1
   382  	} else {
   383  		mask |= 0
   384  	}
   385  	t0 = t0 - uint32(^mask&fieldPrimeWordZero)
   386  	t1 = t1 - uint32(^mask&fieldPrimeWordOne)
   387  	t2 = t2 & uint32(mask)
   388  	t3 = t3 & uint32(mask)
   389  	t4 = t4 & uint32(mask)
   390  	t5 = t5 & uint32(mask)
   391  	t6 = t6 & uint32(mask)
   392  	t7 = t7 & uint32(mask)
   393  	t8 = t8 & uint32(mask)
   394  	t9 = t9 & uint32(mask)
   395  
   396  	// Finally, set the normalized and reduced words.
   397  	f.n[0] = t0
   398  	f.n[1] = t1
   399  	f.n[2] = t2
   400  	f.n[3] = t3
   401  	f.n[4] = t4
   402  	f.n[5] = t5
   403  	f.n[6] = t6
   404  	f.n[7] = t7
   405  	f.n[8] = t8
   406  	f.n[9] = t9
   407  	return f
   408  }
   409  
   410  // PutBytes unpacks the field value to a 32-byte big-endian value using the
   411  // passed byte array.  There is a similar function, Bytes, which unpacks the
   412  // field value into a new array and returns that.  This version is provided
   413  // since it can be useful to cut down on the number of allocations by allowing
   414  // the caller to reuse a buffer.
   415  //
   416  // The field value must be normalized for this function to return the correct
   417  // result.
   418  func (f *fieldVal) PutBytes(b *[32]byte) {
   419  	// Unpack the 256 total bits from the 10 uint32 words with a max of
   420  	// 26-bits per word.  This could be done with a couple of for loops,
   421  	// but this unrolled version is a bit faster.  Benchmarks show this is
   422  	// about 10 times faster than the variant which uses loops.
   423  	b[31] = byte(f.n[0] & eightBitsMask)
   424  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
   425  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
   426  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
   427  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
   428  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
   429  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
   430  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
   431  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
   432  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
   433  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
   434  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
   435  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
   436  	b[18] = byte(f.n[4] & eightBitsMask)
   437  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
   438  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
   439  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
   440  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
   441  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
   442  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
   443  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
   444  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
   445  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
   446  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
   447  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
   448  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
   449  	b[5] = byte(f.n[8] & eightBitsMask)
   450  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
   451  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
   452  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
   453  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
   454  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
   455  }
   456  
   457  // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
   458  // for a variant that allows the a buffer to be passed which can be useful to
   459  // to cut down on the number of allocations by allowing the caller to reuse a
   460  // buffer.
   461  //
   462  // The field value must be normalized for this function to return correct
   463  // result.
   464  func (f *fieldVal) Bytes() *[32]byte {
   465  	b := new([32]byte)
   466  	f.PutBytes(b)
   467  	return b
   468  }
   469  
   470  // IsZero returns whether or not the field value is equal to zero.
   471  func (f *fieldVal) IsZero() bool {
   472  	// The value can only be zero if no bits are set in any of the words.
   473  	// This is a constant time implementation.
   474  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   475  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   476  
   477  	return bits == 0
   478  }
   479  
   480  // IsOdd returns whether or not the field value is an odd number.
   481  //
   482  // The field value must be normalized for this function to return correct
   483  // result.
   484  func (f *fieldVal) IsOdd() bool {
   485  	// Only odd numbers have the bottom bit set.
   486  	return f.n[0]&1 == 1
   487  }
   488  
   489  // Equals returns whether or not the two field values are the same.  Both
   490  // field values being compared must be normalized for this function to return
   491  // the correct result.
   492  func (f *fieldVal) Equals(val *fieldVal) bool {
   493  	// Xor only sets bits when they are different, so the two field values
   494  	// can only be the same if no bits are set after xoring each word.
   495  	// This is a constant time implementation.
   496  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
   497  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
   498  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
   499  		(f.n[9] ^ val.n[9])
   500  
   501  	return bits == 0
   502  }
   503  
   504  // NegateVal negates the passed value and stores the result in f.  The caller
   505  // must provide the magnitude of the passed value for a correct result.
   506  //
   507  // The field value is returned to support chaining.  This enables syntax like:
   508  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
   509  func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
   510  	// Negation in the field is just the prime minus the value.  However,
   511  	// in order to allow negation against a field value without having to
   512  	// normalize/reduce it first, multiply by the magnitude (that is how
   513  	// "far" away it is from the normalized value) to adjust.  Also, since
   514  	// negating a value pushes it one more order of magnitude away from the
   515  	// normalized range, add 1 to compensate.
   516  	//
   517  	// For some intuition here, imagine you're performing mod 12 arithmetic
   518  	// (picture a clock) and you are negating the number 7.  So you start at
   519  	// 12 (which is of course 0 under mod 12) and count backwards (left on
   520  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
   521  	// Now, assume you're starting with 19, which is a number that is
   522  	// already larger than the modulus and congruent to 7 (mod 12).  When a
   523  	// value is already in the desired range, its magnitude is 1.  Since 19
   524  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
   525  	// multiple of the modulus is conguent to zero (mod m), the answer can
   526  	// be shortcut by simply mulplying the magnitude by the modulus and
   527  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
   528  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
   529  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
   530  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
   531  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
   532  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
   533  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
   534  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
   535  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
   536  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
   537  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
   538  
   539  	return f
   540  }
   541  
// Negate negates the field value.  The existing field value is modified.  The
// caller must provide the magnitude of the field value for a correct result.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Negate(1).AddInt(1) so that f = -f + 1 (for an f of magnitude 1).
func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
	// Delegate to NegateVal with f acting as both the source and the
	// destination.
	return f.NegateVal(f, magnitude)
}
   550  
   551  // AddInt adds the passed integer to the existing field value and stores the
   552  // result in f.  This is a convenience function since it is fairly common to
   553  // perform some arithemetic with small native integers.
   554  //
   555  // The field value is returned to support chaining.  This enables syntax like:
   556  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
   557  func (f *fieldVal) AddInt(ui uint) *fieldVal {
   558  	// Since the field representation intentionally provides overflow bits,
   559  	// it's ok to use carryless addition as the carry bit is safely part of
   560  	// the word and will be normalized out.
   561  	f.n[0] += uint32(ui)
   562  
   563  	return f
   564  }
   565  
   566  // Add adds the passed value to the existing field value and stores the result
   567  // in f.
   568  //
   569  // The field value is returned to support chaining.  This enables syntax like:
   570  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
   571  func (f *fieldVal) Add(val *fieldVal) *fieldVal {
   572  	// Since the field representation intentionally provides overflow bits,
   573  	// it's ok to use carryless addition as the carry bit is safely part of
   574  	// each word and will be normalized out.  This could obviously be done
   575  	// in a loop, but the unrolled version is faster.
   576  	f.n[0] += val.n[0]
   577  	f.n[1] += val.n[1]
   578  	f.n[2] += val.n[2]
   579  	f.n[3] += val.n[3]
   580  	f.n[4] += val.n[4]
   581  	f.n[5] += val.n[5]
   582  	f.n[6] += val.n[6]
   583  	f.n[7] += val.n[7]
   584  	f.n[8] += val.n[8]
   585  	f.n[9] += val.n[9]
   586  
   587  	return f
   588  }
   589  
   590  // Add2 adds the passed two field values together and stores the result in f.
   591  //
   592  // The field value is returned to support chaining.  This enables syntax like:
   593  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
   594  func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
   595  	// Since the field representation intentionally provides overflow bits,
   596  	// it's ok to use carryless addition as the carry bit is safely part of
   597  	// each word and will be normalized out.  This could obviously be done
   598  	// in a loop, but the unrolled version is faster.
   599  	f.n[0] = val.n[0] + val2.n[0]
   600  	f.n[1] = val.n[1] + val2.n[1]
   601  	f.n[2] = val.n[2] + val2.n[2]
   602  	f.n[3] = val.n[3] + val2.n[3]
   603  	f.n[4] = val.n[4] + val2.n[4]
   604  	f.n[5] = val.n[5] + val2.n[5]
   605  	f.n[6] = val.n[6] + val2.n[6]
   606  	f.n[7] = val.n[7] + val2.n[7]
   607  	f.n[8] = val.n[8] + val2.n[8]
   608  	f.n[9] = val.n[9] + val2.n[9]
   609  
   610  	return f
   611  }
   612  
   613  // MulInt multiplies the field value by the passed int and stores the result in
   614  // f.  Note that this function can overflow if multiplying the value by any of
   615  // the individual words exceeds a max uint32.  Therefore it is important that
   616  // the caller ensures no overflows will occur before using this function.
   617  //
   618  // The field value is returned to support chaining.  This enables syntax like:
   619  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
   620  func (f *fieldVal) MulInt(val uint) *fieldVal {
   621  	// Since each word of the field representation can hold up to
   622  	// fieldOverflowBits extra bits which will be normalized out, it's safe
   623  	// to multiply each word without using a larger type or carry
   624  	// propagation so long as the values won't overflow a uint32.  This
   625  	// could obviously be done in a loop, but the unrolled version is
   626  	// faster.
   627  	ui := uint32(val)
   628  	f.n[0] *= ui
   629  	f.n[1] *= ui
   630  	f.n[2] *= ui
   631  	f.n[3] *= ui
   632  	f.n[4] *= ui
   633  	f.n[5] *= ui
   634  	f.n[6] *= ui
   635  	f.n[7] *= ui
   636  	f.n[8] *= ui
   637  	f.n[9] *= ui
   638  
   639  	return f
   640  }
   641  
// Mul multiplies the passed value to the existing field value and stores the
// result in f.  Note that this function can overflow if multiplying any
// of the individual words exceeds a max uint32.  In practice, this means the
// magnitude of either value involved in the multiplication must be a max of
// 8.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
	// Delegate to Mul2 with f acting as both the first operand and the
	// destination.
	return f.Mul2(f, val)
}
   653  
   654  // Mul2 multiplies the passed two field values together and stores the result
// in f.  Note that this function can overflow if multiplying any of
   656  // the individual words exceeds a max uint32.  In practice, this means the
   657  // magnitude of either value involved in the multiplication must be a max of
   658  // 8.
   659  //
   660  // The field value is returned to support chaining.  This enables syntax like:
   661  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
   662  func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
   663  	// This could be done with a couple of for loops and an array to store
   664  	// the intermediate terms, but this unrolled version is significantly
   665  	// faster.
   666  
   667  	// Terms for 2^(fieldBase*0).
   668  	m := uint64(val.n[0]) * uint64(val2.n[0])
   669  	t0 := m & fieldBaseMask
   670  
   671  	// Terms for 2^(fieldBase*1).
   672  	m = (m >> fieldBase) +
   673  		uint64(val.n[0])*uint64(val2.n[1]) +
   674  		uint64(val.n[1])*uint64(val2.n[0])
   675  	t1 := m & fieldBaseMask
   676  
   677  	// Terms for 2^(fieldBase*2).
   678  	m = (m >> fieldBase) +
   679  		uint64(val.n[0])*uint64(val2.n[2]) +
   680  		uint64(val.n[1])*uint64(val2.n[1]) +
   681  		uint64(val.n[2])*uint64(val2.n[0])
   682  	t2 := m & fieldBaseMask
   683  
   684  	// Terms for 2^(fieldBase*3).
   685  	m = (m >> fieldBase) +
   686  		uint64(val.n[0])*uint64(val2.n[3]) +
   687  		uint64(val.n[1])*uint64(val2.n[2]) +
   688  		uint64(val.n[2])*uint64(val2.n[1]) +
   689  		uint64(val.n[3])*uint64(val2.n[0])
   690  	t3 := m & fieldBaseMask
   691  
   692  	// Terms for 2^(fieldBase*4).
   693  	m = (m >> fieldBase) +
   694  		uint64(val.n[0])*uint64(val2.n[4]) +
   695  		uint64(val.n[1])*uint64(val2.n[3]) +
   696  		uint64(val.n[2])*uint64(val2.n[2]) +
   697  		uint64(val.n[3])*uint64(val2.n[1]) +
   698  		uint64(val.n[4])*uint64(val2.n[0])
   699  	t4 := m & fieldBaseMask
   700  
   701  	// Terms for 2^(fieldBase*5).
   702  	m = (m >> fieldBase) +
   703  		uint64(val.n[0])*uint64(val2.n[5]) +
   704  		uint64(val.n[1])*uint64(val2.n[4]) +
   705  		uint64(val.n[2])*uint64(val2.n[3]) +
   706  		uint64(val.n[3])*uint64(val2.n[2]) +
   707  		uint64(val.n[4])*uint64(val2.n[1]) +
   708  		uint64(val.n[5])*uint64(val2.n[0])
   709  	t5 := m & fieldBaseMask
   710  
   711  	// Terms for 2^(fieldBase*6).
   712  	m = (m >> fieldBase) +
   713  		uint64(val.n[0])*uint64(val2.n[6]) +
   714  		uint64(val.n[1])*uint64(val2.n[5]) +
   715  		uint64(val.n[2])*uint64(val2.n[4]) +
   716  		uint64(val.n[3])*uint64(val2.n[3]) +
   717  		uint64(val.n[4])*uint64(val2.n[2]) +
   718  		uint64(val.n[5])*uint64(val2.n[1]) +
   719  		uint64(val.n[6])*uint64(val2.n[0])
   720  	t6 := m & fieldBaseMask
   721  
   722  	// Terms for 2^(fieldBase*7).
   723  	m = (m >> fieldBase) +
   724  		uint64(val.n[0])*uint64(val2.n[7]) +
   725  		uint64(val.n[1])*uint64(val2.n[6]) +
   726  		uint64(val.n[2])*uint64(val2.n[5]) +
   727  		uint64(val.n[3])*uint64(val2.n[4]) +
   728  		uint64(val.n[4])*uint64(val2.n[3]) +
   729  		uint64(val.n[5])*uint64(val2.n[2]) +
   730  		uint64(val.n[6])*uint64(val2.n[1]) +
   731  		uint64(val.n[7])*uint64(val2.n[0])
   732  	t7 := m & fieldBaseMask
   733  
   734  	// Terms for 2^(fieldBase*8).
   735  	m = (m >> fieldBase) +
   736  		uint64(val.n[0])*uint64(val2.n[8]) +
   737  		uint64(val.n[1])*uint64(val2.n[7]) +
   738  		uint64(val.n[2])*uint64(val2.n[6]) +
   739  		uint64(val.n[3])*uint64(val2.n[5]) +
   740  		uint64(val.n[4])*uint64(val2.n[4]) +
   741  		uint64(val.n[5])*uint64(val2.n[3]) +
   742  		uint64(val.n[6])*uint64(val2.n[2]) +
   743  		uint64(val.n[7])*uint64(val2.n[1]) +
   744  		uint64(val.n[8])*uint64(val2.n[0])
   745  	t8 := m & fieldBaseMask
   746  
   747  	// Terms for 2^(fieldBase*9).
   748  	m = (m >> fieldBase) +
   749  		uint64(val.n[0])*uint64(val2.n[9]) +
   750  		uint64(val.n[1])*uint64(val2.n[8]) +
   751  		uint64(val.n[2])*uint64(val2.n[7]) +
   752  		uint64(val.n[3])*uint64(val2.n[6]) +
   753  		uint64(val.n[4])*uint64(val2.n[5]) +
   754  		uint64(val.n[5])*uint64(val2.n[4]) +
   755  		uint64(val.n[6])*uint64(val2.n[3]) +
   756  		uint64(val.n[7])*uint64(val2.n[2]) +
   757  		uint64(val.n[8])*uint64(val2.n[1]) +
   758  		uint64(val.n[9])*uint64(val2.n[0])
   759  	t9 := m & fieldBaseMask
   760  
   761  	// Terms for 2^(fieldBase*10).
   762  	m = (m >> fieldBase) +
   763  		uint64(val.n[1])*uint64(val2.n[9]) +
   764  		uint64(val.n[2])*uint64(val2.n[8]) +
   765  		uint64(val.n[3])*uint64(val2.n[7]) +
   766  		uint64(val.n[4])*uint64(val2.n[6]) +
   767  		uint64(val.n[5])*uint64(val2.n[5]) +
   768  		uint64(val.n[6])*uint64(val2.n[4]) +
   769  		uint64(val.n[7])*uint64(val2.n[3]) +
   770  		uint64(val.n[8])*uint64(val2.n[2]) +
   771  		uint64(val.n[9])*uint64(val2.n[1])
   772  	t10 := m & fieldBaseMask
   773  
   774  	// Terms for 2^(fieldBase*11).
   775  	m = (m >> fieldBase) +
   776  		uint64(val.n[2])*uint64(val2.n[9]) +
   777  		uint64(val.n[3])*uint64(val2.n[8]) +
   778  		uint64(val.n[4])*uint64(val2.n[7]) +
   779  		uint64(val.n[5])*uint64(val2.n[6]) +
   780  		uint64(val.n[6])*uint64(val2.n[5]) +
   781  		uint64(val.n[7])*uint64(val2.n[4]) +
   782  		uint64(val.n[8])*uint64(val2.n[3]) +
   783  		uint64(val.n[9])*uint64(val2.n[2])
   784  	t11 := m & fieldBaseMask
   785  
   786  	// Terms for 2^(fieldBase*12).
   787  	m = (m >> fieldBase) +
   788  		uint64(val.n[3])*uint64(val2.n[9]) +
   789  		uint64(val.n[4])*uint64(val2.n[8]) +
   790  		uint64(val.n[5])*uint64(val2.n[7]) +
   791  		uint64(val.n[6])*uint64(val2.n[6]) +
   792  		uint64(val.n[7])*uint64(val2.n[5]) +
   793  		uint64(val.n[8])*uint64(val2.n[4]) +
   794  		uint64(val.n[9])*uint64(val2.n[3])
   795  	t12 := m & fieldBaseMask
   796  
   797  	// Terms for 2^(fieldBase*13).
   798  	m = (m >> fieldBase) +
   799  		uint64(val.n[4])*uint64(val2.n[9]) +
   800  		uint64(val.n[5])*uint64(val2.n[8]) +
   801  		uint64(val.n[6])*uint64(val2.n[7]) +
   802  		uint64(val.n[7])*uint64(val2.n[6]) +
   803  		uint64(val.n[8])*uint64(val2.n[5]) +
   804  		uint64(val.n[9])*uint64(val2.n[4])
   805  	t13 := m & fieldBaseMask
   806  
   807  	// Terms for 2^(fieldBase*14).
   808  	m = (m >> fieldBase) +
   809  		uint64(val.n[5])*uint64(val2.n[9]) +
   810  		uint64(val.n[6])*uint64(val2.n[8]) +
   811  		uint64(val.n[7])*uint64(val2.n[7]) +
   812  		uint64(val.n[8])*uint64(val2.n[6]) +
   813  		uint64(val.n[9])*uint64(val2.n[5])
   814  	t14 := m & fieldBaseMask
   815  
   816  	// Terms for 2^(fieldBase*15).
   817  	m = (m >> fieldBase) +
   818  		uint64(val.n[6])*uint64(val2.n[9]) +
   819  		uint64(val.n[7])*uint64(val2.n[8]) +
   820  		uint64(val.n[8])*uint64(val2.n[7]) +
   821  		uint64(val.n[9])*uint64(val2.n[6])
   822  	t15 := m & fieldBaseMask
   823  
   824  	// Terms for 2^(fieldBase*16).
   825  	m = (m >> fieldBase) +
   826  		uint64(val.n[7])*uint64(val2.n[9]) +
   827  		uint64(val.n[8])*uint64(val2.n[8]) +
   828  		uint64(val.n[9])*uint64(val2.n[7])
   829  	t16 := m & fieldBaseMask
   830  
   831  	// Terms for 2^(fieldBase*17).
   832  	m = (m >> fieldBase) +
   833  		uint64(val.n[8])*uint64(val2.n[9]) +
   834  		uint64(val.n[9])*uint64(val2.n[8])
   835  	t17 := m & fieldBaseMask
   836  
   837  	// Terms for 2^(fieldBase*18).
   838  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
   839  	t18 := m & fieldBaseMask
   840  
   841  	// What's left is for 2^(fieldBase*19).
   842  	t19 := m >> fieldBase
   843  
   844  	// At this point, all of the terms are grouped into their respective
   845  	// base.
   846  	//
   847  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   848  	// when the modulus is of the special form m = b^t - c, highly efficient
   849  	// reduction can be achieved per the provided algorithm.
   850  	//
   851  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   852  	// this criteria.
   853  	//
   854  	// 4294968273 in field representation (base 2^26) is:
   855  	// n[0] = 977
   856  	// n[1] = 64
   857  	// That is to say (2^26 * 64) + 977 = 4294968273
   858  	//
   859  	// Since each word is in base 26, the upper terms (t10 and up) start
   860  	// at 260 bits (versus the final desired range of 256 bits), so the
   861  	// field representation of 'c' from above needs to be adjusted for the
   862  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
   863  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
   864  	// n[0] = 977 * 16 = 15632
   865  	// n[1] = 64 * 16 = 1024
   866  	// That is to say (2^26 * 1024) + 15632 = 68719492368
   867  	//
   868  	// To reduce the final term, t19, the entire 'c' value is needed instead
   869  	// of only n[0] because there are no more terms left to handle n[1].
   870  	// This means there might be some magnitude left in the upper bits that
   871  	// is handled below.
   872  	m = t0 + t10*15632
   873  	t0 = m & fieldBaseMask
   874  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
   875  	t1 = m & fieldBaseMask
   876  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
   877  	t2 = m & fieldBaseMask
   878  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
   879  	t3 = m & fieldBaseMask
   880  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
   881  	t4 = m & fieldBaseMask
   882  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
   883  	t5 = m & fieldBaseMask
   884  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
   885  	t6 = m & fieldBaseMask
   886  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
   887  	t7 = m & fieldBaseMask
   888  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
   889  	t8 = m & fieldBaseMask
   890  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
   891  	t9 = m & fieldMSBMask
   892  	m = m >> fieldMSBBits
   893  
   894  	// At this point, if the magnitude is greater than 0, the overall value
   895  	// is greater than the max possible 256-bit value.  In particular, it is
   896  	// "how many times larger" than the max value it is.
   897  	//
   898  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
   899  	// quotient is zero.  However, due to the above, we already know at
   900  	// least how many times we would need to repeat as it's the value
   901  	// currently in m.  Thus we can simply multiply the magnitude by the
   902  	// field representation of the prime and do a single iteration.  Notice
   903  	// that nothing will be changed when the magnitude is zero, so we could
   904  	// skip this in that case, however always running regardless allows it
   905  	// to run in constant time.  The final result will be in the range
   906  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
   907  	// magnitude of 1, but it is denormalized.
   908  	d := t0 + m*977
   909  	f.n[0] = uint32(d & fieldBaseMask)
   910  	d = (d >> fieldBase) + t1 + m*64
   911  	f.n[1] = uint32(d & fieldBaseMask)
   912  	f.n[2] = uint32((d >> fieldBase) + t2)
   913  	f.n[3] = uint32(t3)
   914  	f.n[4] = uint32(t4)
   915  	f.n[5] = uint32(t5)
   916  	f.n[6] = uint32(t6)
   917  	f.n[7] = uint32(t7)
   918  	f.n[8] = uint32(t8)
   919  	f.n[9] = uint32(t9)
   920  
   921  	return f
   922  }
   923  
   924  // Square squares the field value.  The existing field value is modified.  Note
   925  // that this function can overflow if multiplying any of the individual words
   926  // exceeds a max uint32.  In practice, this means the magnitude of the field
   927  // must be a max of 8 to prevent overflow.
   928  //
   929  // The field value is returned to support chaining.  This enables syntax like:
   930  // f.Square().Mul(f2) so that f = f^2 * f2.
   931  func (f *fieldVal) Square() *fieldVal {
   932  	return f.SquareVal(f)
   933  }
   934  
   935  // SquareVal squares the passed value and stores the result in f.  Note that
   936  // this function can overflow if multiplying any of the individual words
   937  // exceeds a max uint32.  In practice, this means the magnitude of the field
   938  // being squred must be a max of 8 to prevent overflow.
   939  //
   940  // The field value is returned to support chaining.  This enables syntax like:
   941  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
   942  func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
   943  	// This could be done with a couple of for loops and an array to store
   944  	// the intermediate terms, but this unrolled version is significantly
   945  	// faster.
   946  
   947  	// Terms for 2^(fieldBase*0).
   948  	m := uint64(val.n[0]) * uint64(val.n[0])
   949  	t0 := m & fieldBaseMask
   950  
   951  	// Terms for 2^(fieldBase*1).
   952  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
   953  	t1 := m & fieldBaseMask
   954  
   955  	// Terms for 2^(fieldBase*2).
   956  	m = (m >> fieldBase) +
   957  		2*uint64(val.n[0])*uint64(val.n[2]) +
   958  		uint64(val.n[1])*uint64(val.n[1])
   959  	t2 := m & fieldBaseMask
   960  
   961  	// Terms for 2^(fieldBase*3).
   962  	m = (m >> fieldBase) +
   963  		2*uint64(val.n[0])*uint64(val.n[3]) +
   964  		2*uint64(val.n[1])*uint64(val.n[2])
   965  	t3 := m & fieldBaseMask
   966  
   967  	// Terms for 2^(fieldBase*4).
   968  	m = (m >> fieldBase) +
   969  		2*uint64(val.n[0])*uint64(val.n[4]) +
   970  		2*uint64(val.n[1])*uint64(val.n[3]) +
   971  		uint64(val.n[2])*uint64(val.n[2])
   972  	t4 := m & fieldBaseMask
   973  
   974  	// Terms for 2^(fieldBase*5).
   975  	m = (m >> fieldBase) +
   976  		2*uint64(val.n[0])*uint64(val.n[5]) +
   977  		2*uint64(val.n[1])*uint64(val.n[4]) +
   978  		2*uint64(val.n[2])*uint64(val.n[3])
   979  	t5 := m & fieldBaseMask
   980  
   981  	// Terms for 2^(fieldBase*6).
   982  	m = (m >> fieldBase) +
   983  		2*uint64(val.n[0])*uint64(val.n[6]) +
   984  		2*uint64(val.n[1])*uint64(val.n[5]) +
   985  		2*uint64(val.n[2])*uint64(val.n[4]) +
   986  		uint64(val.n[3])*uint64(val.n[3])
   987  	t6 := m & fieldBaseMask
   988  
   989  	// Terms for 2^(fieldBase*7).
   990  	m = (m >> fieldBase) +
   991  		2*uint64(val.n[0])*uint64(val.n[7]) +
   992  		2*uint64(val.n[1])*uint64(val.n[6]) +
   993  		2*uint64(val.n[2])*uint64(val.n[5]) +
   994  		2*uint64(val.n[3])*uint64(val.n[4])
   995  	t7 := m & fieldBaseMask
   996  
   997  	// Terms for 2^(fieldBase*8).
   998  	m = (m >> fieldBase) +
   999  		2*uint64(val.n[0])*uint64(val.n[8]) +
  1000  		2*uint64(val.n[1])*uint64(val.n[7]) +
  1001  		2*uint64(val.n[2])*uint64(val.n[6]) +
  1002  		2*uint64(val.n[3])*uint64(val.n[5]) +
  1003  		uint64(val.n[4])*uint64(val.n[4])
  1004  	t8 := m & fieldBaseMask
  1005  
  1006  	// Terms for 2^(fieldBase*9).
  1007  	m = (m >> fieldBase) +
  1008  		2*uint64(val.n[0])*uint64(val.n[9]) +
  1009  		2*uint64(val.n[1])*uint64(val.n[8]) +
  1010  		2*uint64(val.n[2])*uint64(val.n[7]) +
  1011  		2*uint64(val.n[3])*uint64(val.n[6]) +
  1012  		2*uint64(val.n[4])*uint64(val.n[5])
  1013  	t9 := m & fieldBaseMask
  1014  
  1015  	// Terms for 2^(fieldBase*10).
  1016  	m = (m >> fieldBase) +
  1017  		2*uint64(val.n[1])*uint64(val.n[9]) +
  1018  		2*uint64(val.n[2])*uint64(val.n[8]) +
  1019  		2*uint64(val.n[3])*uint64(val.n[7]) +
  1020  		2*uint64(val.n[4])*uint64(val.n[6]) +
  1021  		uint64(val.n[5])*uint64(val.n[5])
  1022  	t10 := m & fieldBaseMask
  1023  
  1024  	// Terms for 2^(fieldBase*11).
  1025  	m = (m >> fieldBase) +
  1026  		2*uint64(val.n[2])*uint64(val.n[9]) +
  1027  		2*uint64(val.n[3])*uint64(val.n[8]) +
  1028  		2*uint64(val.n[4])*uint64(val.n[7]) +
  1029  		2*uint64(val.n[5])*uint64(val.n[6])
  1030  	t11 := m & fieldBaseMask
  1031  
  1032  	// Terms for 2^(fieldBase*12).
  1033  	m = (m >> fieldBase) +
  1034  		2*uint64(val.n[3])*uint64(val.n[9]) +
  1035  		2*uint64(val.n[4])*uint64(val.n[8]) +
  1036  		2*uint64(val.n[5])*uint64(val.n[7]) +
  1037  		uint64(val.n[6])*uint64(val.n[6])
  1038  	t12 := m & fieldBaseMask
  1039  
  1040  	// Terms for 2^(fieldBase*13).
  1041  	m = (m >> fieldBase) +
  1042  		2*uint64(val.n[4])*uint64(val.n[9]) +
  1043  		2*uint64(val.n[5])*uint64(val.n[8]) +
  1044  		2*uint64(val.n[6])*uint64(val.n[7])
  1045  	t13 := m & fieldBaseMask
  1046  
  1047  	// Terms for 2^(fieldBase*14).
  1048  	m = (m >> fieldBase) +
  1049  		2*uint64(val.n[5])*uint64(val.n[9]) +
  1050  		2*uint64(val.n[6])*uint64(val.n[8]) +
  1051  		uint64(val.n[7])*uint64(val.n[7])
  1052  	t14 := m & fieldBaseMask
  1053  
  1054  	// Terms for 2^(fieldBase*15).
  1055  	m = (m >> fieldBase) +
  1056  		2*uint64(val.n[6])*uint64(val.n[9]) +
  1057  		2*uint64(val.n[7])*uint64(val.n[8])
  1058  	t15 := m & fieldBaseMask
  1059  
  1060  	// Terms for 2^(fieldBase*16).
  1061  	m = (m >> fieldBase) +
  1062  		2*uint64(val.n[7])*uint64(val.n[9]) +
  1063  		uint64(val.n[8])*uint64(val.n[8])
  1064  	t16 := m & fieldBaseMask
  1065  
  1066  	// Terms for 2^(fieldBase*17).
  1067  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
  1068  	t17 := m & fieldBaseMask
  1069  
  1070  	// Terms for 2^(fieldBase*18).
  1071  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
  1072  	t18 := m & fieldBaseMask
  1073  
  1074  	// What's left is for 2^(fieldBase*19).
  1075  	t19 := m >> fieldBase
  1076  
  1077  	// At this point, all of the terms are grouped into their respective
  1078  	// base.
  1079  	//
  1080  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1081  	// when the modulus is of the special form m = b^t - c, highly efficient
  1082  	// reduction can be achieved per the provided algorithm.
  1083  	//
  1084  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1085  	// this criteria.
  1086  	//
  1087  	// 4294968273 in field representation (base 2^26) is:
  1088  	// n[0] = 977
  1089  	// n[1] = 64
  1090  	// That is to say (2^26 * 64) + 977 = 4294968273
  1091  	//
  1092  	// Since each word is in base 26, the upper terms (t10 and up) start
  1093  	// at 260 bits (versus the final desired range of 256 bits), so the
  1094  	// field representation of 'c' from above needs to be adjusted for the
  1095  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1096  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1097  	// n[0] = 977 * 16 = 15632
  1098  	// n[1] = 64 * 16 = 1024
  1099  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1100  	//
  1101  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1102  	// of only n[0] because there are no more terms left to handle n[1].
  1103  	// This means there might be some magnitude left in the upper bits that
  1104  	// is handled below.
  1105  	m = t0 + t10*15632
  1106  	t0 = m & fieldBaseMask
  1107  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1108  	t1 = m & fieldBaseMask
  1109  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1110  	t2 = m & fieldBaseMask
  1111  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1112  	t3 = m & fieldBaseMask
  1113  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1114  	t4 = m & fieldBaseMask
  1115  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1116  	t5 = m & fieldBaseMask
  1117  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1118  	t6 = m & fieldBaseMask
  1119  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1120  	t7 = m & fieldBaseMask
  1121  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1122  	t8 = m & fieldBaseMask
  1123  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1124  	t9 = m & fieldMSBMask
  1125  	m = m >> fieldMSBBits
  1126  
  1127  	// At this point, if the magnitude is greater than 0, the overall value
  1128  	// is greater than the max possible 256-bit value.  In particular, it is
  1129  	// "how many times larger" than the max value it is.
  1130  	//
  1131  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1132  	// quotient is zero.  However, due to the above, we already know at
  1133  	// least how many times we would need to repeat as it's the value
  1134  	// currently in m.  Thus we can simply multiply the magnitude by the
  1135  	// field representation of the prime and do a single iteration.  Notice
  1136  	// that nothing will be changed when the magnitude is zero, so we could
  1137  	// skip this in that case, however always running regardless allows it
  1138  	// to run in constant time.  The final result will be in the range
  1139  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1140  	// magnitude of 1, but it is denormalized.
  1141  	n := t0 + m*977
  1142  	f.n[0] = uint32(n & fieldBaseMask)
  1143  	n = (n >> fieldBase) + t1 + m*64
  1144  	f.n[1] = uint32(n & fieldBaseMask)
  1145  	f.n[2] = uint32((n >> fieldBase) + t2)
  1146  	f.n[3] = uint32(t3)
  1147  	f.n[4] = uint32(t4)
  1148  	f.n[5] = uint32(t5)
  1149  	f.n[6] = uint32(t6)
  1150  	f.n[7] = uint32(t7)
  1151  	f.n[8] = uint32(t8)
  1152  	f.n[9] = uint32(t9)
  1153  
  1154  	return f
  1155  }
  1156  
  1157  // Inverse finds the modular multiplicative inverse of the field value.  The
  1158  // existing field value is modified.
  1159  //
  1160  // The field value is returned to support chaining.  This enables syntax like:
  1161  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
  1162  func (f *fieldVal) Inverse() *fieldVal {
  1163  	// Fermat's little theorem states that for a nonzero number a and prime
  1164  	// prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
  1165  	// a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
  1166  	// Thus, a^(p-2) is the multiplicative inverse.
  1167  	//
  1168  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
  1169  	// a sequence of squares and multipications that minimizes the number of
  1170  	// multiplications needed (since they are more costly than squarings).
  1171  	// Intermediate results are saved and reused as well.
  1172  	//
  1173  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
  1174  	//
  1175  	// This has a cost of 258 field squarings and 33 field multiplications.
  1176  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
  1177  	a2.SquareVal(f)
  1178  	a3.Mul2(&a2, f)
  1179  	a4.SquareVal(&a2)
  1180  	a10.SquareVal(&a4).Mul(&a2)
  1181  	a11.Mul2(&a10, f)
  1182  	a21.Mul2(&a10, &a11)
  1183  	a42.SquareVal(&a21)
  1184  	a45.Mul2(&a42, &a3)
  1185  	a63.Mul2(&a42, &a21)
  1186  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
  1187  	a1023.Mul2(&a1019, &a4)
  1188  	f.Set(&a63)                                    // f = a^(2^6 - 1)
  1189  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
  1190  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
  1191  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
  1192  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
  1193  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
  1194  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
  1195  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
  1196  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
  1197  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
  1198  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
  1199  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
  1200  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
  1201  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
  1202  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
  1203  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
  1204  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
  1205  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
  1206  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
  1207  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
  1208  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
  1209  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
  1210  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
  1211  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
  1212  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
  1213  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
  1214  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
  1215  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
  1216  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
  1217  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
  1218  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
  1219  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
  1220  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
  1221  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
  1222  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
  1223  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
  1224  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
  1225  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
  1226  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
  1227  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
  1228  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
  1229  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
  1230  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
  1231  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
  1232  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
  1233  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
  1234  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
  1235  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
  1236  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
  1237  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
  1238  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
  1239  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
  1240  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
  1241  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
  1242  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
  1243  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
  1244  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
  1245  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
  1246  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
  1247  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
  1248  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
  1249  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
  1250  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
  1251  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
  1252  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
  1253  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
  1254  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
  1255  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
  1256  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
  1257  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
  1258  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
  1259  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
  1260  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
  1261  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
  1262  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
  1263  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
  1264  }