github.com/mit-dci/lit@v0.0.0-20221102210550-8c3d3b49f2ce/crypto/koblitz/field.go

github.com/mit-dci/lit@v0.0.0-20221102210550-8c3d3b49f2ce/crypto/koblitz/field.go (about)

     1  // Copyright (c) 2013-2014 The btcsuite developers
     2  // Copyright (c) 2013-2014 Dave Collins
     3  // Use of this source code is governed by an ISC
     4  // license that can be found in the LICENSE file.
     5  
     6  package koblitz
     7  
     8  // References:
     9  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
    10  //     http://cacr.uwaterloo.ca/hac/
    11  
    12  // All elliptic curve operations for secp256k1 are done in a finite field
    13  // characterized by a 256-bit prime.  Given this precision is larger than the
    14  // biggest available native type, obviously some form of bignum math is needed.
    15  // This package implements specialized fixed-precision field arithmetic rather
    16  // than relying on an arbitrary-precision arithmetic package such as math/big
    17  // for dealing with the field math since the size is known.  As a result, rather
    18  // large performance gains are achieved by taking advantage of many
    19  // optimizations not available to arbitrary-precision arithmetic and generic
    20  // modular arithmetic algorithms.
    21  //
    22  // There are various ways to internally represent each finite field element.
    23  // For example, the most obvious representation would be to use an array of 4
    24  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
    25  // a couple of issues.  First, there is no native Go type large enough to handle
    26  // the intermediate results while adding or multiplying two 64-bit numbers, and
    27  // second there is no space left for overflows when performing the intermediate
    28  // arithmetic between each array element which would lead to expensive carry
    29  // propagation.
    30  //
    31  // Given the above, this implementation represents the field elements as
    32  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
    33  // chosen for the following reasons:
    34  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
    35  //    registers available for specialized purposes such as MMX) so the
    36  //    intermediate results can typically be done using a native register (and
    37  //    using uint64s to avoid the need for additional half-word arithmetic)
    38  // 2) In order to allow addition of the internal words without having to
    39  //    propagate the carry, the max normalized value for each register must
    40  //    use fewer bits than the number of bits available in the register
    41  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
    42  //    reasonable choice for #2
    43  // 4) Given the need for 256-bits of precision and the properties stated in #1,
    44  //    #2, and #3, the representation which best accommodates this is 10 uint32s
    45  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
    46  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
    47  //    overflow
    48  //
    49  // Since it is so important that the field arithmetic is extremely fast for
    50  // high performance crypto, this package does not perform any validation where
    51  // it ordinarily would.  For example, some functions only give the correct
    52  // result if the field is normalized and there is no checking to ensure it is.
    53  // While I typically prefer to ensure all state and input is valid for most
    54  // packages, this code is really only used internally and every extra check
    55  // counts.
    56  
    57  import (
    58  	"encoding/hex"
    59  )
    60  
    61  // Constants used to make the code more readable.
    62  const (
    63  	twoBitsMask   = 0x3
    64  	fourBitsMask  = 0xf
    65  	sixBitsMask   = 0x3f
    66  	eightBitsMask = 0xff
    67  )
    68  
    69  // Constants related to the field representation.
    70  const (
    71  	// fieldWords is the number of words used to internally represent the
    72  	// 256-bit value.
    73  	fieldWords = 10
    74  
    75  	// fieldBase is the exponent used to form the numeric base of each word.
    76  	// 2^(fieldBase*i) where i is the word position.
    77  	fieldBase = 26
    78  
    79  	// fieldOverflowBits is the minimum number of "overflow" bits for each
    80  	// word in the field value.
    81  	fieldOverflowBits = 32 - fieldBase
    82  
    83  	// fieldBaseMask is the mask for the bits in each word needed to
    84  	// represent the numeric base of each word (except the most significant
    85  	// word).
    86  	fieldBaseMask = (1 << fieldBase) - 1
    87  
    88  	// fieldMSBBits is the number of bits in the most significant word used
    89  	// to represent the value.
    90  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
    91  
    92  	// fieldMSBMask is the mask for the bits in the most significant word
    93  	// needed to represent the value.
    94  	fieldMSBMask = (1 << fieldMSBBits) - 1
    95  
    96  	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
    97  	// internal field representation.  It is used during modular reduction
    98  	// and negation.
    99  	fieldPrimeWordZero = 0x3fffc2f
   100  
   101  	// fieldPrimeWordOne is word one of the secp256k1 prime in the
   102  	// internal field representation.  It is used during modular reduction
   103  	// and negation.
   104  	fieldPrimeWordOne = 0x3ffffbf
   105  )
   106  
// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field.  This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
// represents each 256-bit value as 10 32-bit integers in base 2^26.  This
// provides 6 bits of overflow in each word (10 bits in the most significant
// word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
// the arithmetic needed for elliptic curve operations.
//
// The following depicts the internal representation:
// 	 -----------------------------------------------------------------
// 	|        n[9]       |        n[8]       | ... |        n[0]       |
// 	| 32 bits available | 32 bits available | ... | 32 bits available |
// 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
// 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
// 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
// 	 -----------------------------------------------------------------
//
// For example, consider the number 2^49 + 1.  It would be represented as:
// 	n[0] = 1
// 	n[1] = 2^23
// 	n[2..9] = 0
//
// The full 256-bit value is then calculated by looping i from 9..0 and
// doing sum(n[i] * 2^(26i)) like so:
// 	n[9] * 2^(26*9) = 0    * 2^234 = 0
// 	n[8] * 2^(26*8) = 0    * 2^208 = 0
// 	...
// 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
// 	n[0] * 2^(26*0) = 1    * 2^0   = 1
// 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
type fieldVal struct {
	// n holds the ten base-2^26 words of the value, least significant word
	// first: n[0] is weighted by 2^(26*0) and n[9] by 2^(26*9).
	n [10]uint32
}
   140  
   141  // String returns the field value as a human-readable hex string.
   142  func (f fieldVal) String() string {
   143  	t := new(fieldVal).Set(&f).Normalize()
   144  	return hex.EncodeToString(t.Bytes()[:])
   145  }
   146  
   147  // Zero sets the field value to zero.  A newly created field value is already
   148  // set to zero.  This function can be useful to clear an existing field value
   149  // for reuse.
   150  func (f *fieldVal) Zero() {
   151  	f.n[0] = 0
   152  	f.n[1] = 0
   153  	f.n[2] = 0
   154  	f.n[3] = 0
   155  	f.n[4] = 0
   156  	f.n[5] = 0
   157  	f.n[6] = 0
   158  	f.n[7] = 0
   159  	f.n[8] = 0
   160  	f.n[9] = 0
   161  }
   162  
   163  // Set sets the field value equal to the passed value.
   164  //
   165  // The field value is returned to support chaining.  This enables syntax like:
   166  // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
   167  // modified.
   168  func (f *fieldVal) Set(val *fieldVal) *fieldVal {
   169  	*f = *val
   170  	return f
   171  }
   172  
   173  // SetInt sets the field value to the passed integer.  This is a convenience
   174  // function since it is fairly common to perform some arithemetic with small
   175  // native integers.
   176  //
   177  // The field value is returned to support chaining.  This enables syntax such
   178  // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
   179  func (f *fieldVal) SetInt(ui uint) *fieldVal {
   180  	f.Zero()
   181  	f.n[0] = uint32(ui)
   182  	return f
   183  }
   184  
// SetBytes packs the passed 32-byte big-endian value into the internal field
// value representation.  The bytes are stored as given: no reduction modulo
// the field prime is performed here.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
	// Pack the 256 total bits across the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is significantly faster.  Benchmarks show
	// this is about 34 times faster than the variant which uses loops.
	//
	// b[31] is the least significant byte, so packing starts there and
	// walks towards b[0].  A byte that straddles a 26-bit word boundary is
	// split: its low bits finish one word and its high bits start the
	// next.  The byte/shift pattern repeats every four words (104 bits =
	// 13 bytes).

	// Value bits 0-25.
	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
		(uint32(b[28])&twoBitsMask)<<24
	// Value bits 26-51.
	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
		(uint32(b[25])&fourBitsMask)<<22
	// Value bits 52-77.
	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
		(uint32(b[22])&sixBitsMask)<<20
	// Value bits 78-103.
	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
		uint32(b[19])<<18
	// Value bits 104-129.
	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
		(uint32(b[15])&twoBitsMask)<<24
	// Value bits 130-155.
	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
		(uint32(b[12])&fourBitsMask)<<22
	// Value bits 156-181.
	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
		(uint32(b[9])&sixBitsMask)<<20
	// Value bits 182-207.
	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
		uint32(b[6])<<18
	// Value bits 208-233.
	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
		(uint32(b[2])&twoBitsMask)<<24
	// Value bits 234-255: the most significant word only holds 22 bits.
	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
	return f
}
   216  
   217  // SetByteSlice packs the passed big-endian value into the internal field value
   218  // representation.  Only the first 32-bytes are used.  As a result, it is up to
   219  // the caller to ensure numbers of the appropriate size are used or the value
   220  // will be truncated.
   221  //
   222  // The field value is returned to support chaining.  This enables syntax like:
   223  // f := new(fieldVal).SetByteSlice(byteSlice)
   224  func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
   225  	var b32 [32]byte
   226  	for i := 0; i < len(b); i++ {
   227  		if i < 32 {
   228  			b32[i+(32-len(b))] = b[i]
   229  		}
   230  	}
   231  	return f.SetBytes(&b32)
   232  }
   233  
   234  // SetHex decodes the passed big-endian hex string into the internal field value
   235  // representation.  Only the first 32-bytes are used.
   236  //
   237  // The field value is returned to support chaining.  This enables syntax like:
   238  // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
   239  func (f *fieldVal) SetHex(hexString string) *fieldVal {
   240  	if len(hexString)%2 != 0 {
   241  		hexString = "0" + hexString
   242  	}
   243  	bytes, _ := hex.DecodeString(hexString)
   244  	return f.SetByteSlice(bytes)
   245  }
   246  
   247  // Normalize normalizes the internal field words into the desired range and
   248  // performs fast modular reduction over the secp256k1 prime by making use of the
   249  // special form of the prime.
   250  func (f *fieldVal) Normalize() *fieldVal {
   251  	// The field representation leaves 6 bits of overflow in each
   252  	// word so intermediate calculations can be performed without needing
   253  	// to propagate the carry to each higher word during the calculations.
   254  	// In order to normalize, first we need to "compact" the full 256-bit
   255  	// value to the right and treat the additional 64 leftmost bits as
   256  	// the magnitude.
   257  	m := f.n[0]
   258  	t0 := m & fieldBaseMask
   259  	m = (m >> fieldBase) + f.n[1]
   260  	t1 := m & fieldBaseMask
   261  	m = (m >> fieldBase) + f.n[2]
   262  	t2 := m & fieldBaseMask
   263  	m = (m >> fieldBase) + f.n[3]
   264  	t3 := m & fieldBaseMask
   265  	m = (m >> fieldBase) + f.n[4]
   266  	t4 := m & fieldBaseMask
   267  	m = (m >> fieldBase) + f.n[5]
   268  	t5 := m & fieldBaseMask
   269  	m = (m >> fieldBase) + f.n[6]
   270  	t6 := m & fieldBaseMask
   271  	m = (m >> fieldBase) + f.n[7]
   272  	t7 := m & fieldBaseMask
   273  	m = (m >> fieldBase) + f.n[8]
   274  	t8 := m & fieldBaseMask
   275  	m = (m >> fieldBase) + f.n[9]
   276  	t9 := m & fieldMSBMask
   277  	m = m >> fieldMSBBits
   278  
   279  	// At this point, if the magnitude is greater than 0, the overall value
   280  	// is greater than the max possible 256-bit value.  In particular, it is
   281  	// "how many times larger" than the max value it is.  Since this field
   282  	// is doing arithmetic modulo the secp256k1 prime, we need to perform
   283  	// modular reduction over the prime.
   284  	//
   285  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   286  	// when the modulus is of the special form m = b^t - c, highly efficient
   287  	// reduction can be achieved.
   288  	//
   289  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   290  	// this criteria.
   291  	//
   292  	// 4294968273 in field representation (base 2^26) is:
   293  	// n[0] = 977
   294  	// n[1] = 64
   295  	// That is to say (2^26 * 64) + 977 = 4294968273
   296  	//
   297  	// The algorithm presented in the referenced section typically repeats
   298  	// until the quotient is zero.  However, due to our field representation
   299  	// we already know at least how many times we would need to repeat as
   300  	// it's the value currently in m.  Thus we can simply multiply the
   301  	// magnitude by the field representation of the prime and do a single
   302  	// iteration.  Notice that nothing will be changed when the magnitude is
   303  	// zero, so we could skip this in that case, however always running
   304  	// regardless allows it to run in constant time.
   305  	r := t0 + m*977
   306  	t0 = r & fieldBaseMask
   307  	r = (r >> fieldBase) + t1 + m*64
   308  	t1 = r & fieldBaseMask
   309  	r = (r >> fieldBase) + t2
   310  	t2 = r & fieldBaseMask
   311  	r = (r >> fieldBase) + t3
   312  	t3 = r & fieldBaseMask
   313  	r = (r >> fieldBase) + t4
   314  	t4 = r & fieldBaseMask
   315  	r = (r >> fieldBase) + t5
   316  	t5 = r & fieldBaseMask
   317  	r = (r >> fieldBase) + t6
   318  	t6 = r & fieldBaseMask
   319  	r = (r >> fieldBase) + t7
   320  	t7 = r & fieldBaseMask
   321  	r = (r >> fieldBase) + t8
   322  	t8 = r & fieldBaseMask
   323  	r = (r >> fieldBase) + t9
   324  	t9 = r & fieldMSBMask
   325  
   326  	// At this point, the result will be in the range 0 <= result <=
   327  	// prime + (2^64 - c).  Therefore, one more subtraction of the prime
   328  	// might be needed if the current result is greater than or equal to the
   329  	// prime.  The following does the final reduction in constant time.
   330  	// Note that the if/else here intentionally does the bitwise OR with
   331  	// zero even though it won't change the value to ensure constant time
   332  	// between the branches.
   333  	var mask int32
   334  	if t0 < fieldPrimeWordZero {
   335  		mask |= -1
   336  	} else {
   337  		mask |= 0
   338  	}
   339  	if t1 < fieldPrimeWordOne {
   340  		mask |= -1
   341  	} else {
   342  		mask |= 0
   343  	}
   344  	if t2 < fieldBaseMask {
   345  		mask |= -1
   346  	} else {
   347  		mask |= 0
   348  	}
   349  	if t3 < fieldBaseMask {
   350  		mask |= -1
   351  	} else {
   352  		mask |= 0
   353  	}
   354  	if t4 < fieldBaseMask {
   355  		mask |= -1
   356  	} else {
   357  		mask |= 0
   358  	}
   359  	if t5 < fieldBaseMask {
   360  		mask |= -1
   361  	} else {
   362  		mask |= 0
   363  	}
   364  	if t6 < fieldBaseMask {
   365  		mask |= -1
   366  	} else {
   367  		mask |= 0
   368  	}
   369  	if t7 < fieldBaseMask {
   370  		mask |= -1
   371  	} else {
   372  		mask |= 0
   373  	}
   374  	if t8 < fieldBaseMask {
   375  		mask |= -1
   376  	} else {
   377  		mask |= 0
   378  	}
   379  	if t9 < fieldMSBMask {
   380  		mask |= -1
   381  	} else {
   382  		mask |= 0
   383  	}
   384  	t0 = t0 - uint32(^mask&fieldPrimeWordZero)
   385  	t1 = t1 - uint32(^mask&fieldPrimeWordOne)
   386  	t2 = t2 & uint32(mask)
   387  	t3 = t3 & uint32(mask)
   388  	t4 = t4 & uint32(mask)
   389  	t5 = t5 & uint32(mask)
   390  	t6 = t6 & uint32(mask)
   391  	t7 = t7 & uint32(mask)
   392  	t8 = t8 & uint32(mask)
   393  	t9 = t9 & uint32(mask)
   394  
   395  	// Finally, set the normalized and reduced words.
   396  	f.n[0] = t0
   397  	f.n[1] = t1
   398  	f.n[2] = t2
   399  	f.n[3] = t3
   400  	f.n[4] = t4
   401  	f.n[5] = t5
   402  	f.n[6] = t6
   403  	f.n[7] = t7
   404  	f.n[8] = t8
   405  	f.n[9] = t9
   406  	return f
   407  }
   408  
// PutBytes unpacks the field value to a 32-byte big-endian value using the
// passed byte array.  There is a similar function, Bytes, which unpacks the
// field value into a new array and returns that.  This version is provided
// since it can be useful to cut down on the number of allocations by allowing
// the caller to reuse a buffer.
//
// All 32 bytes of b are overwritten.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) PutBytes(b *[32]byte) {
	// Unpack the 256 total bits from the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is a bit faster.  Benchmarks show this is
	// about 10 times faster than the variant which uses loops.
	//
	// b[31] is the least significant byte, so unpacking starts with the
	// low bits of n[0] and walks towards the high bits of n[9].  Output
	// bytes that straddle a 26-bit word boundary (b[28], b[25], b[22],
	// b[15], b[12], b[9] and b[2]) combine the top bits of one word with
	// the bottom bits of the next.  Only the value bits of each word are
	// read; any overflow bits are ignored, which is why the input has to be
	// normalized.
	b[31] = byte(f.n[0] & eightBitsMask)
	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
	b[18] = byte(f.n[4] & eightBitsMask)
	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
	b[5] = byte(f.n[8] & eightBitsMask)
	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
}
   455  
   456  // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
   457  // for a variant that allows the a buffer to be passed which can be useful to
   458  // to cut down on the number of allocations by allowing the caller to reuse a
   459  // buffer.
   460  //
   461  // The field value must be normalized for this function to return correct
   462  // result.
   463  func (f *fieldVal) Bytes() *[32]byte {
   464  	b := new([32]byte)
   465  	f.PutBytes(b)
   466  	return b
   467  }
   468  
   469  // IsZero returns whether or not the field value is equal to zero.
   470  func (f *fieldVal) IsZero() bool {
   471  	// The value can only be zero if no bits are set in any of the words.
   472  	// This is a constant time implementation.
   473  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   474  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   475  
   476  	return bits == 0
   477  }
   478  
   479  // IsOdd returns whether or not the field value is an odd number.
   480  //
   481  // The field value must be normalized for this function to return correct
   482  // result.
   483  func (f *fieldVal) IsOdd() bool {
   484  	// Only odd numbers have the bottom bit set.
   485  	return f.n[0]&1 == 1
   486  }
   487  
   488  // Equals returns whether or not the two field values are the same.  Both
   489  // field values being compared must be normalized for this function to return
   490  // the correct result.
   491  func (f *fieldVal) Equals(val *fieldVal) bool {
   492  	// Xor only sets bits when they are different, so the two field values
   493  	// can only be the same if no bits are set after xoring each word.
   494  	// This is a constant time implementation.
   495  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
   496  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
   497  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
   498  		(f.n[9] ^ val.n[9])
   499  
   500  	return bits == 0
   501  }
   502  
   503  // NegateVal negates the passed value and stores the result in f.  The caller
   504  // must provide the magnitude of the passed value for a correct result.
   505  //
   506  // The field value is returned to support chaining.  This enables syntax like:
   507  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
   508  func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
   509  	// Negation in the field is just the prime minus the value.  However,
   510  	// in order to allow negation against a field value without having to
   511  	// normalize/reduce it first, multiply by the magnitude (that is how
   512  	// "far" away it is from the normalized value) to adjust.  Also, since
   513  	// negating a value pushes it one more order of magnitude away from the
   514  	// normalized range, add 1 to compensate.
   515  	//
   516  	// For some intuition here, imagine you're performing mod 12 arithmetic
   517  	// (picture a clock) and you are negating the number 7.  So you start at
   518  	// 12 (which is of course 0 under mod 12) and count backwards (left on
   519  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
   520  	// Now, assume you're starting with 19, which is a number that is
   521  	// already larger than the modulus and congruent to 7 (mod 12).  When a
   522  	// value is already in the desired range, its magnitude is 1.  Since 19
   523  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
   524  	// multiple of the modulus is conguent to zero (mod m), the answer can
   525  	// be shortcut by simply mulplying the magnitude by the modulus and
   526  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
   527  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
   528  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
   529  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
   530  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
   531  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
   532  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
   533  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
   534  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
   535  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
   536  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
   537  
   538  	return f
   539  }
   540  
   541  // Negate negates the field value.  The existing field value is modified.  The
   542  // caller must provide the magnitude of the field value for a correct result.
   543  //
   544  // The field value is returned to support chaining.  This enables syntax like:
   545  // f.Negate().AddInt(1) so that f = -f + 1.
   546  func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
   547  	return f.NegateVal(f, magnitude)
   548  }
   549  
   550  // AddInt adds the passed integer to the existing field value and stores the
   551  // result in f.  This is a convenience function since it is fairly common to
   552  // perform some arithemetic with small native integers.
   553  //
   554  // The field value is returned to support chaining.  This enables syntax like:
   555  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
   556  func (f *fieldVal) AddInt(ui uint) *fieldVal {
   557  	// Since the field representation intentionally provides overflow bits,
   558  	// it's ok to use carryless addition as the carry bit is safely part of
   559  	// the word and will be normalized out.
   560  	f.n[0] += uint32(ui)
   561  
   562  	return f
   563  }
   564  
   565  // Add adds the passed value to the existing field value and stores the result
   566  // in f.
   567  //
   568  // The field value is returned to support chaining.  This enables syntax like:
   569  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
   570  func (f *fieldVal) Add(val *fieldVal) *fieldVal {
   571  	// Since the field representation intentionally provides overflow bits,
   572  	// it's ok to use carryless addition as the carry bit is safely part of
   573  	// each word and will be normalized out.  This could obviously be done
   574  	// in a loop, but the unrolled version is faster.
   575  	f.n[0] += val.n[0]
   576  	f.n[1] += val.n[1]
   577  	f.n[2] += val.n[2]
   578  	f.n[3] += val.n[3]
   579  	f.n[4] += val.n[4]
   580  	f.n[5] += val.n[5]
   581  	f.n[6] += val.n[6]
   582  	f.n[7] += val.n[7]
   583  	f.n[8] += val.n[8]
   584  	f.n[9] += val.n[9]
   585  
   586  	return f
   587  }
   588  
   589  // Add2 adds the passed two field values together and stores the result in f.
   590  //
   591  // The field value is returned to support chaining.  This enables syntax like:
   592  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
   593  func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
   594  	// Since the field representation intentionally provides overflow bits,
   595  	// it's ok to use carryless addition as the carry bit is safely part of
   596  	// each word and will be normalized out.  This could obviously be done
   597  	// in a loop, but the unrolled version is faster.
   598  	f.n[0] = val.n[0] + val2.n[0]
   599  	f.n[1] = val.n[1] + val2.n[1]
   600  	f.n[2] = val.n[2] + val2.n[2]
   601  	f.n[3] = val.n[3] + val2.n[3]
   602  	f.n[4] = val.n[4] + val2.n[4]
   603  	f.n[5] = val.n[5] + val2.n[5]
   604  	f.n[6] = val.n[6] + val2.n[6]
   605  	f.n[7] = val.n[7] + val2.n[7]
   606  	f.n[8] = val.n[8] + val2.n[8]
   607  	f.n[9] = val.n[9] + val2.n[9]
   608  
   609  	return f
   610  }
   611  
   612  // MulInt multiplies the field value by the passed int and stores the result in
   613  // f.  Note that this function can overflow if multiplying the value by any of
   614  // the individual words exceeds a max uint32.  Therefore it is important that
   615  // the caller ensures no overflows will occur before using this function.
   616  //
   617  // The field value is returned to support chaining.  This enables syntax like:
   618  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
   619  func (f *fieldVal) MulInt(val uint) *fieldVal {
   620  	// Since each word of the field representation can hold up to
   621  	// fieldOverflowBits extra bits which will be normalized out, it's safe
   622  	// to multiply each word without using a larger type or carry
   623  	// propagation so long as the values won't overflow a uint32.  This
   624  	// could obviously be done in a loop, but the unrolled version is
   625  	// faster.
   626  	ui := uint32(val)
   627  	f.n[0] *= ui
   628  	f.n[1] *= ui
   629  	f.n[2] *= ui
   630  	f.n[3] *= ui
   631  	f.n[4] *= ui
   632  	f.n[5] *= ui
   633  	f.n[6] *= ui
   634  	f.n[7] *= ui
   635  	f.n[8] *= ui
   636  	f.n[9] *= ui
   637  
   638  	return f
   639  }
   640  
// Mul multiplies the passed value to the existing field value and stores the
// result in f.  Note that this function can overflow if multiplying any
// of the individual words exceeds a max uint32.  In practice, this means the
// magnitude of either value involved in the multiplication must be a max of
// 8.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
	// Delegate to Mul2 with f as both the destination and the first
	// operand.
	return f.Mul2(f, val)
}
   652  
   653  // Mul2 multiplies the passed two field values together and stores the result
   654  // result in f.  Note that this function can overflow if multiplying any of
   655  // the individual words exceeds a max uint32.  In practice, this means the
   656  // magnitude of either value involved in the multiplication must be a max of
   657  // 8.
   658  //
   659  // The field value is returned to support chaining.  This enables syntax like:
   660  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
   661  func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
   662  	// This could be done with a couple of for loops and an array to store
   663  	// the intermediate terms, but this unrolled version is significantly
   664  	// faster.
   665  
   666  	// Terms for 2^(fieldBase*0).
   667  	m := uint64(val.n[0]) * uint64(val2.n[0])
   668  	t0 := m & fieldBaseMask
   669  
   670  	// Terms for 2^(fieldBase*1).
   671  	m = (m >> fieldBase) +
   672  		uint64(val.n[0])*uint64(val2.n[1]) +
   673  		uint64(val.n[1])*uint64(val2.n[0])
   674  	t1 := m & fieldBaseMask
   675  
   676  	// Terms for 2^(fieldBase*2).
   677  	m = (m >> fieldBase) +
   678  		uint64(val.n[0])*uint64(val2.n[2]) +
   679  		uint64(val.n[1])*uint64(val2.n[1]) +
   680  		uint64(val.n[2])*uint64(val2.n[0])
   681  	t2 := m & fieldBaseMask
   682  
   683  	// Terms for 2^(fieldBase*3).
   684  	m = (m >> fieldBase) +
   685  		uint64(val.n[0])*uint64(val2.n[3]) +
   686  		uint64(val.n[1])*uint64(val2.n[2]) +
   687  		uint64(val.n[2])*uint64(val2.n[1]) +
   688  		uint64(val.n[3])*uint64(val2.n[0])
   689  	t3 := m & fieldBaseMask
   690  
   691  	// Terms for 2^(fieldBase*4).
   692  	m = (m >> fieldBase) +
   693  		uint64(val.n[0])*uint64(val2.n[4]) +
   694  		uint64(val.n[1])*uint64(val2.n[3]) +
   695  		uint64(val.n[2])*uint64(val2.n[2]) +
   696  		uint64(val.n[3])*uint64(val2.n[1]) +
   697  		uint64(val.n[4])*uint64(val2.n[0])
   698  	t4 := m & fieldBaseMask
   699  
   700  	// Terms for 2^(fieldBase*5).
   701  	m = (m >> fieldBase) +
   702  		uint64(val.n[0])*uint64(val2.n[5]) +
   703  		uint64(val.n[1])*uint64(val2.n[4]) +
   704  		uint64(val.n[2])*uint64(val2.n[3]) +
   705  		uint64(val.n[3])*uint64(val2.n[2]) +
   706  		uint64(val.n[4])*uint64(val2.n[1]) +
   707  		uint64(val.n[5])*uint64(val2.n[0])
   708  	t5 := m & fieldBaseMask
   709  
   710  	// Terms for 2^(fieldBase*6).
   711  	m = (m >> fieldBase) +
   712  		uint64(val.n[0])*uint64(val2.n[6]) +
   713  		uint64(val.n[1])*uint64(val2.n[5]) +
   714  		uint64(val.n[2])*uint64(val2.n[4]) +
   715  		uint64(val.n[3])*uint64(val2.n[3]) +
   716  		uint64(val.n[4])*uint64(val2.n[2]) +
   717  		uint64(val.n[5])*uint64(val2.n[1]) +
   718  		uint64(val.n[6])*uint64(val2.n[0])
   719  	t6 := m & fieldBaseMask
   720  
   721  	// Terms for 2^(fieldBase*7).
   722  	m = (m >> fieldBase) +
   723  		uint64(val.n[0])*uint64(val2.n[7]) +
   724  		uint64(val.n[1])*uint64(val2.n[6]) +
   725  		uint64(val.n[2])*uint64(val2.n[5]) +
   726  		uint64(val.n[3])*uint64(val2.n[4]) +
   727  		uint64(val.n[4])*uint64(val2.n[3]) +
   728  		uint64(val.n[5])*uint64(val2.n[2]) +
   729  		uint64(val.n[6])*uint64(val2.n[1]) +
   730  		uint64(val.n[7])*uint64(val2.n[0])
   731  	t7 := m & fieldBaseMask
   732  
   733  	// Terms for 2^(fieldBase*8).
   734  	m = (m >> fieldBase) +
   735  		uint64(val.n[0])*uint64(val2.n[8]) +
   736  		uint64(val.n[1])*uint64(val2.n[7]) +
   737  		uint64(val.n[2])*uint64(val2.n[6]) +
   738  		uint64(val.n[3])*uint64(val2.n[5]) +
   739  		uint64(val.n[4])*uint64(val2.n[4]) +
   740  		uint64(val.n[5])*uint64(val2.n[3]) +
   741  		uint64(val.n[6])*uint64(val2.n[2]) +
   742  		uint64(val.n[7])*uint64(val2.n[1]) +
   743  		uint64(val.n[8])*uint64(val2.n[0])
   744  	t8 := m & fieldBaseMask
   745  
   746  	// Terms for 2^(fieldBase*9).
   747  	m = (m >> fieldBase) +
   748  		uint64(val.n[0])*uint64(val2.n[9]) +
   749  		uint64(val.n[1])*uint64(val2.n[8]) +
   750  		uint64(val.n[2])*uint64(val2.n[7]) +
   751  		uint64(val.n[3])*uint64(val2.n[6]) +
   752  		uint64(val.n[4])*uint64(val2.n[5]) +
   753  		uint64(val.n[5])*uint64(val2.n[4]) +
   754  		uint64(val.n[6])*uint64(val2.n[3]) +
   755  		uint64(val.n[7])*uint64(val2.n[2]) +
   756  		uint64(val.n[8])*uint64(val2.n[1]) +
   757  		uint64(val.n[9])*uint64(val2.n[0])
   758  	t9 := m & fieldBaseMask
   759  
   760  	// Terms for 2^(fieldBase*10).
   761  	m = (m >> fieldBase) +
   762  		uint64(val.n[1])*uint64(val2.n[9]) +
   763  		uint64(val.n[2])*uint64(val2.n[8]) +
   764  		uint64(val.n[3])*uint64(val2.n[7]) +
   765  		uint64(val.n[4])*uint64(val2.n[6]) +
   766  		uint64(val.n[5])*uint64(val2.n[5]) +
   767  		uint64(val.n[6])*uint64(val2.n[4]) +
   768  		uint64(val.n[7])*uint64(val2.n[3]) +
   769  		uint64(val.n[8])*uint64(val2.n[2]) +
   770  		uint64(val.n[9])*uint64(val2.n[1])
   771  	t10 := m & fieldBaseMask
   772  
   773  	// Terms for 2^(fieldBase*11).
   774  	m = (m >> fieldBase) +
   775  		uint64(val.n[2])*uint64(val2.n[9]) +
   776  		uint64(val.n[3])*uint64(val2.n[8]) +
   777  		uint64(val.n[4])*uint64(val2.n[7]) +
   778  		uint64(val.n[5])*uint64(val2.n[6]) +
   779  		uint64(val.n[6])*uint64(val2.n[5]) +
   780  		uint64(val.n[7])*uint64(val2.n[4]) +
   781  		uint64(val.n[8])*uint64(val2.n[3]) +
   782  		uint64(val.n[9])*uint64(val2.n[2])
   783  	t11 := m & fieldBaseMask
   784  
   785  	// Terms for 2^(fieldBase*12).
   786  	m = (m >> fieldBase) +
   787  		uint64(val.n[3])*uint64(val2.n[9]) +
   788  		uint64(val.n[4])*uint64(val2.n[8]) +
   789  		uint64(val.n[5])*uint64(val2.n[7]) +
   790  		uint64(val.n[6])*uint64(val2.n[6]) +
   791  		uint64(val.n[7])*uint64(val2.n[5]) +
   792  		uint64(val.n[8])*uint64(val2.n[4]) +
   793  		uint64(val.n[9])*uint64(val2.n[3])
   794  	t12 := m & fieldBaseMask
   795  
   796  	// Terms for 2^(fieldBase*13).
   797  	m = (m >> fieldBase) +
   798  		uint64(val.n[4])*uint64(val2.n[9]) +
   799  		uint64(val.n[5])*uint64(val2.n[8]) +
   800  		uint64(val.n[6])*uint64(val2.n[7]) +
   801  		uint64(val.n[7])*uint64(val2.n[6]) +
   802  		uint64(val.n[8])*uint64(val2.n[5]) +
   803  		uint64(val.n[9])*uint64(val2.n[4])
   804  	t13 := m & fieldBaseMask
   805  
   806  	// Terms for 2^(fieldBase*14).
   807  	m = (m >> fieldBase) +
   808  		uint64(val.n[5])*uint64(val2.n[9]) +
   809  		uint64(val.n[6])*uint64(val2.n[8]) +
   810  		uint64(val.n[7])*uint64(val2.n[7]) +
   811  		uint64(val.n[8])*uint64(val2.n[6]) +
   812  		uint64(val.n[9])*uint64(val2.n[5])
   813  	t14 := m & fieldBaseMask
   814  
   815  	// Terms for 2^(fieldBase*15).
   816  	m = (m >> fieldBase) +
   817  		uint64(val.n[6])*uint64(val2.n[9]) +
   818  		uint64(val.n[7])*uint64(val2.n[8]) +
   819  		uint64(val.n[8])*uint64(val2.n[7]) +
   820  		uint64(val.n[9])*uint64(val2.n[6])
   821  	t15 := m & fieldBaseMask
   822  
   823  	// Terms for 2^(fieldBase*16).
   824  	m = (m >> fieldBase) +
   825  		uint64(val.n[7])*uint64(val2.n[9]) +
   826  		uint64(val.n[8])*uint64(val2.n[8]) +
   827  		uint64(val.n[9])*uint64(val2.n[7])
   828  	t16 := m & fieldBaseMask
   829  
   830  	// Terms for 2^(fieldBase*17).
   831  	m = (m >> fieldBase) +
   832  		uint64(val.n[8])*uint64(val2.n[9]) +
   833  		uint64(val.n[9])*uint64(val2.n[8])
   834  	t17 := m & fieldBaseMask
   835  
   836  	// Terms for 2^(fieldBase*18).
   837  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
   838  	t18 := m & fieldBaseMask
   839  
   840  	// What's left is for 2^(fieldBase*19).
   841  	t19 := m >> fieldBase
   842  
   843  	// At this point, all of the terms are grouped into their respective
   844  	// base.
   845  	//
   846  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   847  	// when the modulus is of the special form m = b^t - c, highly efficient
   848  	// reduction can be achieved per the provided algorithm.
   849  	//
   850  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   851  	// this criteria.
   852  	//
   853  	// 4294968273 in field representation (base 2^26) is:
   854  	// n[0] = 977
   855  	// n[1] = 64
   856  	// That is to say (2^26 * 64) + 977 = 4294968273
   857  	//
   858  	// Since each word is in base 26, the upper terms (t10 and up) start
   859  	// at 260 bits (versus the final desired range of 256 bits), so the
   860  	// field representation of 'c' from above needs to be adjusted for the
   861  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
   862  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
   863  	// n[0] = 977 * 16 = 15632
   864  	// n[1] = 64 * 16 = 1024
   865  	// That is to say (2^26 * 1024) + 15632 = 68719492368
   866  	//
   867  	// To reduce the final term, t19, the entire 'c' value is needed instead
   868  	// of only n[0] because there are no more terms left to handle n[1].
   869  	// This means there might be some magnitude left in the upper bits that
   870  	// is handled below.
   871  	m = t0 + t10*15632
   872  	t0 = m & fieldBaseMask
   873  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
   874  	t1 = m & fieldBaseMask
   875  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
   876  	t2 = m & fieldBaseMask
   877  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
   878  	t3 = m & fieldBaseMask
   879  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
   880  	t4 = m & fieldBaseMask
   881  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
   882  	t5 = m & fieldBaseMask
   883  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
   884  	t6 = m & fieldBaseMask
   885  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
   886  	t7 = m & fieldBaseMask
   887  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
   888  	t8 = m & fieldBaseMask
   889  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
   890  	t9 = m & fieldMSBMask
   891  	m = m >> fieldMSBBits
   892  
   893  	// At this point, if the magnitude is greater than 0, the overall value
   894  	// is greater than the max possible 256-bit value.  In particular, it is
   895  	// "how many times larger" than the max value it is.
   896  	//
   897  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
   898  	// quotient is zero.  However, due to the above, we already know at
   899  	// least how many times we would need to repeat as it's the value
   900  	// currently in m.  Thus we can simply multiply the magnitude by the
   901  	// field representation of the prime and do a single iteration.  Notice
   902  	// that nothing will be changed when the magnitude is zero, so we could
   903  	// skip this in that case, however always running regardless allows it
   904  	// to run in constant time.  The final result will be in the range
   905  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
   906  	// magnitude of 1, but it is denormalized.
   907  	d := t0 + m*977
   908  	f.n[0] = uint32(d & fieldBaseMask)
   909  	d = (d >> fieldBase) + t1 + m*64
   910  	f.n[1] = uint32(d & fieldBaseMask)
   911  	f.n[2] = uint32((d >> fieldBase) + t2)
   912  	f.n[3] = uint32(t3)
   913  	f.n[4] = uint32(t4)
   914  	f.n[5] = uint32(t5)
   915  	f.n[6] = uint32(t6)
   916  	f.n[7] = uint32(t7)
   917  	f.n[8] = uint32(t8)
   918  	f.n[9] = uint32(t9)
   919  
   920  	return f
   921  }
   922  
   923  // Square squares the field value.  The existing field value is modified.  Note
   924  // that this function can overflow if multiplying any of the individual words
   925  // exceeds a max uint32.  In practice, this means the magnitude of the field
   926  // must be a max of 8 to prevent overflow.
   927  //
   928  // The field value is returned to support chaining.  This enables syntax like:
   929  // f.Square().Mul(f2) so that f = f^2 * f2.
   930  func (f *fieldVal) Square() *fieldVal {
   931  	return f.SquareVal(f)
   932  }
   933  
   934  // SquareVal squares the passed value and stores the result in f.  Note that
   935  // this function can overflow if multiplying any of the individual words
   936  // exceeds a max uint32.  In practice, this means the magnitude of the field
   937  // being squred must be a max of 8 to prevent overflow.
   938  //
   939  // The field value is returned to support chaining.  This enables syntax like:
   940  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
   941  func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
   942  	// This could be done with a couple of for loops and an array to store
   943  	// the intermediate terms, but this unrolled version is significantly
   944  	// faster.
   945  
   946  	// Terms for 2^(fieldBase*0).
   947  	m := uint64(val.n[0]) * uint64(val.n[0])
   948  	t0 := m & fieldBaseMask
   949  
   950  	// Terms for 2^(fieldBase*1).
   951  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
   952  	t1 := m & fieldBaseMask
   953  
   954  	// Terms for 2^(fieldBase*2).
   955  	m = (m >> fieldBase) +
   956  		2*uint64(val.n[0])*uint64(val.n[2]) +
   957  		uint64(val.n[1])*uint64(val.n[1])
   958  	t2 := m & fieldBaseMask
   959  
   960  	// Terms for 2^(fieldBase*3).
   961  	m = (m >> fieldBase) +
   962  		2*uint64(val.n[0])*uint64(val.n[3]) +
   963  		2*uint64(val.n[1])*uint64(val.n[2])
   964  	t3 := m & fieldBaseMask
   965  
   966  	// Terms for 2^(fieldBase*4).
   967  	m = (m >> fieldBase) +
   968  		2*uint64(val.n[0])*uint64(val.n[4]) +
   969  		2*uint64(val.n[1])*uint64(val.n[3]) +
   970  		uint64(val.n[2])*uint64(val.n[2])
   971  	t4 := m & fieldBaseMask
   972  
   973  	// Terms for 2^(fieldBase*5).
   974  	m = (m >> fieldBase) +
   975  		2*uint64(val.n[0])*uint64(val.n[5]) +
   976  		2*uint64(val.n[1])*uint64(val.n[4]) +
   977  		2*uint64(val.n[2])*uint64(val.n[3])
   978  	t5 := m & fieldBaseMask
   979  
   980  	// Terms for 2^(fieldBase*6).
   981  	m = (m >> fieldBase) +
   982  		2*uint64(val.n[0])*uint64(val.n[6]) +
   983  		2*uint64(val.n[1])*uint64(val.n[5]) +
   984  		2*uint64(val.n[2])*uint64(val.n[4]) +
   985  		uint64(val.n[3])*uint64(val.n[3])
   986  	t6 := m & fieldBaseMask
   987  
   988  	// Terms for 2^(fieldBase*7).
   989  	m = (m >> fieldBase) +
   990  		2*uint64(val.n[0])*uint64(val.n[7]) +
   991  		2*uint64(val.n[1])*uint64(val.n[6]) +
   992  		2*uint64(val.n[2])*uint64(val.n[5]) +
   993  		2*uint64(val.n[3])*uint64(val.n[4])
   994  	t7 := m & fieldBaseMask
   995  
   996  	// Terms for 2^(fieldBase*8).
   997  	m = (m >> fieldBase) +
   998  		2*uint64(val.n[0])*uint64(val.n[8]) +
   999  		2*uint64(val.n[1])*uint64(val.n[7]) +
  1000  		2*uint64(val.n[2])*uint64(val.n[6]) +
  1001  		2*uint64(val.n[3])*uint64(val.n[5]) +
  1002  		uint64(val.n[4])*uint64(val.n[4])
  1003  	t8 := m & fieldBaseMask
  1004  
  1005  	// Terms for 2^(fieldBase*9).
  1006  	m = (m >> fieldBase) +
  1007  		2*uint64(val.n[0])*uint64(val.n[9]) +
  1008  		2*uint64(val.n[1])*uint64(val.n[8]) +
  1009  		2*uint64(val.n[2])*uint64(val.n[7]) +
  1010  		2*uint64(val.n[3])*uint64(val.n[6]) +
  1011  		2*uint64(val.n[4])*uint64(val.n[5])
  1012  	t9 := m & fieldBaseMask
  1013  
  1014  	// Terms for 2^(fieldBase*10).
  1015  	m = (m >> fieldBase) +
  1016  		2*uint64(val.n[1])*uint64(val.n[9]) +
  1017  		2*uint64(val.n[2])*uint64(val.n[8]) +
  1018  		2*uint64(val.n[3])*uint64(val.n[7]) +
  1019  		2*uint64(val.n[4])*uint64(val.n[6]) +
  1020  		uint64(val.n[5])*uint64(val.n[5])
  1021  	t10 := m & fieldBaseMask
  1022  
  1023  	// Terms for 2^(fieldBase*11).
  1024  	m = (m >> fieldBase) +
  1025  		2*uint64(val.n[2])*uint64(val.n[9]) +
  1026  		2*uint64(val.n[3])*uint64(val.n[8]) +
  1027  		2*uint64(val.n[4])*uint64(val.n[7]) +
  1028  		2*uint64(val.n[5])*uint64(val.n[6])
  1029  	t11 := m & fieldBaseMask
  1030  
  1031  	// Terms for 2^(fieldBase*12).
  1032  	m = (m >> fieldBase) +
  1033  		2*uint64(val.n[3])*uint64(val.n[9]) +
  1034  		2*uint64(val.n[4])*uint64(val.n[8]) +
  1035  		2*uint64(val.n[5])*uint64(val.n[7]) +
  1036  		uint64(val.n[6])*uint64(val.n[6])
  1037  	t12 := m & fieldBaseMask
  1038  
  1039  	// Terms for 2^(fieldBase*13).
  1040  	m = (m >> fieldBase) +
  1041  		2*uint64(val.n[4])*uint64(val.n[9]) +
  1042  		2*uint64(val.n[5])*uint64(val.n[8]) +
  1043  		2*uint64(val.n[6])*uint64(val.n[7])
  1044  	t13 := m & fieldBaseMask
  1045  
  1046  	// Terms for 2^(fieldBase*14).
  1047  	m = (m >> fieldBase) +
  1048  		2*uint64(val.n[5])*uint64(val.n[9]) +
  1049  		2*uint64(val.n[6])*uint64(val.n[8]) +
  1050  		uint64(val.n[7])*uint64(val.n[7])
  1051  	t14 := m & fieldBaseMask
  1052  
  1053  	// Terms for 2^(fieldBase*15).
  1054  	m = (m >> fieldBase) +
  1055  		2*uint64(val.n[6])*uint64(val.n[9]) +
  1056  		2*uint64(val.n[7])*uint64(val.n[8])
  1057  	t15 := m & fieldBaseMask
  1058  
  1059  	// Terms for 2^(fieldBase*16).
  1060  	m = (m >> fieldBase) +
  1061  		2*uint64(val.n[7])*uint64(val.n[9]) +
  1062  		uint64(val.n[8])*uint64(val.n[8])
  1063  	t16 := m & fieldBaseMask
  1064  
  1065  	// Terms for 2^(fieldBase*17).
  1066  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
  1067  	t17 := m & fieldBaseMask
  1068  
  1069  	// Terms for 2^(fieldBase*18).
  1070  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
  1071  	t18 := m & fieldBaseMask
  1072  
  1073  	// What's left is for 2^(fieldBase*19).
  1074  	t19 := m >> fieldBase
  1075  
  1076  	// At this point, all of the terms are grouped into their respective
  1077  	// base.
  1078  	//
  1079  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1080  	// when the modulus is of the special form m = b^t - c, highly efficient
  1081  	// reduction can be achieved per the provided algorithm.
  1082  	//
  1083  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1084  	// this criteria.
  1085  	//
  1086  	// 4294968273 in field representation (base 2^26) is:
  1087  	// n[0] = 977
  1088  	// n[1] = 64
  1089  	// That is to say (2^26 * 64) + 977 = 4294968273
  1090  	//
  1091  	// Since each word is in base 26, the upper terms (t10 and up) start
  1092  	// at 260 bits (versus the final desired range of 256 bits), so the
  1093  	// field representation of 'c' from above needs to be adjusted for the
  1094  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1095  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1096  	// n[0] = 977 * 16 = 15632
  1097  	// n[1] = 64 * 16 = 1024
  1098  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1099  	//
  1100  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1101  	// of only n[0] because there are no more terms left to handle n[1].
  1102  	// This means there might be some magnitude left in the upper bits that
  1103  	// is handled below.
  1104  	m = t0 + t10*15632
  1105  	t0 = m & fieldBaseMask
  1106  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1107  	t1 = m & fieldBaseMask
  1108  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1109  	t2 = m & fieldBaseMask
  1110  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1111  	t3 = m & fieldBaseMask
  1112  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1113  	t4 = m & fieldBaseMask
  1114  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1115  	t5 = m & fieldBaseMask
  1116  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1117  	t6 = m & fieldBaseMask
  1118  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1119  	t7 = m & fieldBaseMask
  1120  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1121  	t8 = m & fieldBaseMask
  1122  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1123  	t9 = m & fieldMSBMask
  1124  	m = m >> fieldMSBBits
  1125  
  1126  	// At this point, if the magnitude is greater than 0, the overall value
  1127  	// is greater than the max possible 256-bit value.  In particular, it is
  1128  	// "how many times larger" than the max value it is.
  1129  	//
  1130  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1131  	// quotient is zero.  However, due to the above, we already know at
  1132  	// least how many times we would need to repeat as it's the value
  1133  	// currently in m.  Thus we can simply multiply the magnitude by the
  1134  	// field representation of the prime and do a single iteration.  Notice
  1135  	// that nothing will be changed when the magnitude is zero, so we could
  1136  	// skip this in that case, however always running regardless allows it
  1137  	// to run in constant time.  The final result will be in the range
  1138  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1139  	// magnitude of 1, but it is denormalized.
  1140  	n := t0 + m*977
  1141  	f.n[0] = uint32(n & fieldBaseMask)
  1142  	n = (n >> fieldBase) + t1 + m*64
  1143  	f.n[1] = uint32(n & fieldBaseMask)
  1144  	f.n[2] = uint32((n >> fieldBase) + t2)
  1145  	f.n[3] = uint32(t3)
  1146  	f.n[4] = uint32(t4)
  1147  	f.n[5] = uint32(t5)
  1148  	f.n[6] = uint32(t6)
  1149  	f.n[7] = uint32(t7)
  1150  	f.n[8] = uint32(t8)
  1151  	f.n[9] = uint32(t9)
  1152  
  1153  	return f
  1154  }
  1155  
  1156  // Inverse finds the modular multiplicative inverse of the field value.  The
  1157  // existing field value is modified.
  1158  //
  1159  // The field value is returned to support chaining.  This enables syntax like:
  1160  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
  1161  func (f *fieldVal) Inverse() *fieldVal {
  1162  	// Fermat's little theorem states that for a nonzero number a and prime
  1163  	// prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
  1164  	// a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
  1165  	// Thus, a^(p-2) is the multiplicative inverse.
  1166  	//
  1167  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
  1168  	// a sequence of squares and multipications that minimizes the number of
  1169  	// multiplications needed (since they are more costly than squarings).
  1170  	// Intermediate results are saved and reused as well.
  1171  	//
  1172  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
  1173  	//
  1174  	// This has a cost of 258 field squarings and 33 field multiplications.
  1175  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
  1176  	a2.SquareVal(f)
  1177  	a3.Mul2(&a2, f)
  1178  	a4.SquareVal(&a2)
  1179  	a10.SquareVal(&a4).Mul(&a2)
  1180  	a11.Mul2(&a10, f)
  1181  	a21.Mul2(&a10, &a11)
  1182  	a42.SquareVal(&a21)
  1183  	a45.Mul2(&a42, &a3)
  1184  	a63.Mul2(&a42, &a21)
  1185  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
  1186  	a1023.Mul2(&a1019, &a4)
  1187  	f.Set(&a63)                                    // f = a^(2^6 - 1)
  1188  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
  1189  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
  1190  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
  1191  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
  1192  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
  1193  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
  1194  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
  1195  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
  1196  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
  1197  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
  1198  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
  1199  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
  1200  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
  1201  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
  1202  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
  1203  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
  1204  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
  1205  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
  1206  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
  1207  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
  1208  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
  1209  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
  1210  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
  1211  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
  1212  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
  1213  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
  1214  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
  1215  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
  1216  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
  1217  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
  1218  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
  1219  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
  1220  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
  1221  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
  1222  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
  1223  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
  1224  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
  1225  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
  1226  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
  1227  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
  1228  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
  1229  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
  1230  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
  1231  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
  1232  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
  1233  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
  1234  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
  1235  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
  1236  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
  1237  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
  1238  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
  1239  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
  1240  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
  1241  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
  1242  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
  1243  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
  1244  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
  1245  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
  1246  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
  1247  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
  1248  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
  1249  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
  1250  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
  1251  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
  1252  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
  1253  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
  1254  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
  1255  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
  1256  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
  1257  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
  1258  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
  1259  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
  1260  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
  1261  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
  1262  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
  1263  }