
     1  // Copyright (c) 2013-2016 The btcsuite developers
     2  // Copyright (c) 2013-2016 Dave Collins
     3  // Use of this source code is governed by an ISC
     4  // license that can be found in the LICENSE file.
     6  package btcecv1
     8  // References:
     9  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
    10  //
    12  // All elliptic curve operations for secp256k1 are done in a finite field
    13  // characterized by a 256-bit prime.  Given this precision is larger than the
    14  // biggest available native type, obviously some form of bignum math is needed.
    15  // This package implements specialized fixed-precision field arithmetic rather
    16  // than relying on an arbitrary-precision arithmetic package such as math/big
    17  // for dealing with the field math since the size is known.  As a result, rather
    18  // large performance gains are achieved by taking advantage of many
    19  // optimizations not available to arbitrary-precision arithmetic and generic
    20  // modular arithmetic algorithms.
    21  //
    22  // There are various ways to internally represent each finite field element.
    23  // For example, the most obvious representation would be to use an array of 4
    24  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
    25  // a couple of issues.  First, there is no native Go type large enough to handle
    26  // the intermediate results while adding or multiplying two 64-bit numbers, and
    27  // second there is no space left for overflows when performing the intermediate
    28  // arithmetic between each array element which would lead to expensive carry
    29  // propagation.
    30  //
    31  // Given the above, this implementation represents the field elements as
    32  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
    33  // chosen for the following reasons:
    34  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
    35  //    registers available for specialized purposes such as MMX) so the
    36  //    intermediate results can typically be done using a native register (and
    37  //    using uint64s to avoid the need for additional half-word arithmetic)
    38  // 2) In order to allow addition of the internal words without having to
    39  //    propagate the carry, the max normalized value for each register must
    40  //    be less than the number of bits available in the register
    41  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
    42  //    reasonable choice for #2
    43  // 4) Given the need for 256-bits of precision and the properties stated in #1,
    44  //    #2, and #3, the representation which best accommodates this is 10 uint32s
    45  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
    46  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
    47  //    overflow
    48  //
    49  // Since it is so important that the field arithmetic is extremely fast for
    50  // high performance crypto, this package does not perform any validation where
    51  // it ordinarily would.  For example, some functions only give the correct
    52  // result if the field is normalized and there is no checking to ensure it is.
    53  // While I typically prefer to ensure all state and input is valid for most
    54  // packages, this code is really only used internally and every extra check
    55  // counts.
    57  import (
    58  	"encoding/hex"
    59  )
    61  // Constants used to make the code more readable.
    62  const (
    63  	twoBitsMask   = 0x3
    64  	fourBitsMask  = 0xf
    65  	sixBitsMask   = 0x3f
    66  	eightBitsMask = 0xff
    67  )
    69  // Constants related to the field representation.
    70  const (
    71  	// fieldWords is the number of words used to internally represent the
    72  	// 256-bit value.
    73  	fieldWords = 10
    75  	// fieldBase is the exponent used to form the numeric base of each word.
    76  	// 2^(fieldBase*i) where i is the word position.
    77  	fieldBase = 26
    79  	// fieldOverflowBits is the minimum number of "overflow" bits for each
    80  	// word in the field value.
    81  	fieldOverflowBits = 32 - fieldBase
    83  	// fieldBaseMask is the mask for the bits in each word needed to
    84  	// represent the numeric base of each word (except the most significant
    85  	// word).
    86  	fieldBaseMask = (1 << fieldBase) - 1
    88  	// fieldMSBBits is the number of bits in the most significant word used
    89  	// to represent the value.
    90  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
    92  	// fieldMSBMask is the mask for the bits in the most significant word
    93  	// needed to represent the value.
    94  	fieldMSBMask = (1 << fieldMSBBits) - 1
    96  	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
    97  	// internal field representation.  It is used during negation.
    98  	fieldPrimeWordZero = 0x3fffc2f
   100  	// fieldPrimeWordOne is word one of the secp256k1 prime in the
   101  	// internal field representation.  It is used during negation.
   102  	fieldPrimeWordOne = 0x3ffffbf
   103  )
   105  var (
   106  	// fieldQBytes is the value Q = (P+1)/4 for the secp256k1 prime P. This
   107  	// value is used to efficiently compute the square root of values in the
   108  	// field via exponentiation. The value of Q in hex is:
   109  	//
   110  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
   111  	fieldQBytes = []byte{
   112  		0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   113  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   114  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   115  		0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
   116  	}
   117  )
   119  // fieldVal implements optimized fixed-precision arithmetic over the
   120  // secp256k1 finite field.  This means all arithmetic is performed modulo
   121  // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
   122  // represents each 256-bit value as 10 32-bit integers in base 2^26.  This
   123  // provides 6 bits of overflow in each word (10 bits in the most significant
   124  // word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
   125  // the arithmetic needed for elliptic curve operations.
   126  //
   127  // The following depicts the internal representation:
   128  //
   129  //	 -----------------------------------------------------------------
   130  //	|        n[9]       |        n[8]       | ... |        n[0]       |
   131  //	| 32 bits available | 32 bits available | ... | 32 bits available |
   132  //	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
   133  //	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
   134  //	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
   135  //	 -----------------------------------------------------------------
   136  //
   137  // For example, consider the number 2^49 + 1.  It would be represented as:
   138  //
   139  //	n[0] = 1
   140  //	n[1] = 2^23
   141  //	n[2..9] = 0
   142  //
   143  // The full 256-bit value is then calculated by looping i from 9..0 and
   144  // doing sum(n[i] * 2^(26i)) like so:
   145  //
   146  //	n[9] * 2^(26*9) = 0    * 2^234 = 0
   147  //	n[8] * 2^(26*8) = 0    * 2^208 = 0
   148  //	...
   149  //	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
   150  //	n[0] * 2^(26*0) = 1    * 2^0   = 1
   151  //	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
   152  type fieldVal struct {
   153  	n [10]uint32
   154  }
   156  // String returns the field value as a human-readable hex string.
   157  func (f fieldVal) String() string {
   158  	t := new(fieldVal).Set(&f).Normalize()
   159  	return hex.EncodeToString(t.Bytes()[:])
   160  }
   162  // Zero sets the field value to zero.  A newly created field value is already
   163  // set to zero.  This function can be useful to clear an existing field value
   164  // for reuse.
   165  func (f *fieldVal) Zero() {
   166  	f.n[0] = 0
   167  	f.n[1] = 0
   168  	f.n[2] = 0
   169  	f.n[3] = 0
   170  	f.n[4] = 0
   171  	f.n[5] = 0
   172  	f.n[6] = 0
   173  	f.n[7] = 0
   174  	f.n[8] = 0
   175  	f.n[9] = 0
   176  }
   178  // Set sets the field value equal to the passed value.
   179  //
   180  // The field value is returned to support chaining.  This enables syntax like:
   181  // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
   182  // modified.
   183  func (f *fieldVal) Set(val *fieldVal) *fieldVal {
   184  	*f = *val
   185  	return f
   186  }
   188  // SetInt sets the field value to the passed integer.  This is a convenience
   189  // function since it is fairly common to perform some arithemetic with small
   190  // native integers.
   191  //
   192  // The field value is returned to support chaining.  This enables syntax such
   193  // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
   194  func (f *fieldVal) SetInt(ui uint) *fieldVal {
   195  	f.Zero()
   196  	f.n[0] = uint32(ui)
   197  	return f
   198  }
   200  // SetBytes packs the passed 32-byte big-endian value into the internal field
   201  // value representation.
   202  //
   203  // The field value is returned to support chaining.  This enables syntax like:
   204  // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
   205  func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
   206  	// Pack the 256 total bits across the 10 uint32 words with a max of
   207  	// 26-bits per word.  This could be done with a couple of for loops,
   208  	// but this unrolled version is significantly faster.  Benchmarks show
   209  	// this is about 34 times faster than the variant which uses loops.
   210  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
   211  		(uint32(b[28])&twoBitsMask)<<24
   212  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
   213  		(uint32(b[25])&fourBitsMask)<<22
   214  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
   215  		(uint32(b[22])&sixBitsMask)<<20
   216  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
   217  		uint32(b[19])<<18
   218  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
   219  		(uint32(b[15])&twoBitsMask)<<24
   220  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
   221  		(uint32(b[12])&fourBitsMask)<<22
   222  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
   223  		(uint32(b[9])&sixBitsMask)<<20
   224  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
   225  		uint32(b[6])<<18
   226  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
   227  		(uint32(b[2])&twoBitsMask)<<24
   228  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
   229  	return f
   230  }
   232  // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
   233  // integer (meaning it is truncated to the first 32 bytes), packs it into the
   234  // internal field value representation, and returns the updated field value.
   235  //
   236  // Note that since passing a slice with more than 32 bytes is truncated, it is
   237  // possible that the truncated value is less than the field prime.  It is up to
   238  // the caller to decide whether it needs to provide numbers of the appropriate
   239  // size or if it is acceptable to use this function with the described
   240  // truncation behavior.
   241  //
   242  // The field value is returned to support chaining.  This enables syntax like:
   243  // f := new(fieldVal).SetByteSlice(byteSlice)
   244  func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
   245  	var b32 [32]byte
   246  	if len(b) > 32 {
   247  		b = b[:32]
   248  	}
   249  	copy(b32[32-len(b):], b)
   250  	return f.SetBytes(&b32)
   251  }
   253  // SetHex decodes the passed big-endian hex string into the internal field value
   254  // representation.  Only the first 32-bytes are used.
   255  //
   256  // The field value is returned to support chaining.  This enables syntax like:
   257  // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
   258  func (f *fieldVal) SetHex(hexString string) *fieldVal {
   259  	if len(hexString)%2 != 0 {
   260  		hexString = "0" + hexString
   261  	}
   262  	bytes, _ := hex.DecodeString(hexString)
   263  	return f.SetByteSlice(bytes)
   264  }
   266  // Normalize normalizes the internal field words into the desired range and
   267  // performs fast modular reduction over the secp256k1 prime by making use of the
   268  // special form of the prime.
   269  func (f *fieldVal) Normalize() *fieldVal {
   270  	// The field representation leaves 6 bits of overflow in each word so
   271  	// intermediate calculations can be performed without needing to
   272  	// propagate the carry to each higher word during the calculations.  In
   273  	// order to normalize, we need to "compact" the full 256-bit value to
   274  	// the right while propagating any carries through to the high order
   275  	// word.
   276  	//
   277  	// Since this field is doing arithmetic modulo the secp256k1 prime, we
   278  	// also need to perform modular reduction over the prime.
   279  	//
   280  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   281  	// when the modulus is of the special form m = b^t - c, highly efficient
   282  	// reduction can be achieved.
   283  	//
   284  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   285  	// this criteria.
   286  	//
   287  	// 4294968273 in field representation (base 2^26) is:
   288  	// n[0] = 977
   289  	// n[1] = 64
   290  	// That is to say (2^26 * 64) + 977 = 4294968273
   291  	//
   292  	// The algorithm presented in the referenced section typically repeats
   293  	// until the quotient is zero.  However, due to our field representation
   294  	// we already know to within one reduction how many times we would need
   295  	// to repeat as it's the uppermost bits of the high order word.  Thus we
   296  	// can simply multiply the magnitude by the field representation of the
   297  	// prime and do a single iteration.  After this step there might be an
   298  	// additional carry to bit 256 (bit 22 of the high order word).
   299  	t9 := f.n[9]
   300  	m := t9 >> fieldMSBBits
   301  	t9 = t9 & fieldMSBMask
   302  	t0 := f.n[0] + m*977
   303  	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
   304  	t0 = t0 & fieldBaseMask
   305  	t2 := (t1 >> fieldBase) + f.n[2]
   306  	t1 = t1 & fieldBaseMask
   307  	t3 := (t2 >> fieldBase) + f.n[3]
   308  	t2 = t2 & fieldBaseMask
   309  	t4 := (t3 >> fieldBase) + f.n[4]
   310  	t3 = t3 & fieldBaseMask
   311  	t5 := (t4 >> fieldBase) + f.n[5]
   312  	t4 = t4 & fieldBaseMask
   313  	t6 := (t5 >> fieldBase) + f.n[6]
   314  	t5 = t5 & fieldBaseMask
   315  	t7 := (t6 >> fieldBase) + f.n[7]
   316  	t6 = t6 & fieldBaseMask
   317  	t8 := (t7 >> fieldBase) + f.n[8]
   318  	t7 = t7 & fieldBaseMask
   319  	t9 = (t8 >> fieldBase) + t9
   320  	t8 = t8 & fieldBaseMask
   322  	// At this point, the magnitude is guaranteed to be one, however, the
   323  	// value could still be greater than the prime if there was either a
   324  	// carry through to bit 256 (bit 22 of the higher order word) or the
   325  	// value is greater than or equal to the field characteristic.  The
   326  	// following determines if either or these conditions are true and does
   327  	// the final reduction in constant time.
   328  	//
   329  	// Note that the if/else statements here intentionally do the bitwise
   330  	// operators even when it won't change the value to ensure constant time
   331  	// between the branches.  Also note that 'm' will be zero when neither
   332  	// of the aforementioned conditions are true and the value will not be
   333  	// changed when 'm' is zero.
   334  	m = 1
   335  	if t9 == fieldMSBMask {
   336  		m &= 1
   337  	} else {
   338  		m &= 0
   339  	}
   340  	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
   341  		m &= 1
   342  	} else {
   343  		m &= 0
   344  	}
   345  	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
   346  		m &= 1
   347  	} else {
   348  		m &= 0
   349  	}
   350  	if t9>>fieldMSBBits != 0 {
   351  		m |= 1
   352  	} else {
   353  		m |= 0
   354  	}
   355  	t0 = t0 + m*977
   356  	t1 = (t0 >> fieldBase) + t1 + (m << 6)
   357  	t0 = t0 & fieldBaseMask
   358  	t2 = (t1 >> fieldBase) + t2
   359  	t1 = t1 & fieldBaseMask
   360  	t3 = (t2 >> fieldBase) + t3
   361  	t2 = t2 & fieldBaseMask
   362  	t4 = (t3 >> fieldBase) + t4
   363  	t3 = t3 & fieldBaseMask
   364  	t5 = (t4 >> fieldBase) + t5
   365  	t4 = t4 & fieldBaseMask
   366  	t6 = (t5 >> fieldBase) + t6
   367  	t5 = t5 & fieldBaseMask
   368  	t7 = (t6 >> fieldBase) + t7
   369  	t6 = t6 & fieldBaseMask
   370  	t8 = (t7 >> fieldBase) + t8
   371  	t7 = t7 & fieldBaseMask
   372  	t9 = (t8 >> fieldBase) + t9
   373  	t8 = t8 & fieldBaseMask
   374  	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
   376  	// Finally, set the normalized and reduced words.
   377  	f.n[0] = t0
   378  	f.n[1] = t1
   379  	f.n[2] = t2
   380  	f.n[3] = t3
   381  	f.n[4] = t4
   382  	f.n[5] = t5
   383  	f.n[6] = t6
   384  	f.n[7] = t7
   385  	f.n[8] = t8
   386  	f.n[9] = t9
   387  	return f
   388  }
   390  // PutBytes unpacks the field value to a 32-byte big-endian value using the
   391  // passed byte array.  There is a similar function, Bytes, which unpacks the
   392  // field value into a new array and returns that.  This version is provided
   393  // since it can be useful to cut down on the number of allocations by allowing
   394  // the caller to reuse a buffer.
   395  //
   396  // The field value must be normalized for this function to return the correct
   397  // result.
   398  func (f *fieldVal) PutBytes(b *[32]byte) {
   399  	// Unpack the 256 total bits from the 10 uint32 words with a max of
   400  	// 26-bits per word.  This could be done with a couple of for loops,
   401  	// but this unrolled version is a bit faster.  Benchmarks show this is
   402  	// about 10 times faster than the variant which uses loops.
   403  	b[31] = byte(f.n[0] & eightBitsMask)
   404  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
   405  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
   406  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
   407  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
   408  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
   409  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
   410  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
   411  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
   412  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
   413  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
   414  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
   415  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
   416  	b[18] = byte(f.n[4] & eightBitsMask)
   417  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
   418  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
   419  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
   420  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
   421  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
   422  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
   423  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
   424  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
   425  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
   426  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
   427  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
   428  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
   429  	b[5] = byte(f.n[8] & eightBitsMask)
   430  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
   431  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
   432  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
   433  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
   434  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
   435  }
   437  // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
   438  // for a variant that allows the a buffer to be passed which can be useful to
   439  // to cut down on the number of allocations by allowing the caller to reuse a
   440  // buffer.
   441  //
   442  // The field value must be normalized for this function to return correct
   443  // result.
   444  func (f *fieldVal) Bytes() *[32]byte {
   445  	b := new([32]byte)
   446  	f.PutBytes(b)
   447  	return b
   448  }
   450  // IsZero returns whether or not the field value is equal to zero.
   451  func (f *fieldVal) IsZero() bool {
   452  	// The value can only be zero if no bits are set in any of the words.
   453  	// This is a constant time implementation.
   454  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   455  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   457  	return bits == 0
   458  }
   460  // IsOdd returns whether or not the field value is an odd number.
   461  //
   462  // The field value must be normalized for this function to return correct
   463  // result.
   464  func (f *fieldVal) IsOdd() bool {
   465  	// Only odd numbers have the bottom bit set.
   466  	return f.n[0]&1 == 1
   467  }
   469  // Equals returns whether or not the two field values are the same.  Both
   470  // field values being compared must be normalized for this function to return
   471  // the correct result.
   472  func (f *fieldVal) Equals(val *fieldVal) bool {
   473  	// Xor only sets bits when they are different, so the two field values
   474  	// can only be the same if no bits are set after xoring each word.
   475  	// This is a constant time implementation.
   476  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
   477  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
   478  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
   479  		(f.n[9] ^ val.n[9])
   481  	return bits == 0
   482  }
   484  // NegateVal negates the passed value and stores the result in f.  The caller
   485  // must provide the magnitude of the passed value for a correct result.
   486  //
   487  // The field value is returned to support chaining.  This enables syntax like:
   488  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
   489  func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
   490  	// Negation in the field is just the prime minus the value.  However,
   491  	// in order to allow negation against a field value without having to
   492  	// normalize/reduce it first, multiply by the magnitude (that is how
   493  	// "far" away it is from the normalized value) to adjust.  Also, since
   494  	// negating a value pushes it one more order of magnitude away from the
   495  	// normalized range, add 1 to compensate.
   496  	//
   497  	// For some intuition here, imagine you're performing mod 12 arithmetic
   498  	// (picture a clock) and you are negating the number 7.  So you start at
   499  	// 12 (which is of course 0 under mod 12) and count backwards (left on
   500  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
   501  	// Now, assume you're starting with 19, which is a number that is
   502  	// already larger than the modulus and congruent to 7 (mod 12).  When a
   503  	// value is already in the desired range, its magnitude is 1.  Since 19
   504  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
   505  	// multiple of the modulus is conguent to zero (mod m), the answer can
   506  	// be shortcut by simply mulplying the magnitude by the modulus and
   507  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
   508  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
   509  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
   510  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
   511  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
   512  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
   513  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
   514  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
   515  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
   516  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
   517  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
   519  	return f
   520  }
   522  // Negate negates the field value.  The existing field value is modified.  The
   523  // caller must provide the magnitude of the field value for a correct result.
   524  //
   525  // The field value is returned to support chaining.  This enables syntax like:
   526  // f.Negate().AddInt(1) so that f = -f + 1.
   527  func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
   528  	return f.NegateVal(f, magnitude)
   529  }
   531  // AddInt adds the passed integer to the existing field value and stores the
   532  // result in f.  This is a convenience function since it is fairly common to
   533  // perform some arithemetic with small native integers.
   534  //
   535  // The field value is returned to support chaining.  This enables syntax like:
   536  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
   537  func (f *fieldVal) AddInt(ui uint) *fieldVal {
   538  	// Since the field representation intentionally provides overflow bits,
   539  	// it's ok to use carryless addition as the carry bit is safely part of
   540  	// the word and will be normalized out.
   541  	f.n[0] += uint32(ui)
   543  	return f
   544  }
   546  // Add adds the passed value to the existing field value and stores the result
   547  // in f.
   548  //
   549  // The field value is returned to support chaining.  This enables syntax like:
   550  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
   551  func (f *fieldVal) Add(val *fieldVal) *fieldVal {
   552  	// Since the field representation intentionally provides overflow bits,
   553  	// it's ok to use carryless addition as the carry bit is safely part of
   554  	// each word and will be normalized out.  This could obviously be done
   555  	// in a loop, but the unrolled version is faster.
   556  	f.n[0] += val.n[0]
   557  	f.n[1] += val.n[1]
   558  	f.n[2] += val.n[2]
   559  	f.n[3] += val.n[3]
   560  	f.n[4] += val.n[4]
   561  	f.n[5] += val.n[5]
   562  	f.n[6] += val.n[6]
   563  	f.n[7] += val.n[7]
   564  	f.n[8] += val.n[8]
   565  	f.n[9] += val.n[9]
   567  	return f
   568  }
   570  // Add2 adds the passed two field values together and stores the result in f.
   571  //
   572  // The field value is returned to support chaining.  This enables syntax like:
   573  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
   574  func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
   575  	// Since the field representation intentionally provides overflow bits,
   576  	// it's ok to use carryless addition as the carry bit is safely part of
   577  	// each word and will be normalized out.  This could obviously be done
   578  	// in a loop, but the unrolled version is faster.
   579  	f.n[0] = val.n[0] + val2.n[0]
   580  	f.n[1] = val.n[1] + val2.n[1]
   581  	f.n[2] = val.n[2] + val2.n[2]
   582  	f.n[3] = val.n[3] + val2.n[3]
   583  	f.n[4] = val.n[4] + val2.n[4]
   584  	f.n[5] = val.n[5] + val2.n[5]
   585  	f.n[6] = val.n[6] + val2.n[6]
   586  	f.n[7] = val.n[7] + val2.n[7]
   587  	f.n[8] = val.n[8] + val2.n[8]
   588  	f.n[9] = val.n[9] + val2.n[9]
   590  	return f
   591  }
   593  // MulInt multiplies the field value by the passed int and stores the result in
   594  // f.  Note that this function can overflow if multiplying the value by any of
   595  // the individual words exceeds a max uint32.  Therefore it is important that
   596  // the caller ensures no overflows will occur before using this function.
   597  //
   598  // The field value is returned to support chaining.  This enables syntax like:
   599  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
   600  func (f *fieldVal) MulInt(val uint) *fieldVal {
   601  	// Since each word of the field representation can hold up to
   602  	// fieldOverflowBits extra bits which will be normalized out, it's safe
   603  	// to multiply each word without using a larger type or carry
   604  	// propagation so long as the values won't overflow a uint32.  This
   605  	// could obviously be done in a loop, but the unrolled version is
   606  	// faster.
   607  	ui := uint32(val)
   608  	f.n[0] *= ui
   609  	f.n[1] *= ui
   610  	f.n[2] *= ui
   611  	f.n[3] *= ui
   612  	f.n[4] *= ui
   613  	f.n[5] *= ui
   614  	f.n[6] *= ui
   615  	f.n[7] *= ui
   616  	f.n[8] *= ui
   617  	f.n[9] *= ui
   619  	return f
   620  }
   622  // Mul multiplies the passed value to the existing field value and stores the
   623  // result in f.  Note that this function can overflow if multiplying any
   624  // of the individual words exceeds a max uint32.  In practice, this means the
   625  // magnitude of either value involved in the multiplication must be a max of
   626  // 8.
   627  //
   628  // The field value is returned to support chaining.  This enables syntax like:
   629  // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
   630  func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
   631  	return f.Mul2(f, val)
   632  }
   634  // Mul2 multiplies the passed two field values together and stores the result
   635  // in f.  Note that this function can overflow if multiplying any of
   636  // the individual words exceeds a max uint32.  In practice, this means the
   637  // magnitude of either value involved in the multiplication must be a max of
   638  // 8.
   639  //
   640  // The field value is returned to support chaining.  This enables syntax like:
   641  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
   642  func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
   643  	// This could be done with a couple of for loops and an array to store
   644  	// the intermediate terms, but this unrolled version is significantly
   645  	// faster.
   647  	// Terms for 2^(fieldBase*0).
   648  	m := uint64(val.n[0]) * uint64(val2.n[0])
   649  	t0 := m & fieldBaseMask
   651  	// Terms for 2^(fieldBase*1).
   652  	m = (m >> fieldBase) +
   653  		uint64(val.n[0])*uint64(val2.n[1]) +
   654  		uint64(val.n[1])*uint64(val2.n[0])
   655  	t1 := m & fieldBaseMask
   657  	// Terms for 2^(fieldBase*2).
   658  	m = (m >> fieldBase) +
   659  		uint64(val.n[0])*uint64(val2.n[2]) +
   660  		uint64(val.n[1])*uint64(val2.n[1]) +
   661  		uint64(val.n[2])*uint64(val2.n[0])
   662  	t2 := m & fieldBaseMask
   664  	// Terms for 2^(fieldBase*3).
   665  	m = (m >> fieldBase) +
   666  		uint64(val.n[0])*uint64(val2.n[3]) +
   667  		uint64(val.n[1])*uint64(val2.n[2]) +
   668  		uint64(val.n[2])*uint64(val2.n[1]) +
   669  		uint64(val.n[3])*uint64(val2.n[0])
   670  	t3 := m & fieldBaseMask
   672  	// Terms for 2^(fieldBase*4).
   673  	m = (m >> fieldBase) +
   674  		uint64(val.n[0])*uint64(val2.n[4]) +
   675  		uint64(val.n[1])*uint64(val2.n[3]) +
   676  		uint64(val.n[2])*uint64(val2.n[2]) +
   677  		uint64(val.n[3])*uint64(val2.n[1]) +
   678  		uint64(val.n[4])*uint64(val2.n[0])
   679  	t4 := m & fieldBaseMask
   681  	// Terms for 2^(fieldBase*5).
   682  	m = (m >> fieldBase) +
   683  		uint64(val.n[0])*uint64(val2.n[5]) +
   684  		uint64(val.n[1])*uint64(val2.n[4]) +
   685  		uint64(val.n[2])*uint64(val2.n[3]) +
   686  		uint64(val.n[3])*uint64(val2.n[2]) +
   687  		uint64(val.n[4])*uint64(val2.n[1]) +
   688  		uint64(val.n[5])*uint64(val2.n[0])
   689  	t5 := m & fieldBaseMask
   691  	// Terms for 2^(fieldBase*6).
   692  	m = (m >> fieldBase) +
   693  		uint64(val.n[0])*uint64(val2.n[6]) +
   694  		uint64(val.n[1])*uint64(val2.n[5]) +
   695  		uint64(val.n[2])*uint64(val2.n[4]) +
   696  		uint64(val.n[3])*uint64(val2.n[3]) +
   697  		uint64(val.n[4])*uint64(val2.n[2]) +
   698  		uint64(val.n[5])*uint64(val2.n[1]) +
   699  		uint64(val.n[6])*uint64(val2.n[0])
   700  	t6 := m & fieldBaseMask
   702  	// Terms for 2^(fieldBase*7).
   703  	m = (m >> fieldBase) +
   704  		uint64(val.n[0])*uint64(val2.n[7]) +
   705  		uint64(val.n[1])*uint64(val2.n[6]) +
   706  		uint64(val.n[2])*uint64(val2.n[5]) +
   707  		uint64(val.n[3])*uint64(val2.n[4]) +
   708  		uint64(val.n[4])*uint64(val2.n[3]) +
   709  		uint64(val.n[5])*uint64(val2.n[2]) +
   710  		uint64(val.n[6])*uint64(val2.n[1]) +
   711  		uint64(val.n[7])*uint64(val2.n[0])
   712  	t7 := m & fieldBaseMask
   714  	// Terms for 2^(fieldBase*8).
   715  	m = (m >> fieldBase) +
   716  		uint64(val.n[0])*uint64(val2.n[8]) +
   717  		uint64(val.n[1])*uint64(val2.n[7]) +
   718  		uint64(val.n[2])*uint64(val2.n[6]) +
   719  		uint64(val.n[3])*uint64(val2.n[5]) +
   720  		uint64(val.n[4])*uint64(val2.n[4]) +
   721  		uint64(val.n[5])*uint64(val2.n[3]) +
   722  		uint64(val.n[6])*uint64(val2.n[2]) +
   723  		uint64(val.n[7])*uint64(val2.n[1]) +
   724  		uint64(val.n[8])*uint64(val2.n[0])
   725  	t8 := m & fieldBaseMask
   727  	// Terms for 2^(fieldBase*9).
   728  	m = (m >> fieldBase) +
   729  		uint64(val.n[0])*uint64(val2.n[9]) +
   730  		uint64(val.n[1])*uint64(val2.n[8]) +
   731  		uint64(val.n[2])*uint64(val2.n[7]) +
   732  		uint64(val.n[3])*uint64(val2.n[6]) +
   733  		uint64(val.n[4])*uint64(val2.n[5]) +
   734  		uint64(val.n[5])*uint64(val2.n[4]) +
   735  		uint64(val.n[6])*uint64(val2.n[3]) +
   736  		uint64(val.n[7])*uint64(val2.n[2]) +
   737  		uint64(val.n[8])*uint64(val2.n[1]) +
   738  		uint64(val.n[9])*uint64(val2.n[0])
   739  	t9 := m & fieldBaseMask
   741  	// Terms for 2^(fieldBase*10).
   742  	m = (m >> fieldBase) +
   743  		uint64(val.n[1])*uint64(val2.n[9]) +
   744  		uint64(val.n[2])*uint64(val2.n[8]) +
   745  		uint64(val.n[3])*uint64(val2.n[7]) +
   746  		uint64(val.n[4])*uint64(val2.n[6]) +
   747  		uint64(val.n[5])*uint64(val2.n[5]) +
   748  		uint64(val.n[6])*uint64(val2.n[4]) +
   749  		uint64(val.n[7])*uint64(val2.n[3]) +
   750  		uint64(val.n[8])*uint64(val2.n[2]) +
   751  		uint64(val.n[9])*uint64(val2.n[1])
   752  	t10 := m & fieldBaseMask
   754  	// Terms for 2^(fieldBase*11).
   755  	m = (m >> fieldBase) +
   756  		uint64(val.n[2])*uint64(val2.n[9]) +
   757  		uint64(val.n[3])*uint64(val2.n[8]) +
   758  		uint64(val.n[4])*uint64(val2.n[7]) +
   759  		uint64(val.n[5])*uint64(val2.n[6]) +
   760  		uint64(val.n[6])*uint64(val2.n[5]) +
   761  		uint64(val.n[7])*uint64(val2.n[4]) +
   762  		uint64(val.n[8])*uint64(val2.n[3]) +
   763  		uint64(val.n[9])*uint64(val2.n[2])
   764  	t11 := m & fieldBaseMask
   766  	// Terms for 2^(fieldBase*12).
   767  	m = (m >> fieldBase) +
   768  		uint64(val.n[3])*uint64(val2.n[9]) +
   769  		uint64(val.n[4])*uint64(val2.n[8]) +
   770  		uint64(val.n[5])*uint64(val2.n[7]) +
   771  		uint64(val.n[6])*uint64(val2.n[6]) +
   772  		uint64(val.n[7])*uint64(val2.n[5]) +
   773  		uint64(val.n[8])*uint64(val2.n[4]) +
   774  		uint64(val.n[9])*uint64(val2.n[3])
   775  	t12 := m & fieldBaseMask
   777  	// Terms for 2^(fieldBase*13).
   778  	m = (m >> fieldBase) +
   779  		uint64(val.n[4])*uint64(val2.n[9]) +
   780  		uint64(val.n[5])*uint64(val2.n[8]) +
   781  		uint64(val.n[6])*uint64(val2.n[7]) +
   782  		uint64(val.n[7])*uint64(val2.n[6]) +
   783  		uint64(val.n[8])*uint64(val2.n[5]) +
   784  		uint64(val.n[9])*uint64(val2.n[4])
   785  	t13 := m & fieldBaseMask
   787  	// Terms for 2^(fieldBase*14).
   788  	m = (m >> fieldBase) +
   789  		uint64(val.n[5])*uint64(val2.n[9]) +
   790  		uint64(val.n[6])*uint64(val2.n[8]) +
   791  		uint64(val.n[7])*uint64(val2.n[7]) +
   792  		uint64(val.n[8])*uint64(val2.n[6]) +
   793  		uint64(val.n[9])*uint64(val2.n[5])
   794  	t14 := m & fieldBaseMask
   796  	// Terms for 2^(fieldBase*15).
   797  	m = (m >> fieldBase) +
   798  		uint64(val.n[6])*uint64(val2.n[9]) +
   799  		uint64(val.n[7])*uint64(val2.n[8]) +
   800  		uint64(val.n[8])*uint64(val2.n[7]) +
   801  		uint64(val.n[9])*uint64(val2.n[6])
   802  	t15 := m & fieldBaseMask
   804  	// Terms for 2^(fieldBase*16).
   805  	m = (m >> fieldBase) +
   806  		uint64(val.n[7])*uint64(val2.n[9]) +
   807  		uint64(val.n[8])*uint64(val2.n[8]) +
   808  		uint64(val.n[9])*uint64(val2.n[7])
   809  	t16 := m & fieldBaseMask
   811  	// Terms for 2^(fieldBase*17).
   812  	m = (m >> fieldBase) +
   813  		uint64(val.n[8])*uint64(val2.n[9]) +
   814  		uint64(val.n[9])*uint64(val2.n[8])
   815  	t17 := m & fieldBaseMask
   817  	// Terms for 2^(fieldBase*18).
   818  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
   819  	t18 := m & fieldBaseMask
   821  	// What's left is for 2^(fieldBase*19).
   822  	t19 := m >> fieldBase
   824  	// At this point, all of the terms are grouped into their respective
   825  	// base.
   826  	//
   827  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   828  	// when the modulus is of the special form m = b^t - c, highly efficient
   829  	// reduction can be achieved per the provided algorithm.
   830  	//
   831  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   832  	// this criteria.
   833  	//
   834  	// 4294968273 in field representation (base 2^26) is:
   835  	// n[0] = 977
   836  	// n[1] = 64
   837  	// That is to say (2^26 * 64) + 977 = 4294968273
   838  	//
   839  	// Since each word is in base 26, the upper terms (t10 and up) start
   840  	// at 260 bits (versus the final desired range of 256 bits), so the
   841  	// field representation of 'c' from above needs to be adjusted for the
   842  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
   843  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
   844  	// n[0] = 977 * 16 = 15632
   845  	// n[1] = 64 * 16 = 1024
   846  	// That is to say (2^26 * 1024) + 15632 = 68719492368
   847  	//
   848  	// To reduce the final term, t19, the entire 'c' value is needed instead
   849  	// of only n[0] because there are no more terms left to handle n[1].
   850  	// This means there might be some magnitude left in the upper bits that
   851  	// is handled below.
   852  	m = t0 + t10*15632
   853  	t0 = m & fieldBaseMask
   854  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
   855  	t1 = m & fieldBaseMask
   856  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
   857  	t2 = m & fieldBaseMask
   858  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
   859  	t3 = m & fieldBaseMask
   860  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
   861  	t4 = m & fieldBaseMask
   862  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
   863  	t5 = m & fieldBaseMask
   864  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
   865  	t6 = m & fieldBaseMask
   866  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
   867  	t7 = m & fieldBaseMask
   868  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
   869  	t8 = m & fieldBaseMask
   870  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
   871  	t9 = m & fieldMSBMask
   872  	m = m >> fieldMSBBits
   874  	// At this point, if the magnitude is greater than 0, the overall value
   875  	// is greater than the max possible 256-bit value.  In particular, it is
   876  	// "how many times larger" than the max value it is.
   877  	//
   878  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
   879  	// quotient is zero.  However, due to the above, we already know at
   880  	// least how many times we would need to repeat as it's the value
   881  	// currently in m.  Thus we can simply multiply the magnitude by the
   882  	// field representation of the prime and do a single iteration.  Notice
   883  	// that nothing will be changed when the magnitude is zero, so we could
   884  	// skip this in that case, however always running regardless allows it
   885  	// to run in constant time.  The final result will be in the range
   886  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
   887  	// magnitude of 1, but it is denormalized.
   888  	d := t0 + m*977
   889  	f.n[0] = uint32(d & fieldBaseMask)
   890  	d = (d >> fieldBase) + t1 + m*64
   891  	f.n[1] = uint32(d & fieldBaseMask)
   892  	f.n[2] = uint32((d >> fieldBase) + t2)
   893  	f.n[3] = uint32(t3)
   894  	f.n[4] = uint32(t4)
   895  	f.n[5] = uint32(t5)
   896  	f.n[6] = uint32(t6)
   897  	f.n[7] = uint32(t7)
   898  	f.n[8] = uint32(t8)
   899  	f.n[9] = uint32(t9)
   901  	return f
   902  }
   904  // Square squares the field value.  The existing field value is modified.  Note
   905  // that this function can overflow if multiplying any of the individual words
   906  // exceeds a max uint32.  In practice, this means the magnitude of the field
   907  // must be a max of 8 to prevent overflow.
   908  //
   909  // The field value is returned to support chaining.  This enables syntax like:
   910  // f.Square().Mul(f2) so that f = f^2 * f2.
   911  func (f *fieldVal) Square() *fieldVal {
   912  	return f.SquareVal(f)
   913  }
   915  // SquareVal squares the passed value and stores the result in f.  Note that
   916  // this function can overflow if multiplying any of the individual words
   917  // exceeds a max uint32.  In practice, this means the magnitude of the field
   918  // being squred must be a max of 8 to prevent overflow.
   919  //
   920  // The field value is returned to support chaining.  This enables syntax like:
   921  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
   922  func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
   923  	// This could be done with a couple of for loops and an array to store
   924  	// the intermediate terms, but this unrolled version is significantly
   925  	// faster.
   927  	// Terms for 2^(fieldBase*0).
   928  	m := uint64(val.n[0]) * uint64(val.n[0])
   929  	t0 := m & fieldBaseMask
   931  	// Terms for 2^(fieldBase*1).
   932  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
   933  	t1 := m & fieldBaseMask
   935  	// Terms for 2^(fieldBase*2).
   936  	m = (m >> fieldBase) +
   937  		2*uint64(val.n[0])*uint64(val.n[2]) +
   938  		uint64(val.n[1])*uint64(val.n[1])
   939  	t2 := m & fieldBaseMask
   941  	// Terms for 2^(fieldBase*3).
   942  	m = (m >> fieldBase) +
   943  		2*uint64(val.n[0])*uint64(val.n[3]) +
   944  		2*uint64(val.n[1])*uint64(val.n[2])
   945  	t3 := m & fieldBaseMask
   947  	// Terms for 2^(fieldBase*4).
   948  	m = (m >> fieldBase) +
   949  		2*uint64(val.n[0])*uint64(val.n[4]) +
   950  		2*uint64(val.n[1])*uint64(val.n[3]) +
   951  		uint64(val.n[2])*uint64(val.n[2])
   952  	t4 := m & fieldBaseMask
   954  	// Terms for 2^(fieldBase*5).
   955  	m = (m >> fieldBase) +
   956  		2*uint64(val.n[0])*uint64(val.n[5]) +
   957  		2*uint64(val.n[1])*uint64(val.n[4]) +
   958  		2*uint64(val.n[2])*uint64(val.n[3])
   959  	t5 := m & fieldBaseMask
   961  	// Terms for 2^(fieldBase*6).
   962  	m = (m >> fieldBase) +
   963  		2*uint64(val.n[0])*uint64(val.n[6]) +
   964  		2*uint64(val.n[1])*uint64(val.n[5]) +
   965  		2*uint64(val.n[2])*uint64(val.n[4]) +
   966  		uint64(val.n[3])*uint64(val.n[3])
   967  	t6 := m & fieldBaseMask
   969  	// Terms for 2^(fieldBase*7).
   970  	m = (m >> fieldBase) +
   971  		2*uint64(val.n[0])*uint64(val.n[7]) +
   972  		2*uint64(val.n[1])*uint64(val.n[6]) +
   973  		2*uint64(val.n[2])*uint64(val.n[5]) +
   974  		2*uint64(val.n[3])*uint64(val.n[4])
   975  	t7 := m & fieldBaseMask
   977  	// Terms for 2^(fieldBase*8).
   978  	m = (m >> fieldBase) +
   979  		2*uint64(val.n[0])*uint64(val.n[8]) +
   980  		2*uint64(val.n[1])*uint64(val.n[7]) +
   981  		2*uint64(val.n[2])*uint64(val.n[6]) +
   982  		2*uint64(val.n[3])*uint64(val.n[5]) +
   983  		uint64(val.n[4])*uint64(val.n[4])
   984  	t8 := m & fieldBaseMask
   986  	// Terms for 2^(fieldBase*9).
   987  	m = (m >> fieldBase) +
   988  		2*uint64(val.n[0])*uint64(val.n[9]) +
   989  		2*uint64(val.n[1])*uint64(val.n[8]) +
   990  		2*uint64(val.n[2])*uint64(val.n[7]) +
   991  		2*uint64(val.n[3])*uint64(val.n[6]) +
   992  		2*uint64(val.n[4])*uint64(val.n[5])
   993  	t9 := m & fieldBaseMask
   995  	// Terms for 2^(fieldBase*10).
   996  	m = (m >> fieldBase) +
   997  		2*uint64(val.n[1])*uint64(val.n[9]) +
   998  		2*uint64(val.n[2])*uint64(val.n[8]) +
   999  		2*uint64(val.n[3])*uint64(val.n[7]) +
  1000  		2*uint64(val.n[4])*uint64(val.n[6]) +
  1001  		uint64(val.n[5])*uint64(val.n[5])
  1002  	t10 := m & fieldBaseMask
  1004  	// Terms for 2^(fieldBase*11).
  1005  	m = (m >> fieldBase) +
  1006  		2*uint64(val.n[2])*uint64(val.n[9]) +
  1007  		2*uint64(val.n[3])*uint64(val.n[8]) +
  1008  		2*uint64(val.n[4])*uint64(val.n[7]) +
  1009  		2*uint64(val.n[5])*uint64(val.n[6])
  1010  	t11 := m & fieldBaseMask
  1012  	// Terms for 2^(fieldBase*12).
  1013  	m = (m >> fieldBase) +
  1014  		2*uint64(val.n[3])*uint64(val.n[9]) +
  1015  		2*uint64(val.n[4])*uint64(val.n[8]) +
  1016  		2*uint64(val.n[5])*uint64(val.n[7]) +
  1017  		uint64(val.n[6])*uint64(val.n[6])
  1018  	t12 := m & fieldBaseMask
  1020  	// Terms for 2^(fieldBase*13).
  1021  	m = (m >> fieldBase) +
  1022  		2*uint64(val.n[4])*uint64(val.n[9]) +
  1023  		2*uint64(val.n[5])*uint64(val.n[8]) +
  1024  		2*uint64(val.n[6])*uint64(val.n[7])
  1025  	t13 := m & fieldBaseMask
  1027  	// Terms for 2^(fieldBase*14).
  1028  	m = (m >> fieldBase) +
  1029  		2*uint64(val.n[5])*uint64(val.n[9]) +
  1030  		2*uint64(val.n[6])*uint64(val.n[8]) +
  1031  		uint64(val.n[7])*uint64(val.n[7])
  1032  	t14 := m & fieldBaseMask
  1034  	// Terms for 2^(fieldBase*15).
  1035  	m = (m >> fieldBase) +
  1036  		2*uint64(val.n[6])*uint64(val.n[9]) +
  1037  		2*uint64(val.n[7])*uint64(val.n[8])
  1038  	t15 := m & fieldBaseMask
  1040  	// Terms for 2^(fieldBase*16).
  1041  	m = (m >> fieldBase) +
  1042  		2*uint64(val.n[7])*uint64(val.n[9]) +
  1043  		uint64(val.n[8])*uint64(val.n[8])
  1044  	t16 := m & fieldBaseMask
  1046  	// Terms for 2^(fieldBase*17).
  1047  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
  1048  	t17 := m & fieldBaseMask
  1050  	// Terms for 2^(fieldBase*18).
  1051  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
  1052  	t18 := m & fieldBaseMask
  1054  	// What's left is for 2^(fieldBase*19).
  1055  	t19 := m >> fieldBase
  1057  	// At this point, all of the terms are grouped into their respective
  1058  	// base.
  1059  	//
  1060  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1061  	// when the modulus is of the special form m = b^t - c, highly efficient
  1062  	// reduction can be achieved per the provided algorithm.
  1063  	//
  1064  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1065  	// this criteria.
  1066  	//
  1067  	// 4294968273 in field representation (base 2^26) is:
  1068  	// n[0] = 977
  1069  	// n[1] = 64
  1070  	// That is to say (2^26 * 64) + 977 = 4294968273
  1071  	//
  1072  	// Since each word is in base 26, the upper terms (t10 and up) start
  1073  	// at 260 bits (versus the final desired range of 256 bits), so the
  1074  	// field representation of 'c' from above needs to be adjusted for the
  1075  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1076  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1077  	// n[0] = 977 * 16 = 15632
  1078  	// n[1] = 64 * 16 = 1024
  1079  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1080  	//
  1081  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1082  	// of only n[0] because there are no more terms left to handle n[1].
  1083  	// This means there might be some magnitude left in the upper bits that
  1084  	// is handled below.
  1085  	m = t0 + t10*15632
  1086  	t0 = m & fieldBaseMask
  1087  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1088  	t1 = m & fieldBaseMask
  1089  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1090  	t2 = m & fieldBaseMask
  1091  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1092  	t3 = m & fieldBaseMask
  1093  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1094  	t4 = m & fieldBaseMask
  1095  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1096  	t5 = m & fieldBaseMask
  1097  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1098  	t6 = m & fieldBaseMask
  1099  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1100  	t7 = m & fieldBaseMask
  1101  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1102  	t8 = m & fieldBaseMask
  1103  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1104  	t9 = m & fieldMSBMask
  1105  	m = m >> fieldMSBBits
  1107  	// At this point, if the magnitude is greater than 0, the overall value
  1108  	// is greater than the max possible 256-bit value.  In particular, it is
  1109  	// "how many times larger" than the max value it is.
  1110  	//
  1111  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1112  	// quotient is zero.  However, due to the above, we already know at
  1113  	// least how many times we would need to repeat as it's the value
  1114  	// currently in m.  Thus we can simply multiply the magnitude by the
  1115  	// field representation of the prime and do a single iteration.  Notice
  1116  	// that nothing will be changed when the magnitude is zero, so we could
  1117  	// skip this in that case, however always running regardless allows it
  1118  	// to run in constant time.  The final result will be in the range
  1119  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1120  	// magnitude of 1, but it is denormalized.
  1121  	n := t0 + m*977
  1122  	f.n[0] = uint32(n & fieldBaseMask)
  1123  	n = (n >> fieldBase) + t1 + m*64
  1124  	f.n[1] = uint32(n & fieldBaseMask)
  1125  	f.n[2] = uint32((n >> fieldBase) + t2)
  1126  	f.n[3] = uint32(t3)
  1127  	f.n[4] = uint32(t4)
  1128  	f.n[5] = uint32(t5)
  1129  	f.n[6] = uint32(t6)
  1130  	f.n[7] = uint32(t7)
  1131  	f.n[8] = uint32(t8)
  1132  	f.n[9] = uint32(t9)
  1134  	return f
  1135  }
  1137  // Inverse finds the modular multiplicative inverse of the field value.  The
  1138  // existing field value is modified.
  1139  //
  1140  // The field value is returned to support chaining.  This enables syntax like:
  1141  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
  1142  func (f *fieldVal) Inverse() *fieldVal {
  1143  	// Fermat's little theorem states that for a nonzero number a and prime
  1144  	// prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
  1145  	// a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
  1146  	// Thus, a^(p-2) is the multiplicative inverse.
  1147  	//
  1148  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
  1149  	// a sequence of squares and multipications that minimizes the number of
  1150  	// multiplications needed (since they are more costly than squarings).
  1151  	// Intermediate results are saved and reused as well.
  1152  	//
  1153  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
  1154  	//
  1155  	// This has a cost of 258 field squarings and 33 field multiplications.
  1156  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
  1157  	a2.SquareVal(f)
  1158  	a3.Mul2(&a2, f)
  1159  	a4.SquareVal(&a2)
  1160  	a10.SquareVal(&a4).Mul(&a2)
  1161  	a11.Mul2(&a10, f)
  1162  	a21.Mul2(&a10, &a11)
  1163  	a42.SquareVal(&a21)
  1164  	a45.Mul2(&a42, &a3)
  1165  	a63.Mul2(&a42, &a21)
  1166  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
  1167  	a1023.Mul2(&a1019, &a4)
  1168  	f.Set(&a63)                                    // f = a^(2^6 - 1)
  1169  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
  1170  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
  1171  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
  1172  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
  1173  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
  1174  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
  1175  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
  1176  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
  1177  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
  1178  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
  1179  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
  1180  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
  1181  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
  1182  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
  1183  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
  1184  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
  1185  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
  1186  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
  1187  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
  1188  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
  1189  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
  1190  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
  1191  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
  1192  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
  1193  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
  1194  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
  1195  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
  1196  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
  1197  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
  1198  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
  1199  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
  1200  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
  1201  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
  1202  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
  1203  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
  1204  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
  1205  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
  1206  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
  1207  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
  1208  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
  1209  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
  1210  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
  1211  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
  1212  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
  1213  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
  1214  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
  1215  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
  1216  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
  1217  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
  1218  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
  1219  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
  1220  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
  1221  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
  1222  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
  1223  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
  1224  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
  1225  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
  1226  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
  1227  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
  1228  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
  1229  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
  1230  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
  1231  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
  1232  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
  1233  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
  1234  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
  1235  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
  1236  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
  1237  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
  1238  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
  1239  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
  1240  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
  1241  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
  1242  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
  1243  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
  1244  }
  1246  // SqrtVal computes the square root of x modulo the curve's prime, and stores
  1247  // the result in f. The square root is computed via exponentiation of x by the
  1248  // value Q = (P+1)/4 using the curve's precomputed big-endian representation of
  1249  // the Q.  This method uses a modified version of square-and-multiply
  1250  // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits,
  1251  // which offers better performance over both big.Int exponentiation and bit-wise
  1252  // square-and-multiply.
  1253  //
  1254  // NOTE: This method only works when P is intended to be the secp256k1 prime and
  1255  // is not constant time. The returned value is of magnitude 1, but is
  1256  // denormalized.
  1257  func (f *fieldVal) SqrtVal(x *fieldVal) *fieldVal {
  1258  	// The following computation iteratively computes x^((P+1)/4) = x^Q
  1259  	// using the recursive, piece-wise definition:
  1260  	//
  1261  	//   x^n = (x^2)^(n/2) mod P       if n is even
  1262  	//   x^n = x(x^2)^(n-1/2) mod P    if n is odd
  1263  	//
  1264  	// Given n in its big-endian representation b_k, ..., b_0, x^n can be
  1265  	// computed by defining the sequence r_k+1, ..., r_0, where:
  1266  	//
  1267  	//   r_k+1 = 1
  1268  	//   r_i   = (r_i+1)^2 * x^b_i    for i = k, ..., 0
  1269  	//
  1270  	// The final value r_0 = x^n.
  1271  	//
  1272  	// See for more
  1273  	// details.
  1274  	//
  1275  	// This can be further optimized, by observing that the value of Q in
  1276  	// secp256k1 has the value:
  1277  	//
  1278  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
  1279  	//
  1280  	// We can unroll the typical bit-wise interpretation of the
  1281  	// exponentiation algorithm above to instead operate on bytes.
  1282  	// This reduces the number of comparisons by an order of magnitude,
  1283  	// reducing the overhead of failed branch predictions and additional
  1284  	// comparisons in this method.
  1285  	//
  1286  	// Since there there are only 4 unique bytes of Q, this keeps the jump
  1287  	// table small without the need to handle all possible 8-bit values.
  1288  	// Further, we observe that 29 of the 32 bytes are 0xff; making the
  1289  	// first case handle 0xff therefore optimizes the hot path.
  1290  	f.SetInt(1)
  1291  	for _, b := range fieldQBytes {
  1292  		switch b {
  1294  		// Most common case, where all 8 bits are set.
  1295  		case 0xff:
  1296  			f.Square().Mul(x)
  1297  			f.Square().Mul(x)
  1298  			f.Square().Mul(x)
  1299  			f.Square().Mul(x)
  1300  			f.Square().Mul(x)
  1301  			f.Square().Mul(x)
  1302  			f.Square().Mul(x)
  1303  			f.Square().Mul(x)
  1305  		// First byte of Q (0x3f), where all but the top two bits are
  1306  		// set. Note that this case only applies six operations, since
  1307  		// the highest bit of Q resides in bit six of the first byte. We
  1308  		// ignore the first two bits, since squaring for these bits will
  1309  		// result in an invalid result. We forgo squaring f before the
  1310  		// first multiply, since 1^2 = 1.
  1311  		case 0x3f:
  1312  			f.Mul(x)
  1313  			f.Square().Mul(x)
  1314  			f.Square().Mul(x)
  1315  			f.Square().Mul(x)
  1316  			f.Square().Mul(x)
  1317  			f.Square().Mul(x)
  1319  		// Byte 28 of Q (0xbf), where only bit 7 is unset.
  1320  		case 0xbf:
  1321  			f.Square().Mul(x)
  1322  			f.Square()
  1323  			f.Square().Mul(x)
  1324  			f.Square().Mul(x)
  1325  			f.Square().Mul(x)
  1326  			f.Square().Mul(x)
  1327  			f.Square().Mul(x)
  1328  			f.Square().Mul(x)
  1330  		// Byte 31 of Q (0x0c), where only bits 3 and 4 are set.
  1331  		default:
  1332  			f.Square()
  1333  			f.Square()
  1334  			f.Square()
  1335  			f.Square()
  1336  			f.Square().Mul(x)
  1337  			f.Square().Mul(x)
  1338  			f.Square()
  1339  			f.Square()
  1340  		}
  1341  	}
  1343  	return f
  1344  }
  1346  // Sqrt computes the square root of f modulo the curve's prime, and stores the
  1347  // result in f. The square root is computed via exponentiation of x by the value
  1348  // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q.
  1349  // This method uses a modified version of square-and-multiply exponentiation
  1350  // over secp256k1 fieldVals to operate on bytes instead of bits, which offers
  1351  // better performance over both big.Int exponentiation and bit-wise
  1352  // square-and-multiply.
  1353  //
  1354  // NOTE: This method only works when P is intended to be the secp256k1 prime and
  1355  // is not constant time. The returned value is of magnitude 1, but is
  1356  // denormalized.
  1357  func (f *fieldVal) Sqrt() *fieldVal {
  1358  	return f.SqrtVal(f)
  1359  }