github.com/palcoin-project/palcd@v1.0.0/btcec/field.go

github.com/palcoin-project/palcd@v1.0.0/btcec/field.go (about)

     1  // Copyright (c) 2013-2016 The btcsuite developers
     2  // Copyright (c) 2013-2016 Dave Collins
     3  // Use of this source code is governed by an ISC
     4  // license that can be found in the LICENSE file.
     5  
     6  package btcec
     7  
     8  // References:
     9  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
    10  //     http://cacr.uwaterloo.ca/hac/
    11  
    12  // All elliptic curve operations for secp256k1 are done in a finite field
    13  // characterized by a 256-bit prime.  Given this precision is larger than the
    14  // biggest available native type, obviously some form of bignum math is needed.
    15  // This package implements specialized fixed-precision field arithmetic rather
    16  // than relying on an arbitrary-precision arithmetic package such as math/big
    17  // for dealing with the field math since the size is known.  As a result, rather
    18  // large performance gains are achieved by taking advantage of many
    19  // optimizations not available to arbitrary-precision arithmetic and generic
    20  // modular arithmetic algorithms.
    21  //
    22  // There are various ways to internally represent each finite field element.
    23  // For example, the most obvious representation would be to use an array of 4
    24  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
    25  // a couple of issues.  First, there is no native Go type large enough to handle
    26  // the intermediate results while adding or multiplying two 64-bit numbers, and
    27  // second there is no space left for overflows when performing the intermediate
    28  // arithmetic between each array element which would lead to expensive carry
    29  // propagation.
    30  //
    31  // Given the above, this implementation represents the the field elements as
    32  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
    33  // chosen for the following reasons:
    34  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
    35  //    registers available for specialized purposes such as MMX) so the
    36  //    intermediate results can typically be done using a native register (and
    37  //    using uint64s to avoid the need for additional half-word arithmetic)
    38  // 2) In order to allow addition of the internal words without having to
    39  //    propagate the the carry, the max normalized value for each register must
    40  //    be less than the number of bits available in the register
    41  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
    42  //    reasonable choice for #2
    43  // 4) Given the need for 256-bits of precision and the properties stated in #1,
    44  //    #2, and #3, the representation which best accommodates this is 10 uint32s
    45  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
    46  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
    47  //    overflow
    48  //
    49  // Since it is so important that the field arithmetic is extremely fast for
    50  // high performance crypto, this package does not perform any validation where
    51  // it ordinarily would.  For example, some functions only give the correct
    52  // result is the field is normalized and there is no checking to ensure it is.
    53  // While I typically prefer to ensure all state and input is valid for most
    54  // packages, this code is really only used internally and every extra check
    55  // counts.
    56  
    57  import (
    58  	"encoding/hex"
    59  )
    60  
    61  // Constants used to make the code more readable.
    62  const (
    63  	twoBitsMask   = 0x3
    64  	fourBitsMask  = 0xf
    65  	sixBitsMask   = 0x3f
    66  	eightBitsMask = 0xff
    67  )
    68  
    69  // Constants related to the field representation.
    70  const (
    71  	// fieldWords is the number of words used to internally represent the
    72  	// 256-bit value.
    73  	fieldWords = 10
    74  
    75  	// fieldBase is the exponent used to form the numeric base of each word.
    76  	// 2^(fieldBase*i) where i is the word position.
    77  	fieldBase = 26
    78  
    79  	// fieldOverflowBits is the minimum number of "overflow" bits for each
    80  	// word in the field value.
    81  	fieldOverflowBits = 32 - fieldBase
    82  
    83  	// fieldBaseMask is the mask for the bits in each word needed to
    84  	// represent the numeric base of each word (except the most significant
    85  	// word).
    86  	fieldBaseMask = (1 << fieldBase) - 1
    87  
    88  	// fieldMSBBits is the number of bits in the most significant word used
    89  	// to represent the value.
    90  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
    91  
    92  	// fieldMSBMask is the mask for the bits in the most significant word
    93  	// needed to represent the value.
    94  	fieldMSBMask = (1 << fieldMSBBits) - 1
    95  
    96  	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
    97  	// internal field representation.  It is used during negation.
    98  	fieldPrimeWordZero = 0x3fffc2f
    99  
   100  	// fieldPrimeWordOne is word one of the secp256k1 prime in the
   101  	// internal field representation.  It is used during negation.
   102  	fieldPrimeWordOne = 0x3ffffbf
   103  )
   104  
   105  var (
   106  	// fieldQBytes is the value Q = (P+1)/4 for the secp256k1 prime P. This
   107  	// value is used to efficiently compute the square root of values in the
   108  	// field via exponentiation. The value of Q in hex is:
   109  	//
   110  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
   111  	fieldQBytes = []byte{
   112  		0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   113  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   114  		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   115  		0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
   116  	}
   117  )
   118  
   119  // fieldVal implements optimized fixed-precision arithmetic over the
   120  // secp256k1 finite field.  This means all arithmetic is performed modulo
   121  // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
   122  // represents each 256-bit value as 10 32-bit integers in base 2^26.  This
   123  // provides 6 bits of overflow in each word (10 bits in the most significant
   124  // word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
   125  // the arithmetic needed for elliptic curve operations.
   126  //
   127  // The following depicts the internal representation:
   128  // 	 -----------------------------------------------------------------
   129  // 	|        n[9]       |        n[8]       | ... |        n[0]       |
   130  // 	| 32 bits available | 32 bits available | ... | 32 bits available |
   131  // 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
   132  // 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
   133  // 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
   134  // 	 -----------------------------------------------------------------
   135  //
   136  // For example, consider the number 2^49 + 1.  It would be represented as:
   137  // 	n[0] = 1
   138  // 	n[1] = 2^23
   139  // 	n[2..9] = 0
   140  //
   141  // The full 256-bit value is then calculated by looping i from 9..0 and
   142  // doing sum(n[i] * 2^(26i)) like so:
   143  // 	n[9] * 2^(26*9) = 0    * 2^234 = 0
   144  // 	n[8] * 2^(26*8) = 0    * 2^208 = 0
   145  // 	...
   146  // 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
   147  // 	n[0] * 2^(26*0) = 1    * 2^0   = 1
   148  // 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
   149  type fieldVal struct {
   150  	n [10]uint32
   151  }
   152  
   153  // String returns the field value as a human-readable hex string.
   154  func (f fieldVal) String() string {
   155  	t := new(fieldVal).Set(&f).Normalize()
   156  	return hex.EncodeToString(t.Bytes()[:])
   157  }
   158  
   159  // Zero sets the field value to zero.  A newly created field value is already
   160  // set to zero.  This function can be useful to clear an existing field value
   161  // for reuse.
   162  func (f *fieldVal) Zero() {
   163  	f.n[0] = 0
   164  	f.n[1] = 0
   165  	f.n[2] = 0
   166  	f.n[3] = 0
   167  	f.n[4] = 0
   168  	f.n[5] = 0
   169  	f.n[6] = 0
   170  	f.n[7] = 0
   171  	f.n[8] = 0
   172  	f.n[9] = 0
   173  }
   174  
   175  // Set sets the field value equal to the passed value.
   176  //
   177  // The field value is returned to support chaining.  This enables syntax like:
   178  // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
   179  // modified.
   180  func (f *fieldVal) Set(val *fieldVal) *fieldVal {
   181  	*f = *val
   182  	return f
   183  }
   184  
   185  // SetInt sets the field value to the passed integer.  This is a convenience
   186  // function since it is fairly common to perform some arithemetic with small
   187  // native integers.
   188  //
   189  // The field value is returned to support chaining.  This enables syntax such
   190  // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
   191  func (f *fieldVal) SetInt(ui uint) *fieldVal {
   192  	f.Zero()
   193  	f.n[0] = uint32(ui)
   194  	return f
   195  }
   196  
   197  // SetBytes packs the passed 32-byte big-endian value into the internal field
   198  // value representation.
   199  //
   200  // The field value is returned to support chaining.  This enables syntax like:
   201  // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
   202  func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
   203  	// Pack the 256 total bits across the 10 uint32 words with a max of
   204  	// 26-bits per word.  This could be done with a couple of for loops,
   205  	// but this unrolled version is significantly faster.  Benchmarks show
   206  	// this is about 34 times faster than the variant which uses loops.
   207  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
   208  		(uint32(b[28])&twoBitsMask)<<24
   209  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
   210  		(uint32(b[25])&fourBitsMask)<<22
   211  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
   212  		(uint32(b[22])&sixBitsMask)<<20
   213  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
   214  		uint32(b[19])<<18
   215  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
   216  		(uint32(b[15])&twoBitsMask)<<24
   217  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
   218  		(uint32(b[12])&fourBitsMask)<<22
   219  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
   220  		(uint32(b[9])&sixBitsMask)<<20
   221  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
   222  		uint32(b[6])<<18
   223  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
   224  		(uint32(b[2])&twoBitsMask)<<24
   225  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
   226  	return f
   227  }
   228  
   229  // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
   230  // integer (meaning it is truncated to the first 32 bytes), packs it into the
   231  // internal field value representation, and returns the updated field value.
   232  //
   233  // Note that since passing a slice with more than 32 bytes is truncated, it is
   234  // possible that the truncated value is less than the field prime.  It is up to
   235  // the caller to decide whether it needs to provide numbers of the appropriate
   236  // size or if it is acceptable to use this function with the described
   237  // truncation behavior.
   238  //
   239  // The field value is returned to support chaining.  This enables syntax like:
   240  // f := new(fieldVal).SetByteSlice(byteSlice)
   241  func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
   242  	var b32 [32]byte
   243  	if len(b) > 32 {
   244  		b = b[:32]
   245  	}
   246  	copy(b32[32-len(b):], b)
   247  	return f.SetBytes(&b32)
   248  }
   249  
   250  // SetHex decodes the passed big-endian hex string into the internal field value
   251  // representation.  Only the first 32-bytes are used.
   252  //
   253  // The field value is returned to support chaining.  This enables syntax like:
   254  // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
   255  func (f *fieldVal) SetHex(hexString string) *fieldVal {
   256  	if len(hexString)%2 != 0 {
   257  		hexString = "0" + hexString
   258  	}
   259  	bytes, _ := hex.DecodeString(hexString)
   260  	return f.SetByteSlice(bytes)
   261  }
   262  
   263  // Normalize normalizes the internal field words into the desired range and
   264  // performs fast modular reduction over the secp256k1 prime by making use of the
   265  // special form of the prime.
   266  func (f *fieldVal) Normalize() *fieldVal {
   267  	// The field representation leaves 6 bits of overflow in each word so
   268  	// intermediate calculations can be performed without needing to
   269  	// propagate the carry to each higher word during the calculations.  In
   270  	// order to normalize, we need to "compact" the full 256-bit value to
   271  	// the right while propagating any carries through to the high order
   272  	// word.
   273  	//
   274  	// Since this field is doing arithmetic modulo the secp256k1 prime, we
   275  	// also need to perform modular reduction over the prime.
   276  	//
   277  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   278  	// when the modulus is of the special form m = b^t - c, highly efficient
   279  	// reduction can be achieved.
   280  	//
   281  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   282  	// this criteria.
   283  	//
   284  	// 4294968273 in field representation (base 2^26) is:
   285  	// n[0] = 977
   286  	// n[1] = 64
   287  	// That is to say (2^26 * 64) + 977 = 4294968273
   288  	//
   289  	// The algorithm presented in the referenced section typically repeats
   290  	// until the quotient is zero.  However, due to our field representation
   291  	// we already know to within one reduction how many times we would need
   292  	// to repeat as it's the uppermost bits of the high order word.  Thus we
   293  	// can simply multiply the magnitude by the field representation of the
   294  	// prime and do a single iteration.  After this step there might be an
   295  	// additional carry to bit 256 (bit 22 of the high order word).
   296  	t9 := f.n[9]
   297  	m := t9 >> fieldMSBBits
   298  	t9 = t9 & fieldMSBMask
   299  	t0 := f.n[0] + m*977
   300  	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
   301  	t0 = t0 & fieldBaseMask
   302  	t2 := (t1 >> fieldBase) + f.n[2]
   303  	t1 = t1 & fieldBaseMask
   304  	t3 := (t2 >> fieldBase) + f.n[3]
   305  	t2 = t2 & fieldBaseMask
   306  	t4 := (t3 >> fieldBase) + f.n[4]
   307  	t3 = t3 & fieldBaseMask
   308  	t5 := (t4 >> fieldBase) + f.n[5]
   309  	t4 = t4 & fieldBaseMask
   310  	t6 := (t5 >> fieldBase) + f.n[6]
   311  	t5 = t5 & fieldBaseMask
   312  	t7 := (t6 >> fieldBase) + f.n[7]
   313  	t6 = t6 & fieldBaseMask
   314  	t8 := (t7 >> fieldBase) + f.n[8]
   315  	t7 = t7 & fieldBaseMask
   316  	t9 = (t8 >> fieldBase) + t9
   317  	t8 = t8 & fieldBaseMask
   318  
   319  	// At this point, the magnitude is guaranteed to be one, however, the
   320  	// value could still be greater than the prime if there was either a
   321  	// carry through to bit 256 (bit 22 of the higher order word) or the
   322  	// value is greater than or equal to the field characteristic.  The
   323  	// following determines if either or these conditions are true and does
   324  	// the final reduction in constant time.
   325  	//
   326  	// Note that the if/else statements here intentionally do the bitwise
   327  	// operators even when it won't change the value to ensure constant time
   328  	// between the branches.  Also note that 'm' will be zero when neither
   329  	// of the aforementioned conditions are true and the value will not be
   330  	// changed when 'm' is zero.
   331  	m = 1
   332  	if t9 == fieldMSBMask {
   333  		m &= 1
   334  	} else {
   335  		m &= 0
   336  	}
   337  	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
   338  		m &= 1
   339  	} else {
   340  		m &= 0
   341  	}
   342  	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
   343  		m &= 1
   344  	} else {
   345  		m &= 0
   346  	}
   347  	if t9>>fieldMSBBits != 0 {
   348  		m |= 1
   349  	} else {
   350  		m |= 0
   351  	}
   352  	t0 = t0 + m*977
   353  	t1 = (t0 >> fieldBase) + t1 + (m << 6)
   354  	t0 = t0 & fieldBaseMask
   355  	t2 = (t1 >> fieldBase) + t2
   356  	t1 = t1 & fieldBaseMask
   357  	t3 = (t2 >> fieldBase) + t3
   358  	t2 = t2 & fieldBaseMask
   359  	t4 = (t3 >> fieldBase) + t4
   360  	t3 = t3 & fieldBaseMask
   361  	t5 = (t4 >> fieldBase) + t5
   362  	t4 = t4 & fieldBaseMask
   363  	t6 = (t5 >> fieldBase) + t6
   364  	t5 = t5 & fieldBaseMask
   365  	t7 = (t6 >> fieldBase) + t7
   366  	t6 = t6 & fieldBaseMask
   367  	t8 = (t7 >> fieldBase) + t8
   368  	t7 = t7 & fieldBaseMask
   369  	t9 = (t8 >> fieldBase) + t9
   370  	t8 = t8 & fieldBaseMask
   371  	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
   372  
   373  	// Finally, set the normalized and reduced words.
   374  	f.n[0] = t0
   375  	f.n[1] = t1
   376  	f.n[2] = t2
   377  	f.n[3] = t3
   378  	f.n[4] = t4
   379  	f.n[5] = t5
   380  	f.n[6] = t6
   381  	f.n[7] = t7
   382  	f.n[8] = t8
   383  	f.n[9] = t9
   384  	return f
   385  }
   386  
   387  // PutBytes unpacks the field value to a 32-byte big-endian value using the
   388  // passed byte array.  There is a similar function, Bytes, which unpacks the
   389  // field value into a new array and returns that.  This version is provided
   390  // since it can be useful to cut down on the number of allocations by allowing
   391  // the caller to reuse a buffer.
   392  //
   393  // The field value must be normalized for this function to return the correct
   394  // result.
   395  func (f *fieldVal) PutBytes(b *[32]byte) {
   396  	// Unpack the 256 total bits from the 10 uint32 words with a max of
   397  	// 26-bits per word.  This could be done with a couple of for loops,
   398  	// but this unrolled version is a bit faster.  Benchmarks show this is
   399  	// about 10 times faster than the variant which uses loops.
   400  	b[31] = byte(f.n[0] & eightBitsMask)
   401  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
   402  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
   403  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
   404  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
   405  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
   406  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
   407  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
   408  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
   409  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
   410  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
   411  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
   412  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
   413  	b[18] = byte(f.n[4] & eightBitsMask)
   414  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
   415  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
   416  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
   417  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
   418  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
   419  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
   420  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
   421  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
   422  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
   423  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
   424  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
   425  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
   426  	b[5] = byte(f.n[8] & eightBitsMask)
   427  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
   428  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
   429  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
   430  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
   431  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
   432  }
   433  
   434  // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
   435  // for a variant that allows the a buffer to be passed which can be useful to
   436  // to cut down on the number of allocations by allowing the caller to reuse a
   437  // buffer.
   438  //
   439  // The field value must be normalized for this function to return correct
   440  // result.
   441  func (f *fieldVal) Bytes() *[32]byte {
   442  	b := new([32]byte)
   443  	f.PutBytes(b)
   444  	return b
   445  }
   446  
   447  // IsZero returns whether or not the field value is equal to zero.
   448  func (f *fieldVal) IsZero() bool {
   449  	// The value can only be zero if no bits are set in any of the words.
   450  	// This is a constant time implementation.
   451  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   452  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   453  
   454  	return bits == 0
   455  }
   456  
   457  // IsOdd returns whether or not the field value is an odd number.
   458  //
   459  // The field value must be normalized for this function to return correct
   460  // result.
   461  func (f *fieldVal) IsOdd() bool {
   462  	// Only odd numbers have the bottom bit set.
   463  	return f.n[0]&1 == 1
   464  }
   465  
   466  // Equals returns whether or not the two field values are the same.  Both
   467  // field values being compared must be normalized for this function to return
   468  // the correct result.
   469  func (f *fieldVal) Equals(val *fieldVal) bool {
   470  	// Xor only sets bits when they are different, so the two field values
   471  	// can only be the same if no bits are set after xoring each word.
   472  	// This is a constant time implementation.
   473  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
   474  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
   475  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
   476  		(f.n[9] ^ val.n[9])
   477  
   478  	return bits == 0
   479  }
   480  
   481  // NegateVal negates the passed value and stores the result in f.  The caller
   482  // must provide the magnitude of the passed value for a correct result.
   483  //
   484  // The field value is returned to support chaining.  This enables syntax like:
   485  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
   486  func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
   487  	// Negation in the field is just the prime minus the value.  However,
   488  	// in order to allow negation against a field value without having to
   489  	// normalize/reduce it first, multiply by the magnitude (that is how
   490  	// "far" away it is from the normalized value) to adjust.  Also, since
   491  	// negating a value pushes it one more order of magnitude away from the
   492  	// normalized range, add 1 to compensate.
   493  	//
   494  	// For some intuition here, imagine you're performing mod 12 arithmetic
   495  	// (picture a clock) and you are negating the number 7.  So you start at
   496  	// 12 (which is of course 0 under mod 12) and count backwards (left on
   497  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
   498  	// Now, assume you're starting with 19, which is a number that is
   499  	// already larger than the modulus and congruent to 7 (mod 12).  When a
   500  	// value is already in the desired range, its magnitude is 1.  Since 19
   501  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
   502  	// multiple of the modulus is conguent to zero (mod m), the answer can
   503  	// be shortcut by simply mulplying the magnitude by the modulus and
   504  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
   505  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
   506  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
   507  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
   508  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
   509  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
   510  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
   511  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
   512  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
   513  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
   514  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
   515  
   516  	return f
   517  }
   518  
   519  // Negate negates the field value.  The existing field value is modified.  The
   520  // caller must provide the magnitude of the field value for a correct result.
   521  //
   522  // The field value is returned to support chaining.  This enables syntax like:
   523  // f.Negate().AddInt(1) so that f = -f + 1.
   524  func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
   525  	return f.NegateVal(f, magnitude)
   526  }
   527  
   528  // AddInt adds the passed integer to the existing field value and stores the
   529  // result in f.  This is a convenience function since it is fairly common to
   530  // perform some arithemetic with small native integers.
   531  //
   532  // The field value is returned to support chaining.  This enables syntax like:
   533  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
   534  func (f *fieldVal) AddInt(ui uint) *fieldVal {
   535  	// Since the field representation intentionally provides overflow bits,
   536  	// it's ok to use carryless addition as the carry bit is safely part of
   537  	// the word and will be normalized out.
   538  	f.n[0] += uint32(ui)
   539  
   540  	return f
   541  }
   542  
   543  // Add adds the passed value to the existing field value and stores the result
   544  // in f.
   545  //
   546  // The field value is returned to support chaining.  This enables syntax like:
   547  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
   548  func (f *fieldVal) Add(val *fieldVal) *fieldVal {
   549  	// Since the field representation intentionally provides overflow bits,
   550  	// it's ok to use carryless addition as the carry bit is safely part of
   551  	// each word and will be normalized out.  This could obviously be done
   552  	// in a loop, but the unrolled version is faster.
   553  	f.n[0] += val.n[0]
   554  	f.n[1] += val.n[1]
   555  	f.n[2] += val.n[2]
   556  	f.n[3] += val.n[3]
   557  	f.n[4] += val.n[4]
   558  	f.n[5] += val.n[5]
   559  	f.n[6] += val.n[6]
   560  	f.n[7] += val.n[7]
   561  	f.n[8] += val.n[8]
   562  	f.n[9] += val.n[9]
   563  
   564  	return f
   565  }
   566  
   567  // Add2 adds the passed two field values together and stores the result in f.
   568  //
   569  // The field value is returned to support chaining.  This enables syntax like:
   570  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
   571  func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
   572  	// Since the field representation intentionally provides overflow bits,
   573  	// it's ok to use carryless addition as the carry bit is safely part of
   574  	// each word and will be normalized out.  This could obviously be done
   575  	// in a loop, but the unrolled version is faster.
   576  	f.n[0] = val.n[0] + val2.n[0]
   577  	f.n[1] = val.n[1] + val2.n[1]
   578  	f.n[2] = val.n[2] + val2.n[2]
   579  	f.n[3] = val.n[3] + val2.n[3]
   580  	f.n[4] = val.n[4] + val2.n[4]
   581  	f.n[5] = val.n[5] + val2.n[5]
   582  	f.n[6] = val.n[6] + val2.n[6]
   583  	f.n[7] = val.n[7] + val2.n[7]
   584  	f.n[8] = val.n[8] + val2.n[8]
   585  	f.n[9] = val.n[9] + val2.n[9]
   586  
   587  	return f
   588  }
   589  
   590  // MulInt multiplies the field value by the passed int and stores the result in
   591  // f.  Note that this function can overflow if multiplying the value by any of
   592  // the individual words exceeds a max uint32.  Therefore it is important that
   593  // the caller ensures no overflows will occur before using this function.
   594  //
   595  // The field value is returned to support chaining.  This enables syntax like:
   596  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
   597  func (f *fieldVal) MulInt(val uint) *fieldVal {
   598  	// Since each word of the field representation can hold up to
   599  	// fieldOverflowBits extra bits which will be normalized out, it's safe
   600  	// to multiply each word without using a larger type or carry
   601  	// propagation so long as the values won't overflow a uint32.  This
   602  	// could obviously be done in a loop, but the unrolled version is
   603  	// faster.
   604  	ui := uint32(val)
   605  	f.n[0] *= ui
   606  	f.n[1] *= ui
   607  	f.n[2] *= ui
   608  	f.n[3] *= ui
   609  	f.n[4] *= ui
   610  	f.n[5] *= ui
   611  	f.n[6] *= ui
   612  	f.n[7] *= ui
   613  	f.n[8] *= ui
   614  	f.n[9] *= ui
   615  
   616  	return f
   617  }
   618  
   619  // Mul multiplies the passed value to the existing field value and stores the
   620  // result in f.  Note that this function can overflow if multiplying any
   621  // of the individual words exceeds a max uint32.  In practice, this means the
   622  // magnitude of either value involved in the multiplication must be a max of
   623  // 8.
   624  //
   625  // The field value is returned to support chaining.  This enables syntax like:
   626  // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
   627  func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
   628  	return f.Mul2(f, val)
   629  }
   630  
   631  // Mul2 multiplies the passed two field values together and stores the result
   632  // result in f.  Note that this function can overflow if multiplying any of
   633  // the individual words exceeds a max uint32.  In practice, this means the
   634  // magnitude of either value involved in the multiplication must be a max of
   635  // 8.
   636  //
   637  // The field value is returned to support chaining.  This enables syntax like:
   638  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
   639  func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
   640  	// This could be done with a couple of for loops and an array to store
   641  	// the intermediate terms, but this unrolled version is significantly
   642  	// faster.
   643  
   644  	// Terms for 2^(fieldBase*0).
   645  	m := uint64(val.n[0]) * uint64(val2.n[0])
   646  	t0 := m & fieldBaseMask
   647  
   648  	// Terms for 2^(fieldBase*1).
   649  	m = (m >> fieldBase) +
   650  		uint64(val.n[0])*uint64(val2.n[1]) +
   651  		uint64(val.n[1])*uint64(val2.n[0])
   652  	t1 := m & fieldBaseMask
   653  
   654  	// Terms for 2^(fieldBase*2).
   655  	m = (m >> fieldBase) +
   656  		uint64(val.n[0])*uint64(val2.n[2]) +
   657  		uint64(val.n[1])*uint64(val2.n[1]) +
   658  		uint64(val.n[2])*uint64(val2.n[0])
   659  	t2 := m & fieldBaseMask
   660  
   661  	// Terms for 2^(fieldBase*3).
   662  	m = (m >> fieldBase) +
   663  		uint64(val.n[0])*uint64(val2.n[3]) +
   664  		uint64(val.n[1])*uint64(val2.n[2]) +
   665  		uint64(val.n[2])*uint64(val2.n[1]) +
   666  		uint64(val.n[3])*uint64(val2.n[0])
   667  	t3 := m & fieldBaseMask
   668  
   669  	// Terms for 2^(fieldBase*4).
   670  	m = (m >> fieldBase) +
   671  		uint64(val.n[0])*uint64(val2.n[4]) +
   672  		uint64(val.n[1])*uint64(val2.n[3]) +
   673  		uint64(val.n[2])*uint64(val2.n[2]) +
   674  		uint64(val.n[3])*uint64(val2.n[1]) +
   675  		uint64(val.n[4])*uint64(val2.n[0])
   676  	t4 := m & fieldBaseMask
   677  
   678  	// Terms for 2^(fieldBase*5).
   679  	m = (m >> fieldBase) +
   680  		uint64(val.n[0])*uint64(val2.n[5]) +
   681  		uint64(val.n[1])*uint64(val2.n[4]) +
   682  		uint64(val.n[2])*uint64(val2.n[3]) +
   683  		uint64(val.n[3])*uint64(val2.n[2]) +
   684  		uint64(val.n[4])*uint64(val2.n[1]) +
   685  		uint64(val.n[5])*uint64(val2.n[0])
   686  	t5 := m & fieldBaseMask
   687  
   688  	// Terms for 2^(fieldBase*6).
   689  	m = (m >> fieldBase) +
   690  		uint64(val.n[0])*uint64(val2.n[6]) +
   691  		uint64(val.n[1])*uint64(val2.n[5]) +
   692  		uint64(val.n[2])*uint64(val2.n[4]) +
   693  		uint64(val.n[3])*uint64(val2.n[3]) +
   694  		uint64(val.n[4])*uint64(val2.n[2]) +
   695  		uint64(val.n[5])*uint64(val2.n[1]) +
   696  		uint64(val.n[6])*uint64(val2.n[0])
   697  	t6 := m & fieldBaseMask
   698  
   699  	// Terms for 2^(fieldBase*7).
   700  	m = (m >> fieldBase) +
   701  		uint64(val.n[0])*uint64(val2.n[7]) +
   702  		uint64(val.n[1])*uint64(val2.n[6]) +
   703  		uint64(val.n[2])*uint64(val2.n[5]) +
   704  		uint64(val.n[3])*uint64(val2.n[4]) +
   705  		uint64(val.n[4])*uint64(val2.n[3]) +
   706  		uint64(val.n[5])*uint64(val2.n[2]) +
   707  		uint64(val.n[6])*uint64(val2.n[1]) +
   708  		uint64(val.n[7])*uint64(val2.n[0])
   709  	t7 := m & fieldBaseMask
   710  
   711  	// Terms for 2^(fieldBase*8).
   712  	m = (m >> fieldBase) +
   713  		uint64(val.n[0])*uint64(val2.n[8]) +
   714  		uint64(val.n[1])*uint64(val2.n[7]) +
   715  		uint64(val.n[2])*uint64(val2.n[6]) +
   716  		uint64(val.n[3])*uint64(val2.n[5]) +
   717  		uint64(val.n[4])*uint64(val2.n[4]) +
   718  		uint64(val.n[5])*uint64(val2.n[3]) +
   719  		uint64(val.n[6])*uint64(val2.n[2]) +
   720  		uint64(val.n[7])*uint64(val2.n[1]) +
   721  		uint64(val.n[8])*uint64(val2.n[0])
   722  	t8 := m & fieldBaseMask
   723  
   724  	// Terms for 2^(fieldBase*9).
   725  	m = (m >> fieldBase) +
   726  		uint64(val.n[0])*uint64(val2.n[9]) +
   727  		uint64(val.n[1])*uint64(val2.n[8]) +
   728  		uint64(val.n[2])*uint64(val2.n[7]) +
   729  		uint64(val.n[3])*uint64(val2.n[6]) +
   730  		uint64(val.n[4])*uint64(val2.n[5]) +
   731  		uint64(val.n[5])*uint64(val2.n[4]) +
   732  		uint64(val.n[6])*uint64(val2.n[3]) +
   733  		uint64(val.n[7])*uint64(val2.n[2]) +
   734  		uint64(val.n[8])*uint64(val2.n[1]) +
   735  		uint64(val.n[9])*uint64(val2.n[0])
   736  	t9 := m & fieldBaseMask
   737  
   738  	// Terms for 2^(fieldBase*10).
   739  	m = (m >> fieldBase) +
   740  		uint64(val.n[1])*uint64(val2.n[9]) +
   741  		uint64(val.n[2])*uint64(val2.n[8]) +
   742  		uint64(val.n[3])*uint64(val2.n[7]) +
   743  		uint64(val.n[4])*uint64(val2.n[6]) +
   744  		uint64(val.n[5])*uint64(val2.n[5]) +
   745  		uint64(val.n[6])*uint64(val2.n[4]) +
   746  		uint64(val.n[7])*uint64(val2.n[3]) +
   747  		uint64(val.n[8])*uint64(val2.n[2]) +
   748  		uint64(val.n[9])*uint64(val2.n[1])
   749  	t10 := m & fieldBaseMask
   750  
   751  	// Terms for 2^(fieldBase*11).
   752  	m = (m >> fieldBase) +
   753  		uint64(val.n[2])*uint64(val2.n[9]) +
   754  		uint64(val.n[3])*uint64(val2.n[8]) +
   755  		uint64(val.n[4])*uint64(val2.n[7]) +
   756  		uint64(val.n[5])*uint64(val2.n[6]) +
   757  		uint64(val.n[6])*uint64(val2.n[5]) +
   758  		uint64(val.n[7])*uint64(val2.n[4]) +
   759  		uint64(val.n[8])*uint64(val2.n[3]) +
   760  		uint64(val.n[9])*uint64(val2.n[2])
   761  	t11 := m & fieldBaseMask
   762  
   763  	// Terms for 2^(fieldBase*12).
   764  	m = (m >> fieldBase) +
   765  		uint64(val.n[3])*uint64(val2.n[9]) +
   766  		uint64(val.n[4])*uint64(val2.n[8]) +
   767  		uint64(val.n[5])*uint64(val2.n[7]) +
   768  		uint64(val.n[6])*uint64(val2.n[6]) +
   769  		uint64(val.n[7])*uint64(val2.n[5]) +
   770  		uint64(val.n[8])*uint64(val2.n[4]) +
   771  		uint64(val.n[9])*uint64(val2.n[3])
   772  	t12 := m & fieldBaseMask
   773  
   774  	// Terms for 2^(fieldBase*13).
   775  	m = (m >> fieldBase) +
   776  		uint64(val.n[4])*uint64(val2.n[9]) +
   777  		uint64(val.n[5])*uint64(val2.n[8]) +
   778  		uint64(val.n[6])*uint64(val2.n[7]) +
   779  		uint64(val.n[7])*uint64(val2.n[6]) +
   780  		uint64(val.n[8])*uint64(val2.n[5]) +
   781  		uint64(val.n[9])*uint64(val2.n[4])
   782  	t13 := m & fieldBaseMask
   783  
   784  	// Terms for 2^(fieldBase*14).
   785  	m = (m >> fieldBase) +
   786  		uint64(val.n[5])*uint64(val2.n[9]) +
   787  		uint64(val.n[6])*uint64(val2.n[8]) +
   788  		uint64(val.n[7])*uint64(val2.n[7]) +
   789  		uint64(val.n[8])*uint64(val2.n[6]) +
   790  		uint64(val.n[9])*uint64(val2.n[5])
   791  	t14 := m & fieldBaseMask
   792  
   793  	// Terms for 2^(fieldBase*15).
   794  	m = (m >> fieldBase) +
   795  		uint64(val.n[6])*uint64(val2.n[9]) +
   796  		uint64(val.n[7])*uint64(val2.n[8]) +
   797  		uint64(val.n[8])*uint64(val2.n[7]) +
   798  		uint64(val.n[9])*uint64(val2.n[6])
   799  	t15 := m & fieldBaseMask
   800  
   801  	// Terms for 2^(fieldBase*16).
   802  	m = (m >> fieldBase) +
   803  		uint64(val.n[7])*uint64(val2.n[9]) +
   804  		uint64(val.n[8])*uint64(val2.n[8]) +
   805  		uint64(val.n[9])*uint64(val2.n[7])
   806  	t16 := m & fieldBaseMask
   807  
   808  	// Terms for 2^(fieldBase*17).
   809  	m = (m >> fieldBase) +
   810  		uint64(val.n[8])*uint64(val2.n[9]) +
   811  		uint64(val.n[9])*uint64(val2.n[8])
   812  	t17 := m & fieldBaseMask
   813  
   814  	// Terms for 2^(fieldBase*18).
   815  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
   816  	t18 := m & fieldBaseMask
   817  
   818  	// What's left is for 2^(fieldBase*19).
   819  	t19 := m >> fieldBase
   820  
   821  	// At this point, all of the terms are grouped into their respective
   822  	// base.
   823  	//
   824  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   825  	// when the modulus is of the special form m = b^t - c, highly efficient
   826  	// reduction can be achieved per the provided algorithm.
   827  	//
   828  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   829  	// this criteria.
   830  	//
   831  	// 4294968273 in field representation (base 2^26) is:
   832  	// n[0] = 977
   833  	// n[1] = 64
   834  	// That is to say (2^26 * 64) + 977 = 4294968273
   835  	//
   836  	// Since each word is in base 26, the upper terms (t10 and up) start
   837  	// at 260 bits (versus the final desired range of 256 bits), so the
   838  	// field representation of 'c' from above needs to be adjusted for the
   839  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
   840  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
   841  	// n[0] = 977 * 16 = 15632
   842  	// n[1] = 64 * 16 = 1024
   843  	// That is to say (2^26 * 1024) + 15632 = 68719492368
   844  	//
   845  	// To reduce the final term, t19, the entire 'c' value is needed instead
   846  	// of only n[0] because there are no more terms left to handle n[1].
   847  	// This means there might be some magnitude left in the upper bits that
   848  	// is handled below.
   849  	m = t0 + t10*15632
   850  	t0 = m & fieldBaseMask
   851  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
   852  	t1 = m & fieldBaseMask
   853  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
   854  	t2 = m & fieldBaseMask
   855  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
   856  	t3 = m & fieldBaseMask
   857  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
   858  	t4 = m & fieldBaseMask
   859  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
   860  	t5 = m & fieldBaseMask
   861  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
   862  	t6 = m & fieldBaseMask
   863  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
   864  	t7 = m & fieldBaseMask
   865  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
   866  	t8 = m & fieldBaseMask
   867  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
   868  	t9 = m & fieldMSBMask
   869  	m = m >> fieldMSBBits
   870  
   871  	// At this point, if the magnitude is greater than 0, the overall value
   872  	// is greater than the max possible 256-bit value.  In particular, it is
   873  	// "how many times larger" than the max value it is.
   874  	//
   875  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
   876  	// quotient is zero.  However, due to the above, we already know at
   877  	// least how many times we would need to repeat as it's the value
   878  	// currently in m.  Thus we can simply multiply the magnitude by the
   879  	// field representation of the prime and do a single iteration.  Notice
   880  	// that nothing will be changed when the magnitude is zero, so we could
   881  	// skip this in that case, however always running regardless allows it
   882  	// to run in constant time.  The final result will be in the range
   883  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
   884  	// magnitude of 1, but it is denormalized.
   885  	d := t0 + m*977
   886  	f.n[0] = uint32(d & fieldBaseMask)
   887  	d = (d >> fieldBase) + t1 + m*64
   888  	f.n[1] = uint32(d & fieldBaseMask)
   889  	f.n[2] = uint32((d >> fieldBase) + t2)
   890  	f.n[3] = uint32(t3)
   891  	f.n[4] = uint32(t4)
   892  	f.n[5] = uint32(t5)
   893  	f.n[6] = uint32(t6)
   894  	f.n[7] = uint32(t7)
   895  	f.n[8] = uint32(t8)
   896  	f.n[9] = uint32(t9)
   897  
   898  	return f
   899  }
   900  
   901  // Square squares the field value.  The existing field value is modified.  Note
   902  // that this function can overflow if multiplying any of the individual words
   903  // exceeds a max uint32.  In practice, this means the magnitude of the field
   904  // must be a max of 8 to prevent overflow.
   905  //
   906  // The field value is returned to support chaining.  This enables syntax like:
   907  // f.Square().Mul(f2) so that f = f^2 * f2.
   908  func (f *fieldVal) Square() *fieldVal {
   909  	return f.SquareVal(f)
   910  }
   911  
   912  // SquareVal squares the passed value and stores the result in f.  Note that
   913  // this function can overflow if multiplying any of the individual words
   914  // exceeds a max uint32.  In practice, this means the magnitude of the field
   915  // being squred must be a max of 8 to prevent overflow.
   916  //
   917  // The field value is returned to support chaining.  This enables syntax like:
   918  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
   919  func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
   920  	// This could be done with a couple of for loops and an array to store
   921  	// the intermediate terms, but this unrolled version is significantly
   922  	// faster.
   923  
   924  	// Terms for 2^(fieldBase*0).
   925  	m := uint64(val.n[0]) * uint64(val.n[0])
   926  	t0 := m & fieldBaseMask
   927  
   928  	// Terms for 2^(fieldBase*1).
   929  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
   930  	t1 := m & fieldBaseMask
   931  
   932  	// Terms for 2^(fieldBase*2).
   933  	m = (m >> fieldBase) +
   934  		2*uint64(val.n[0])*uint64(val.n[2]) +
   935  		uint64(val.n[1])*uint64(val.n[1])
   936  	t2 := m & fieldBaseMask
   937  
   938  	// Terms for 2^(fieldBase*3).
   939  	m = (m >> fieldBase) +
   940  		2*uint64(val.n[0])*uint64(val.n[3]) +
   941  		2*uint64(val.n[1])*uint64(val.n[2])
   942  	t3 := m & fieldBaseMask
   943  
   944  	// Terms for 2^(fieldBase*4).
   945  	m = (m >> fieldBase) +
   946  		2*uint64(val.n[0])*uint64(val.n[4]) +
   947  		2*uint64(val.n[1])*uint64(val.n[3]) +
   948  		uint64(val.n[2])*uint64(val.n[2])
   949  	t4 := m & fieldBaseMask
   950  
   951  	// Terms for 2^(fieldBase*5).
   952  	m = (m >> fieldBase) +
   953  		2*uint64(val.n[0])*uint64(val.n[5]) +
   954  		2*uint64(val.n[1])*uint64(val.n[4]) +
   955  		2*uint64(val.n[2])*uint64(val.n[3])
   956  	t5 := m & fieldBaseMask
   957  
   958  	// Terms for 2^(fieldBase*6).
   959  	m = (m >> fieldBase) +
   960  		2*uint64(val.n[0])*uint64(val.n[6]) +
   961  		2*uint64(val.n[1])*uint64(val.n[5]) +
   962  		2*uint64(val.n[2])*uint64(val.n[4]) +
   963  		uint64(val.n[3])*uint64(val.n[3])
   964  	t6 := m & fieldBaseMask
   965  
   966  	// Terms for 2^(fieldBase*7).
   967  	m = (m >> fieldBase) +
   968  		2*uint64(val.n[0])*uint64(val.n[7]) +
   969  		2*uint64(val.n[1])*uint64(val.n[6]) +
   970  		2*uint64(val.n[2])*uint64(val.n[5]) +
   971  		2*uint64(val.n[3])*uint64(val.n[4])
   972  	t7 := m & fieldBaseMask
   973  
   974  	// Terms for 2^(fieldBase*8).
   975  	m = (m >> fieldBase) +
   976  		2*uint64(val.n[0])*uint64(val.n[8]) +
   977  		2*uint64(val.n[1])*uint64(val.n[7]) +
   978  		2*uint64(val.n[2])*uint64(val.n[6]) +
   979  		2*uint64(val.n[3])*uint64(val.n[5]) +
   980  		uint64(val.n[4])*uint64(val.n[4])
   981  	t8 := m & fieldBaseMask
   982  
   983  	// Terms for 2^(fieldBase*9).
   984  	m = (m >> fieldBase) +
   985  		2*uint64(val.n[0])*uint64(val.n[9]) +
   986  		2*uint64(val.n[1])*uint64(val.n[8]) +
   987  		2*uint64(val.n[2])*uint64(val.n[7]) +
   988  		2*uint64(val.n[3])*uint64(val.n[6]) +
   989  		2*uint64(val.n[4])*uint64(val.n[5])
   990  	t9 := m & fieldBaseMask
   991  
   992  	// Terms for 2^(fieldBase*10).
   993  	m = (m >> fieldBase) +
   994  		2*uint64(val.n[1])*uint64(val.n[9]) +
   995  		2*uint64(val.n[2])*uint64(val.n[8]) +
   996  		2*uint64(val.n[3])*uint64(val.n[7]) +
   997  		2*uint64(val.n[4])*uint64(val.n[6]) +
   998  		uint64(val.n[5])*uint64(val.n[5])
   999  	t10 := m & fieldBaseMask
  1000  
  1001  	// Terms for 2^(fieldBase*11).
  1002  	m = (m >> fieldBase) +
  1003  		2*uint64(val.n[2])*uint64(val.n[9]) +
  1004  		2*uint64(val.n[3])*uint64(val.n[8]) +
  1005  		2*uint64(val.n[4])*uint64(val.n[7]) +
  1006  		2*uint64(val.n[5])*uint64(val.n[6])
  1007  	t11 := m & fieldBaseMask
  1008  
  1009  	// Terms for 2^(fieldBase*12).
  1010  	m = (m >> fieldBase) +
  1011  		2*uint64(val.n[3])*uint64(val.n[9]) +
  1012  		2*uint64(val.n[4])*uint64(val.n[8]) +
  1013  		2*uint64(val.n[5])*uint64(val.n[7]) +
  1014  		uint64(val.n[6])*uint64(val.n[6])
  1015  	t12 := m & fieldBaseMask
  1016  
  1017  	// Terms for 2^(fieldBase*13).
  1018  	m = (m >> fieldBase) +
  1019  		2*uint64(val.n[4])*uint64(val.n[9]) +
  1020  		2*uint64(val.n[5])*uint64(val.n[8]) +
  1021  		2*uint64(val.n[6])*uint64(val.n[7])
  1022  	t13 := m & fieldBaseMask
  1023  
  1024  	// Terms for 2^(fieldBase*14).
  1025  	m = (m >> fieldBase) +
  1026  		2*uint64(val.n[5])*uint64(val.n[9]) +
  1027  		2*uint64(val.n[6])*uint64(val.n[8]) +
  1028  		uint64(val.n[7])*uint64(val.n[7])
  1029  	t14 := m & fieldBaseMask
  1030  
  1031  	// Terms for 2^(fieldBase*15).
  1032  	m = (m >> fieldBase) +
  1033  		2*uint64(val.n[6])*uint64(val.n[9]) +
  1034  		2*uint64(val.n[7])*uint64(val.n[8])
  1035  	t15 := m & fieldBaseMask
  1036  
  1037  	// Terms for 2^(fieldBase*16).
  1038  	m = (m >> fieldBase) +
  1039  		2*uint64(val.n[7])*uint64(val.n[9]) +
  1040  		uint64(val.n[8])*uint64(val.n[8])
  1041  	t16 := m & fieldBaseMask
  1042  
  1043  	// Terms for 2^(fieldBase*17).
  1044  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
  1045  	t17 := m & fieldBaseMask
  1046  
  1047  	// Terms for 2^(fieldBase*18).
  1048  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
  1049  	t18 := m & fieldBaseMask
  1050  
  1051  	// What's left is for 2^(fieldBase*19).
  1052  	t19 := m >> fieldBase
  1053  
  1054  	// At this point, all of the terms are grouped into their respective
  1055  	// base.
  1056  	//
  1057  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1058  	// when the modulus is of the special form m = b^t - c, highly efficient
  1059  	// reduction can be achieved per the provided algorithm.
  1060  	//
  1061  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1062  	// this criteria.
  1063  	//
  1064  	// 4294968273 in field representation (base 2^26) is:
  1065  	// n[0] = 977
  1066  	// n[1] = 64
  1067  	// That is to say (2^26 * 64) + 977 = 4294968273
  1068  	//
  1069  	// Since each word is in base 26, the upper terms (t10 and up) start
  1070  	// at 260 bits (versus the final desired range of 256 bits), so the
  1071  	// field representation of 'c' from above needs to be adjusted for the
  1072  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1073  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1074  	// n[0] = 977 * 16 = 15632
  1075  	// n[1] = 64 * 16 = 1024
  1076  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1077  	//
  1078  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1079  	// of only n[0] because there are no more terms left to handle n[1].
  1080  	// This means there might be some magnitude left in the upper bits that
  1081  	// is handled below.
  1082  	m = t0 + t10*15632
  1083  	t0 = m & fieldBaseMask
  1084  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1085  	t1 = m & fieldBaseMask
  1086  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1087  	t2 = m & fieldBaseMask
  1088  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1089  	t3 = m & fieldBaseMask
  1090  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1091  	t4 = m & fieldBaseMask
  1092  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1093  	t5 = m & fieldBaseMask
  1094  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1095  	t6 = m & fieldBaseMask
  1096  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1097  	t7 = m & fieldBaseMask
  1098  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1099  	t8 = m & fieldBaseMask
  1100  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1101  	t9 = m & fieldMSBMask
  1102  	m = m >> fieldMSBBits
  1103  
  1104  	// At this point, if the magnitude is greater than 0, the overall value
  1105  	// is greater than the max possible 256-bit value.  In particular, it is
  1106  	// "how many times larger" than the max value it is.
  1107  	//
  1108  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1109  	// quotient is zero.  However, due to the above, we already know at
  1110  	// least how many times we would need to repeat as it's the value
  1111  	// currently in m.  Thus we can simply multiply the magnitude by the
  1112  	// field representation of the prime and do a single iteration.  Notice
  1113  	// that nothing will be changed when the magnitude is zero, so we could
  1114  	// skip this in that case, however always running regardless allows it
  1115  	// to run in constant time.  The final result will be in the range
  1116  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1117  	// magnitude of 1, but it is denormalized.
  1118  	n := t0 + m*977
  1119  	f.n[0] = uint32(n & fieldBaseMask)
  1120  	n = (n >> fieldBase) + t1 + m*64
  1121  	f.n[1] = uint32(n & fieldBaseMask)
  1122  	f.n[2] = uint32((n >> fieldBase) + t2)
  1123  	f.n[3] = uint32(t3)
  1124  	f.n[4] = uint32(t4)
  1125  	f.n[5] = uint32(t5)
  1126  	f.n[6] = uint32(t6)
  1127  	f.n[7] = uint32(t7)
  1128  	f.n[8] = uint32(t8)
  1129  	f.n[9] = uint32(t9)
  1130  
  1131  	return f
  1132  }
  1133  
  1134  // Inverse finds the modular multiplicative inverse of the field value.  The
  1135  // existing field value is modified.
  1136  //
  1137  // The field value is returned to support chaining.  This enables syntax like:
  1138  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
  1139  func (f *fieldVal) Inverse() *fieldVal {
  1140  	// Fermat's little theorem states that for a nonzero number a and prime
  1141  	// prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
  1142  	// a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
  1143  	// Thus, a^(p-2) is the multiplicative inverse.
  1144  	//
  1145  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
  1146  	// a sequence of squares and multipications that minimizes the number of
  1147  	// multiplications needed (since they are more costly than squarings).
  1148  	// Intermediate results are saved and reused as well.
  1149  	//
  1150  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
  1151  	//
  1152  	// This has a cost of 258 field squarings and 33 field multiplications.
  1153  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
  1154  	a2.SquareVal(f)
  1155  	a3.Mul2(&a2, f)
  1156  	a4.SquareVal(&a2)
  1157  	a10.SquareVal(&a4).Mul(&a2)
  1158  	a11.Mul2(&a10, f)
  1159  	a21.Mul2(&a10, &a11)
  1160  	a42.SquareVal(&a21)
  1161  	a45.Mul2(&a42, &a3)
  1162  	a63.Mul2(&a42, &a21)
  1163  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
  1164  	a1023.Mul2(&a1019, &a4)
  1165  	f.Set(&a63)                                    // f = a^(2^6 - 1)
  1166  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
  1167  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
  1168  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
  1169  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
  1170  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
  1171  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
  1172  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
  1173  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
  1174  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
  1175  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
  1176  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
  1177  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
  1178  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
  1179  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
  1180  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
  1181  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
  1182  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
  1183  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
  1184  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
  1185  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
  1186  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
  1187  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
  1188  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
  1189  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
  1190  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
  1191  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
  1192  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
  1193  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
  1194  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
  1195  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
  1196  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
  1197  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
  1198  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
  1199  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
  1200  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
  1201  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
  1202  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
  1203  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
  1204  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
  1205  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
  1206  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
  1207  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
  1208  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
  1209  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
  1210  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
  1211  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
  1212  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
  1213  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
  1214  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
  1215  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
  1216  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
  1217  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
  1218  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
  1219  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
  1220  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
  1221  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
  1222  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
  1223  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
  1224  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
  1225  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
  1226  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
  1227  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
  1228  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
  1229  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
  1230  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
  1231  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
  1232  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
  1233  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
  1234  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
  1235  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
  1236  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
  1237  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
  1238  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
  1239  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
  1240  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
  1241  }
  1242  
  1243  // SqrtVal computes the square root of x modulo the curve's prime, and stores
  1244  // the result in f. The square root is computed via exponentiation of x by the
  1245  // value Q = (P+1)/4 using the curve's precomputed big-endian representation of
  1246  // the Q.  This method uses a modified version of square-and-multiply
  1247  // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits,
  1248  // which offers better performance over both big.Int exponentiation and bit-wise
  1249  // square-and-multiply.
  1250  //
  1251  // NOTE: This method only works when P is intended to be the secp256k1 prime and
  1252  // is not constant time. The returned value is of magnitude 1, but is
  1253  // denormalized.
  1254  func (f *fieldVal) SqrtVal(x *fieldVal) *fieldVal {
  1255  	// The following computation iteratively computes x^((P+1)/4) = x^Q
  1256  	// using the recursive, piece-wise definition:
  1257  	//
  1258  	//   x^n = (x^2)^(n/2) mod P       if n is even
  1259  	//   x^n = x(x^2)^(n-1/2) mod P    if n is odd
  1260  	//
  1261  	// Given n in its big-endian representation b_k, ..., b_0, x^n can be
  1262  	// computed by defining the sequence r_k+1, ..., r_0, where:
  1263  	//
  1264  	//   r_k+1 = 1
  1265  	//   r_i   = (r_i+1)^2 * x^b_i    for i = k, ..., 0
  1266  	//
  1267  	// The final value r_0 = x^n.
  1268  	//
  1269  	// See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more
  1270  	// details.
  1271  	//
  1272  	// This can be further optimized, by observing that the value of Q in
  1273  	// secp256k1 has the value:
  1274  	//
  1275  	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
  1276  	//
  1277  	// We can unroll the typical bit-wise interpretation of the
  1278  	// exponentiation algorithm above to instead operate on bytes.
  1279  	// This reduces the number of comparisons by an order of magnitude,
  1280  	// reducing the overhead of failed branch predictions and additional
  1281  	// comparisons in this method.
  1282  	//
  1283  	// Since there there are only 4 unique bytes of Q, this keeps the jump
  1284  	// table small without the need to handle all possible 8-bit values.
  1285  	// Further, we observe that 29 of the 32 bytes are 0xff; making the
  1286  	// first case handle 0xff therefore optimizes the hot path.
  1287  	f.SetInt(1)
  1288  	for _, b := range fieldQBytes {
  1289  		switch b {
  1290  
  1291  		// Most common case, where all 8 bits are set.
  1292  		case 0xff:
  1293  			f.Square().Mul(x)
  1294  			f.Square().Mul(x)
  1295  			f.Square().Mul(x)
  1296  			f.Square().Mul(x)
  1297  			f.Square().Mul(x)
  1298  			f.Square().Mul(x)
  1299  			f.Square().Mul(x)
  1300  			f.Square().Mul(x)
  1301  
  1302  		// First byte of Q (0x3f), where all but the top two bits are
  1303  		// set. Note that this case only applies six operations, since
  1304  		// the highest bit of Q resides in bit six of the first byte. We
  1305  		// ignore the first two bits, since squaring for these bits will
  1306  		// result in an invalid result. We forgo squaring f before the
  1307  		// first multiply, since 1^2 = 1.
  1308  		case 0x3f:
  1309  			f.Mul(x)
  1310  			f.Square().Mul(x)
  1311  			f.Square().Mul(x)
  1312  			f.Square().Mul(x)
  1313  			f.Square().Mul(x)
  1314  			f.Square().Mul(x)
  1315  
  1316  		// Byte 28 of Q (0xbf), where only bit 7 is unset.
  1317  		case 0xbf:
  1318  			f.Square().Mul(x)
  1319  			f.Square()
  1320  			f.Square().Mul(x)
  1321  			f.Square().Mul(x)
  1322  			f.Square().Mul(x)
  1323  			f.Square().Mul(x)
  1324  			f.Square().Mul(x)
  1325  			f.Square().Mul(x)
  1326  
  1327  		// Byte 31 of Q (0x0c), where only bits 3 and 4 are set.
  1328  		default:
  1329  			f.Square()
  1330  			f.Square()
  1331  			f.Square()
  1332  			f.Square()
  1333  			f.Square().Mul(x)
  1334  			f.Square().Mul(x)
  1335  			f.Square()
  1336  			f.Square()
  1337  		}
  1338  	}
  1339  
  1340  	return f
  1341  }
  1342  
  1343  // Sqrt computes the square root of f modulo the curve's prime, and stores the
  1344  // result in f. The square root is computed via exponentiation of x by the value
  1345  // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q.
  1346  // This method uses a modified version of square-and-multiply exponentiation
  1347  // over secp256k1 fieldVals to operate on bytes instead of bits, which offers
  1348  // better performance over both big.Int exponentiation and bit-wise
  1349  // square-and-multiply.
  1350  //
  1351  // NOTE: This method only works when P is intended to be the secp256k1 prime and
  1352  // is not constant time. The returned value is of magnitude 1, but is
  1353  // denormalized.
  1354  func (f *fieldVal) Sqrt() *fieldVal {
  1355  	return f.SqrtVal(f)
  1356  }