github.com/mit-dci/lit@v0.0.0-20221102210550-8c3d3b49f2ce/crypto/koblitz/field.go (about) 1 // Copyright (c) 2013-2014 The btcsuite developers 2 // Copyright (c) 2013-2014 Dave Collins 3 // Use of this source code is governed by an ISC 4 // license that can be found in the LICENSE file. 5 6 package koblitz 7 8 // References: 9 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone. 10 // http://cacr.uwaterloo.ca/hac/ 11 12 // All elliptic curve operations for secp256k1 are done in a finite field 13 // characterized by a 256-bit prime. Given this precision is larger than the 14 // biggest available native type, obviously some form of bignum math is needed. 15 // This package implements specialized fixed-precision field arithmetic rather 16 // than relying on an arbitrary-precision arithmetic package such as math/big 17 // for dealing with the field math since the size is known. As a result, rather 18 // large performance gains are achieved by taking advantage of many 19 // optimizations not available to arbitrary-precision arithmetic and generic 20 // modular arithmetic algorithms. 21 // 22 // There are various ways to internally represent each finite field element. 23 // For example, the most obvious representation would be to use an array of 4 24 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from 25 // a couple of issues. First, there is no native Go type large enough to handle 26 // the intermediate results while adding or multiplying two 64-bit numbers, and 27 // second there is no space left for overflows when performing the intermediate 28 // arithmetic between each array element which would lead to expensive carry 29 // propagation. 30 // 31 // Given the above, this implementation represents the field elements as 32 // 10 uint32s with each word (array entry) treated as base 2^26. 
// This was chosen for the following reasons:
// 1) Most systems at the current time are 64-bit (or at least have 64-bit
//    registers available for specialized purposes such as MMX) so the
//    intermediate results can typically be done using a native register (and
//    using uint64s to avoid the need for additional half-word arithmetic)
// 2) In order to allow addition of the internal words without having to
//    propagate the carry, the max normalized value for each register must
//    be less than the number of bits available in the register
// 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
//    reasonable choice for #2
// 4) Given the need for 256-bits of precision and the properties stated in #1,
//    #2, and #3, the representation which best accommodates this is 10 uint32s
//    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
//    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
//    overflow
//
// Since it is so important that the field arithmetic is extremely fast for
// high performance crypto, this code does not perform any validation where
// it ordinarily would.  For example, some functions only give the correct
// result if the field is normalized and there is no checking to ensure it is.
// While validating all state and input is normally preferable, this code is
// really only used internally and every extra check counts.

import (
	"encoding/hex"
)

// Constants used to make the code more readable.
const (
	twoBitsMask   = 0x3
	fourBitsMask  = 0xf
	sixBitsMask   = 0x3f
	eightBitsMask = 0xff
)

// Constants related to the field representation.
const (
	// fieldWords is the number of words used to internally represent the
	// 256-bit value.
	fieldWords = 10

	// fieldBase is the exponent used to form the numeric base of each word.
	// 2^(fieldBase*i) where i is the word position.
	fieldBase = 26

	// fieldOverflowBits is the minimum number of "overflow" bits for each
	// word in the field value.
	fieldOverflowBits = 32 - fieldBase

	// fieldBaseMask is the mask for the bits in each word needed to
	// represent the numeric base of each word (except the most significant
	// word).
	fieldBaseMask = (1 << fieldBase) - 1

	// fieldMSBBits is the number of bits in the most significant word used
	// to represent the value.
	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))

	// fieldMSBMask is the mask for the bits in the most significant word
	// needed to represent the value.
	fieldMSBMask = (1 << fieldMSBBits) - 1

	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.
	fieldPrimeWordZero = 0x3fffc2f

	// fieldPrimeWordOne is word one of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.
	fieldPrimeWordOne = 0x3ffffbf
)

// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field.  This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
// represents each 256-bit value as 10 32-bit integers in base 2^26.  This
// provides 6 bits of overflow in each word (10 bits in the most significant
// word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
// the arithmetic needed for elliptic curve operations.
//
// The following depicts the internal representation:
// 	 -----------------------------------------------------------------
// 	|        n[9]       |        n[8]       | ... |        n[0]       |
// 	| 32 bits available | 32 bits available | ... | 32 bits available |
// 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
// 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
// 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
// 	 -----------------------------------------------------------------
//
// For example, consider the number 2^49 + 1.  It would be represented as:
// 	n[0] = 1
// 	n[1] = 2^23
// 	n[2..9] = 0
//
// The full 256-bit value is then calculated by looping i from 9..0 and
// doing sum(n[i] * 2^(26i)) like so:
// 	n[9] * 2^(26*9) = 0    * 2^234 = 0
// 	n[8] * 2^(26*8) = 0    * 2^208 = 0
// 	...
// 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
// 	n[0] * 2^(26*0) = 1    * 2^0   = 1
// 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
type fieldVal struct {
	n [10]uint32
}

// String returns the field value as a human-readable hex string.  The
// receiver is a copy, so the caller's value is not normalized in place.
func (f fieldVal) String() string {
	t := new(fieldVal).Set(&f).Normalize()
	return hex.EncodeToString(t.Bytes()[:])
}

// Zero sets the field value to zero.  A newly created field value is already
// set to zero.  This function can be useful to clear an existing field value
// for reuse.
func (f *fieldVal) Zero() {
	f.n[0] = 0
	f.n[1] = 0
	f.n[2] = 0
	f.n[3] = 0
	f.n[4] = 0
	f.n[5] = 0
	f.n[6] = 0
	f.n[7] = 0
	f.n[8] = 0
	f.n[9] = 0
}

// Set sets the field value equal to the passed value.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
// modified.
func (f *fieldVal) Set(val *fieldVal) *fieldVal {
	*f = *val
	return f
}

// SetInt sets the field value to the passed integer.  This is a convenience
// function since it is fairly common to perform some arithmetic with small
// native integers.  Only the low 32 bits of ui are stored (in word zero), so
// it is intended for small values.
//
// The field value is returned to support chaining.  This enables syntax such
// as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
func (f *fieldVal) SetInt(ui uint) *fieldVal {
	f.Zero()
	f.n[0] = uint32(ui)
	return f
}

// SetBytes packs the passed 32-byte big-endian value into the internal field
// value representation.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
	// Pack the 256 total bits across the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is significantly faster.  Benchmarks show
	// this is about 34 times faster than the variant which uses loops.
	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
		(uint32(b[28])&twoBitsMask)<<24
	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
		(uint32(b[25])&fourBitsMask)<<22
	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
		(uint32(b[22])&sixBitsMask)<<20
	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
		uint32(b[19])<<18
	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
		(uint32(b[15])&twoBitsMask)<<24
	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
		(uint32(b[12])&fourBitsMask)<<22
	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
		(uint32(b[9])&sixBitsMask)<<20
	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
		uint32(b[6])<<18
	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
		(uint32(b[2])&twoBitsMask)<<24
	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
	return f
}

// SetByteSlice packs the passed big-endian value into the internal field value
// representation.  Only the first 32-bytes are used.  As a result, it is up to
// the caller to ensure numbers of the appropriate size are used or the value
// will be truncated.  Inputs shorter than 32 bytes are treated as if they were
// left-padded with zeros.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetByteSlice(byteSlice)
func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
	// Truncate first so the right-alignment offset below can never become
	// negative, which would otherwise index before the start of the array.
	if len(b) > 32 {
		b = b[:32]
	}
	var b32 [32]byte
	copy(b32[32-len(b):], b)
	return f.SetBytes(&b32)
}

// SetHex decodes the passed big-endian hex string into the internal field value
// representation.  Only the first 32-bytes are used.  A string with an odd
// number of digits is treated as if it had a leading zero.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
func (f *fieldVal) SetHex(hexString string) *fieldVal {
	if len(hexString)%2 != 0 {
		hexString = "0" + hexString
	}
	// The decode error is deliberately ignored because the signature has no
	// way to report it.  On malformed input hex.DecodeString returns the
	// bytes decoded before the error and those are what get used, so callers
	// must only pass well-formed hex.
	decoded, _ := hex.DecodeString(hexString)
	return f.SetByteSlice(decoded)
}

// Normalize normalizes the internal field words into the desired range and
// performs fast modular reduction over the secp256k1 prime by making use of the
// special form of the prime.  On return every word holds only its value bits
// and the overall value is fully reduced, i.e. in the range [0, prime).
//
// The field value is returned to support chaining.
func (f *fieldVal) Normalize() *fieldVal {
	// The field representation leaves 6 bits of overflow in each
	// word so intermediate calculations can be performed without needing
	// to propagate the carry to each higher word during the calculations.
	// In order to normalize, first we need to "compact" the full 256-bit
	// value to the right and treat the additional 64 leftmost bits as
	// the magnitude.
	m := f.n[0]
	t0 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[1]
	t1 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[2]
	t2 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[3]
	t3 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[4]
	t4 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[5]
	t5 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[6]
	t6 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[7]
	t7 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[8]
	t8 := m & fieldBaseMask
	m = (m >> fieldBase) + f.n[9]
	t9 := m & fieldMSBMask
	m = m >> fieldMSBBits

	// At this point, if the magnitude is greater than 0, the overall value
	// is greater than the max possible 256-bit value.  In particular, it is
	// "how many times larger" than the max value it is.  Since this field
	// is doing arithmetic modulo the secp256k1 prime, we need to perform
	// modular reduction over the prime.
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, highly efficient
	// reduction can be achieved.
	//
	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
	// this criteria.
	//
	// 4294968273 in field representation (base 2^26) is:
	// n[0] = 977
	// n[1] = 64
	// That is to say (2^26 * 64) + 977 = 4294968273
	//
	// Since 2^256 is congruent to c modulo the prime, the magnitude can be
	// folded back in by adding m*c to the low 256 bits.  Notice that nothing
	// will be changed when the magnitude is zero, so we could skip this in
	// that case, however always running regardless keeps the work the same
	// for every input.
	r := t0 + m*977
	t0 = r & fieldBaseMask
	r = (r >> fieldBase) + t1 + m*64
	t1 = r & fieldBaseMask
	r = (r >> fieldBase) + t2
	t2 = r & fieldBaseMask
	r = (r >> fieldBase) + t3
	t3 = r & fieldBaseMask
	r = (r >> fieldBase) + t4
	t4 = r & fieldBaseMask
	r = (r >> fieldBase) + t5
	t5 = r & fieldBaseMask
	r = (r >> fieldBase) + t6
	t6 = r & fieldBaseMask
	r = (r >> fieldBase) + t7
	t7 = r & fieldBaseMask
	r = (r >> fieldBase) + t8
	t8 = r & fieldBaseMask
	r = (r >> fieldBase) + t9
	t9 = r & fieldMSBMask

	// The addition above can itself carry into bit 256.  That carry is worth
	// another 2^256 (congruent to c), so it must be folded back in rather
	// than discarded.  It is either 0 or 1 because m*c is far smaller than
	// 2^256.
	carry := r >> fieldMSBBits

	// Independently, the 256-bit value may still be greater than or equal
	// to the prime.  Since prime = 2^256 - c, that is the case exactly when
	// adding c would overflow 2^256: words 2 through 9 must all be at their
	// maximum and adding c to words 0 and 1 must carry out of word 1.
	// Comparing each word against the matching word of the prime
	// individually is not sufficient, because word 0 may be smaller than
	// the prime's word 0 while word 1 is larger than the prime's word 1.
	//
	// Each flag below is computed without branching: for a 32-bit x,
	// (uint64(x) - 1) >> 63 is 1 when x is zero and 0 otherwise.
	midWords := t2 & t3 & t4 & t5 & t6 & t7 & t8
	midIsMax := uint32((uint64(midWords^fieldBaseMask) - 1) >> 63)
	topIsMax := uint32((uint64(t9^fieldMSBMask) - 1) >> 63)
	lowCarries := ((t0+977)>>fieldBase + t1 + 64) >> fieldBase
	m = (topIsMax & midIsMax & lowCarries) | carry

	// Add c one final time when required (m is 0 or 1) and drop bit 256.
	// When the value was >= prime this is the same as subtracting the
	// prime; when there was a carry it accounts for the extra 2^256.
	r = t0 + m*977
	t0 = r & fieldBaseMask
	r = (r >> fieldBase) + t1 + m*64
	t1 = r & fieldBaseMask
	r = (r >> fieldBase) + t2
	t2 = r & fieldBaseMask
	r = (r >> fieldBase) + t3
	t3 = r & fieldBaseMask
	r = (r >> fieldBase) + t4
	t4 = r & fieldBaseMask
	r = (r >> fieldBase) + t5
	t5 = r & fieldBaseMask
	r = (r >> fieldBase) + t6
	t6 = r & fieldBaseMask
	r = (r >> fieldBase) + t7
	t7 = r & fieldBaseMask
	r = (r >> fieldBase) + t8
	t8 = r & fieldBaseMask
	r = (r >> fieldBase) + t9
	t9 = r & fieldMSBMask

	// Finally, set the normalized and reduced words.
	f.n[0] = t0
	f.n[1] = t1
	f.n[2] = t2
	f.n[3] = t3
	f.n[4] = t4
	f.n[5] = t5
	f.n[6] = t6
	f.n[7] = t7
	f.n[8] = t8
	f.n[9] = t9
	return f
}

// PutBytes unpacks the field value to a 32-byte big-endian value using the
// passed byte array.  There is a similar function, Bytes, which unpacks the
// field value into a new array and returns that.  This version is provided
// since it can be useful to cut down on the number of allocations by allowing
// the caller to reuse a buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) PutBytes(b *[32]byte) {
	// Unpack the 256 total bits from the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is a bit faster.  Benchmarks show this is
	// about 10 times faster than the variant which uses loops.
	b[31] = byte(f.n[0] & eightBitsMask)
	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
	b[18] = byte(f.n[4] & eightBitsMask)
	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
	b[5] = byte(f.n[8] & eightBitsMask)
	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
}

// Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
// for a variant that allows a buffer to be passed which can be useful to
// cut down on the number of allocations by allowing the caller to reuse a
// buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) Bytes() *[32]byte {
	b := new([32]byte)
	f.PutBytes(b)
	return b
}

// IsZero returns whether or not the field value is equal to zero.
//
// Only the raw words are inspected, so the field value must be normalized for
// this function to return the correct result (a denormalized multiple of the
// prime has bits set even though it is congruent to zero).
func (f *fieldVal) IsZero() bool {
	// The value can only be zero if no bits are set in any of the words.
	// This is a constant time implementation.
	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]

	return bits == 0
}

// IsOdd returns whether or not the field value is an odd number.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) IsOdd() bool {
	// Only odd numbers have the bottom bit set.
	return f.n[0]&1 == 1
}

// Equals returns whether or not the two field values are the same.  Both
// field values being compared must be normalized for this function to return
// the correct result.
func (f *fieldVal) Equals(val *fieldVal) bool {
	// Xor only sets bits when they are different, so the two field values
	// can only be the same if no bits are set after xoring each word.
	// This is a constant time implementation.
	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
		(f.n[9] ^ val.n[9])

	return bits == 0
}

// NegateVal negates the passed value and stores the result in f.  The caller
// must provide the magnitude of the passed value for a correct result.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
	// Negation in the field is just the prime minus the value.  However,
	// in order to allow negation against a field value without having to
	// normalize/reduce it first, multiply by the magnitude (that is how
	// "far" away it is from the normalized value) to adjust.  Also, since
	// negating a value pushes it one more order of magnitude away from the
	// normalized range, add 1 to compensate.
	//
	// For some intuition here, imagine you're performing mod 12 arithmetic
	// (picture a clock) and you are negating the number 7.  So you start at
	// 12 (which is of course 0 under mod 12) and count backwards (left on
	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
	// Now, assume you're starting with 19, which is a number that is
	// already larger than the modulus and congruent to 7 (mod 12).  When a
	// value is already in the desired range, its magnitude is 1.  Since 19
	// is an additional "step", its magnitude (mod 12) is 2.  Since any
	// multiple of the modulus is congruent to zero (mod m), the answer can
	// be shortcut by simply multiplying the magnitude by the modulus and
	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]

	return f
}

// Negate negates the field value.  The existing field value is modified.  The
// caller must provide the magnitude of the field value for a correct result.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Negate().AddInt(1) so that f = -f + 1.
func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
	return f.NegateVal(f, magnitude)
}

// AddInt adds the passed integer to the existing field value and stores the
// result in f.  This is a convenience function since it is fairly common to
// perform some arithmetic with small native integers.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.AddInt(1).Add(f2) so that f = f + 1 + f2.
func (f *fieldVal) AddInt(ui uint) *fieldVal {
	// Since the field representation intentionally provides overflow bits,
	// it's ok to use carryless addition as the carry bit is safely part of
	// the word and will be normalized out.
	f.n[0] += uint32(ui)

	return f
}

// Add adds the passed value to the existing field value and stores the result
// in f.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Add(f2).AddInt(1) so that f = f + f2 + 1.
func (f *fieldVal) Add(val *fieldVal) *fieldVal {
	// Since the field representation intentionally provides overflow bits,
	// it's ok to use carryless addition as the carry bit is safely part of
	// each word and will be normalized out.  This could obviously be done
	// in a loop, but the unrolled version is faster.
	f.n[0] += val.n[0]
	f.n[1] += val.n[1]
	f.n[2] += val.n[2]
	f.n[3] += val.n[3]
	f.n[4] += val.n[4]
	f.n[5] += val.n[5]
	f.n[6] += val.n[6]
	f.n[7] += val.n[7]
	f.n[8] += val.n[8]
	f.n[9] += val.n[9]

	return f
}

// Add2 adds the passed two field values together and stores the result in f.
//
// The field value is returned to support chaining.
This enables syntax like: 592 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1. 593 func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal { 594 // Since the field representation intentionally provides overflow bits, 595 // it's ok to use carryless addition as the carry bit is safely part of 596 // each word and will be normalized out. This could obviously be done 597 // in a loop, but the unrolled version is faster. 598 f.n[0] = val.n[0] + val2.n[0] 599 f.n[1] = val.n[1] + val2.n[1] 600 f.n[2] = val.n[2] + val2.n[2] 601 f.n[3] = val.n[3] + val2.n[3] 602 f.n[4] = val.n[4] + val2.n[4] 603 f.n[5] = val.n[5] + val2.n[5] 604 f.n[6] = val.n[6] + val2.n[6] 605 f.n[7] = val.n[7] + val2.n[7] 606 f.n[8] = val.n[8] + val2.n[8] 607 f.n[9] = val.n[9] + val2.n[9] 608 609 return f 610 } 611 612 // MulInt multiplies the field value by the passed int and stores the result in 613 // f. Note that this function can overflow if multiplying the value by any of 614 // the individual words exceeds a max uint32. Therefore it is important that 615 // the caller ensures no overflows will occur before using this function. 616 // 617 // The field value is returned to support chaining. This enables syntax like: 618 // f.MulInt(2).Add(f2) so that f = 2 * f + f2. 619 func (f *fieldVal) MulInt(val uint) *fieldVal { 620 // Since each word of the field representation can hold up to 621 // fieldOverflowBits extra bits which will be normalized out, it's safe 622 // to multiply each word without using a larger type or carry 623 // propagation so long as the values won't overflow a uint32. This 624 // could obviously be done in a loop, but the unrolled version is 625 // faster. 
626 ui := uint32(val) 627 f.n[0] *= ui 628 f.n[1] *= ui 629 f.n[2] *= ui 630 f.n[3] *= ui 631 f.n[4] *= ui 632 f.n[5] *= ui 633 f.n[6] *= ui 634 f.n[7] *= ui 635 f.n[8] *= ui 636 f.n[9] *= ui 637 638 return f 639 } 640 641 // Mul multiplies the passed value to the existing field value and stores the 642 // result in f. Note that this function can overflow if multiplying any 643 // of the individual words exceeds a max uint32. In practice, this means the 644 // magnitude of either value involved in the multiplication must be a max of 645 // 8. 646 // 647 // The field value is returned to support chaining. This enables syntax like: 648 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1. 649 func (f *fieldVal) Mul(val *fieldVal) *fieldVal { 650 return f.Mul2(f, val) 651 } 652 653 // Mul2 multiplies the passed two field values together and stores the result 654 // result in f. Note that this function can overflow if multiplying any of 655 // the individual words exceeds a max uint32. In practice, this means the 656 // magnitude of either value involved in the multiplication must be a max of 657 // 8. 658 // 659 // The field value is returned to support chaining. This enables syntax like: 660 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1. 661 func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal { 662 // This could be done with a couple of for loops and an array to store 663 // the intermediate terms, but this unrolled version is significantly 664 // faster. 665 666 // Terms for 2^(fieldBase*0). 667 m := uint64(val.n[0]) * uint64(val2.n[0]) 668 t0 := m & fieldBaseMask 669 670 // Terms for 2^(fieldBase*1). 671 m = (m >> fieldBase) + 672 uint64(val.n[0])*uint64(val2.n[1]) + 673 uint64(val.n[1])*uint64(val2.n[0]) 674 t1 := m & fieldBaseMask 675 676 // Terms for 2^(fieldBase*2). 
677 m = (m >> fieldBase) + 678 uint64(val.n[0])*uint64(val2.n[2]) + 679 uint64(val.n[1])*uint64(val2.n[1]) + 680 uint64(val.n[2])*uint64(val2.n[0]) 681 t2 := m & fieldBaseMask 682 683 // Terms for 2^(fieldBase*3). 684 m = (m >> fieldBase) + 685 uint64(val.n[0])*uint64(val2.n[3]) + 686 uint64(val.n[1])*uint64(val2.n[2]) + 687 uint64(val.n[2])*uint64(val2.n[1]) + 688 uint64(val.n[3])*uint64(val2.n[0]) 689 t3 := m & fieldBaseMask 690 691 // Terms for 2^(fieldBase*4). 692 m = (m >> fieldBase) + 693 uint64(val.n[0])*uint64(val2.n[4]) + 694 uint64(val.n[1])*uint64(val2.n[3]) + 695 uint64(val.n[2])*uint64(val2.n[2]) + 696 uint64(val.n[3])*uint64(val2.n[1]) + 697 uint64(val.n[4])*uint64(val2.n[0]) 698 t4 := m & fieldBaseMask 699 700 // Terms for 2^(fieldBase*5). 701 m = (m >> fieldBase) + 702 uint64(val.n[0])*uint64(val2.n[5]) + 703 uint64(val.n[1])*uint64(val2.n[4]) + 704 uint64(val.n[2])*uint64(val2.n[3]) + 705 uint64(val.n[3])*uint64(val2.n[2]) + 706 uint64(val.n[4])*uint64(val2.n[1]) + 707 uint64(val.n[5])*uint64(val2.n[0]) 708 t5 := m & fieldBaseMask 709 710 // Terms for 2^(fieldBase*6). 711 m = (m >> fieldBase) + 712 uint64(val.n[0])*uint64(val2.n[6]) + 713 uint64(val.n[1])*uint64(val2.n[5]) + 714 uint64(val.n[2])*uint64(val2.n[4]) + 715 uint64(val.n[3])*uint64(val2.n[3]) + 716 uint64(val.n[4])*uint64(val2.n[2]) + 717 uint64(val.n[5])*uint64(val2.n[1]) + 718 uint64(val.n[6])*uint64(val2.n[0]) 719 t6 := m & fieldBaseMask 720 721 // Terms for 2^(fieldBase*7). 722 m = (m >> fieldBase) + 723 uint64(val.n[0])*uint64(val2.n[7]) + 724 uint64(val.n[1])*uint64(val2.n[6]) + 725 uint64(val.n[2])*uint64(val2.n[5]) + 726 uint64(val.n[3])*uint64(val2.n[4]) + 727 uint64(val.n[4])*uint64(val2.n[3]) + 728 uint64(val.n[5])*uint64(val2.n[2]) + 729 uint64(val.n[6])*uint64(val2.n[1]) + 730 uint64(val.n[7])*uint64(val2.n[0]) 731 t7 := m & fieldBaseMask 732 733 // Terms for 2^(fieldBase*8). 
734 m = (m >> fieldBase) + 735 uint64(val.n[0])*uint64(val2.n[8]) + 736 uint64(val.n[1])*uint64(val2.n[7]) + 737 uint64(val.n[2])*uint64(val2.n[6]) + 738 uint64(val.n[3])*uint64(val2.n[5]) + 739 uint64(val.n[4])*uint64(val2.n[4]) + 740 uint64(val.n[5])*uint64(val2.n[3]) + 741 uint64(val.n[6])*uint64(val2.n[2]) + 742 uint64(val.n[7])*uint64(val2.n[1]) + 743 uint64(val.n[8])*uint64(val2.n[0]) 744 t8 := m & fieldBaseMask 745 746 // Terms for 2^(fieldBase*9). 747 m = (m >> fieldBase) + 748 uint64(val.n[0])*uint64(val2.n[9]) + 749 uint64(val.n[1])*uint64(val2.n[8]) + 750 uint64(val.n[2])*uint64(val2.n[7]) + 751 uint64(val.n[3])*uint64(val2.n[6]) + 752 uint64(val.n[4])*uint64(val2.n[5]) + 753 uint64(val.n[5])*uint64(val2.n[4]) + 754 uint64(val.n[6])*uint64(val2.n[3]) + 755 uint64(val.n[7])*uint64(val2.n[2]) + 756 uint64(val.n[8])*uint64(val2.n[1]) + 757 uint64(val.n[9])*uint64(val2.n[0]) 758 t9 := m & fieldBaseMask 759 760 // Terms for 2^(fieldBase*10). 761 m = (m >> fieldBase) + 762 uint64(val.n[1])*uint64(val2.n[9]) + 763 uint64(val.n[2])*uint64(val2.n[8]) + 764 uint64(val.n[3])*uint64(val2.n[7]) + 765 uint64(val.n[4])*uint64(val2.n[6]) + 766 uint64(val.n[5])*uint64(val2.n[5]) + 767 uint64(val.n[6])*uint64(val2.n[4]) + 768 uint64(val.n[7])*uint64(val2.n[3]) + 769 uint64(val.n[8])*uint64(val2.n[2]) + 770 uint64(val.n[9])*uint64(val2.n[1]) 771 t10 := m & fieldBaseMask 772 773 // Terms for 2^(fieldBase*11). 774 m = (m >> fieldBase) + 775 uint64(val.n[2])*uint64(val2.n[9]) + 776 uint64(val.n[3])*uint64(val2.n[8]) + 777 uint64(val.n[4])*uint64(val2.n[7]) + 778 uint64(val.n[5])*uint64(val2.n[6]) + 779 uint64(val.n[6])*uint64(val2.n[5]) + 780 uint64(val.n[7])*uint64(val2.n[4]) + 781 uint64(val.n[8])*uint64(val2.n[3]) + 782 uint64(val.n[9])*uint64(val2.n[2]) 783 t11 := m & fieldBaseMask 784 785 // Terms for 2^(fieldBase*12). 
786 m = (m >> fieldBase) + 787 uint64(val.n[3])*uint64(val2.n[9]) + 788 uint64(val.n[4])*uint64(val2.n[8]) + 789 uint64(val.n[5])*uint64(val2.n[7]) + 790 uint64(val.n[6])*uint64(val2.n[6]) + 791 uint64(val.n[7])*uint64(val2.n[5]) + 792 uint64(val.n[8])*uint64(val2.n[4]) + 793 uint64(val.n[9])*uint64(val2.n[3]) 794 t12 := m & fieldBaseMask 795 796 // Terms for 2^(fieldBase*13). 797 m = (m >> fieldBase) + 798 uint64(val.n[4])*uint64(val2.n[9]) + 799 uint64(val.n[5])*uint64(val2.n[8]) + 800 uint64(val.n[6])*uint64(val2.n[7]) + 801 uint64(val.n[7])*uint64(val2.n[6]) + 802 uint64(val.n[8])*uint64(val2.n[5]) + 803 uint64(val.n[9])*uint64(val2.n[4]) 804 t13 := m & fieldBaseMask 805 806 // Terms for 2^(fieldBase*14). 807 m = (m >> fieldBase) + 808 uint64(val.n[5])*uint64(val2.n[9]) + 809 uint64(val.n[6])*uint64(val2.n[8]) + 810 uint64(val.n[7])*uint64(val2.n[7]) + 811 uint64(val.n[8])*uint64(val2.n[6]) + 812 uint64(val.n[9])*uint64(val2.n[5]) 813 t14 := m & fieldBaseMask 814 815 // Terms for 2^(fieldBase*15). 816 m = (m >> fieldBase) + 817 uint64(val.n[6])*uint64(val2.n[9]) + 818 uint64(val.n[7])*uint64(val2.n[8]) + 819 uint64(val.n[8])*uint64(val2.n[7]) + 820 uint64(val.n[9])*uint64(val2.n[6]) 821 t15 := m & fieldBaseMask 822 823 // Terms for 2^(fieldBase*16). 824 m = (m >> fieldBase) + 825 uint64(val.n[7])*uint64(val2.n[9]) + 826 uint64(val.n[8])*uint64(val2.n[8]) + 827 uint64(val.n[9])*uint64(val2.n[7]) 828 t16 := m & fieldBaseMask 829 830 // Terms for 2^(fieldBase*17). 831 m = (m >> fieldBase) + 832 uint64(val.n[8])*uint64(val2.n[9]) + 833 uint64(val.n[9])*uint64(val2.n[8]) 834 t17 := m & fieldBaseMask 835 836 // Terms for 2^(fieldBase*18). 837 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9]) 838 t18 := m & fieldBaseMask 839 840 // What's left is for 2^(fieldBase*19). 841 t19 := m >> fieldBase 842 843 // At this point, all of the terms are grouped into their respective 844 // base. 
845 // 846 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 847 // when the modulus is of the special form m = b^t - c, highly efficient 848 // reduction can be achieved per the provided algorithm. 849 // 850 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 851 // this criteria. 852 // 853 // 4294968273 in field representation (base 2^26) is: 854 // n[0] = 977 855 // n[1] = 64 856 // That is to say (2^26 * 64) + 977 = 4294968273 857 // 858 // Since each word is in base 26, the upper terms (t10 and up) start 859 // at 260 bits (versus the final desired range of 256 bits), so the 860 // field representation of 'c' from above needs to be adjusted for the 861 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 862 // 68719492368. Thus, the adjusted field representation of 'c' is: 863 // n[0] = 977 * 16 = 15632 864 // n[1] = 64 * 16 = 1024 865 // That is to say (2^26 * 1024) + 15632 = 68719492368 866 // 867 // To reduce the final term, t19, the entire 'c' value is needed instead 868 // of only n[0] because there are no more terms left to handle n[1]. 869 // This means there might be some magnitude left in the upper bits that 870 // is handled below. 
871 m = t0 + t10*15632 872 t0 = m & fieldBaseMask 873 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 874 t1 = m & fieldBaseMask 875 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 876 t2 = m & fieldBaseMask 877 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 878 t3 = m & fieldBaseMask 879 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 880 t4 = m & fieldBaseMask 881 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 882 t5 = m & fieldBaseMask 883 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 884 t6 = m & fieldBaseMask 885 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 886 t7 = m & fieldBaseMask 887 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 888 t8 = m & fieldBaseMask 889 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 890 t9 = m & fieldMSBMask 891 m = m >> fieldMSBBits 892 893 // At this point, if the magnitude is greater than 0, the overall value 894 // is greater than the max possible 256-bit value. In particular, it is 895 // "how many times larger" than the max value it is. 896 // 897 // The algorithm presented in [HAC] section 14.3.4 repeats until the 898 // quotient is zero. However, due to the above, we already know at 899 // least how many times we would need to repeat as it's the value 900 // currently in m. Thus we can simply multiply the magnitude by the 901 // field representation of the prime and do a single iteration. Notice 902 // that nothing will be changed when the magnitude is zero, so we could 903 // skip this in that case, however always running regardless allows it 904 // to run in constant time. The final result will be in the range 905 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 906 // magnitude of 1, but it is denormalized. 
907 d := t0 + m*977 908 f.n[0] = uint32(d & fieldBaseMask) 909 d = (d >> fieldBase) + t1 + m*64 910 f.n[1] = uint32(d & fieldBaseMask) 911 f.n[2] = uint32((d >> fieldBase) + t2) 912 f.n[3] = uint32(t3) 913 f.n[4] = uint32(t4) 914 f.n[5] = uint32(t5) 915 f.n[6] = uint32(t6) 916 f.n[7] = uint32(t7) 917 f.n[8] = uint32(t8) 918 f.n[9] = uint32(t9) 919 920 return f 921 } 922 923 // Square squares the field value. The existing field value is modified. Note 924 // that this function can overflow if multiplying any of the individual words 925 // exceeds a max uint32. In practice, this means the magnitude of the field 926 // must be a max of 8 to prevent overflow. 927 // 928 // The field value is returned to support chaining. This enables syntax like: 929 // f.Square().Mul(f2) so that f = f^2 * f2. 930 func (f *fieldVal) Square() *fieldVal { 931 return f.SquareVal(f) 932 } 933 934 // SquareVal squares the passed value and stores the result in f. Note that 935 // this function can overflow if multiplying any of the individual words 936 // exceeds a max uint32. In practice, this means the magnitude of the field 937 // being squred must be a max of 8 to prevent overflow. 938 // 939 // The field value is returned to support chaining. This enables syntax like: 940 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3. 941 func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal { 942 // This could be done with a couple of for loops and an array to store 943 // the intermediate terms, but this unrolled version is significantly 944 // faster. 945 946 // Terms for 2^(fieldBase*0). 947 m := uint64(val.n[0]) * uint64(val.n[0]) 948 t0 := m & fieldBaseMask 949 950 // Terms for 2^(fieldBase*1). 951 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1]) 952 t1 := m & fieldBaseMask 953 954 // Terms for 2^(fieldBase*2). 955 m = (m >> fieldBase) + 956 2*uint64(val.n[0])*uint64(val.n[2]) + 957 uint64(val.n[1])*uint64(val.n[1]) 958 t2 := m & fieldBaseMask 959 960 // Terms for 2^(fieldBase*3). 
961 m = (m >> fieldBase) + 962 2*uint64(val.n[0])*uint64(val.n[3]) + 963 2*uint64(val.n[1])*uint64(val.n[2]) 964 t3 := m & fieldBaseMask 965 966 // Terms for 2^(fieldBase*4). 967 m = (m >> fieldBase) + 968 2*uint64(val.n[0])*uint64(val.n[4]) + 969 2*uint64(val.n[1])*uint64(val.n[3]) + 970 uint64(val.n[2])*uint64(val.n[2]) 971 t4 := m & fieldBaseMask 972 973 // Terms for 2^(fieldBase*5). 974 m = (m >> fieldBase) + 975 2*uint64(val.n[0])*uint64(val.n[5]) + 976 2*uint64(val.n[1])*uint64(val.n[4]) + 977 2*uint64(val.n[2])*uint64(val.n[3]) 978 t5 := m & fieldBaseMask 979 980 // Terms for 2^(fieldBase*6). 981 m = (m >> fieldBase) + 982 2*uint64(val.n[0])*uint64(val.n[6]) + 983 2*uint64(val.n[1])*uint64(val.n[5]) + 984 2*uint64(val.n[2])*uint64(val.n[4]) + 985 uint64(val.n[3])*uint64(val.n[3]) 986 t6 := m & fieldBaseMask 987 988 // Terms for 2^(fieldBase*7). 989 m = (m >> fieldBase) + 990 2*uint64(val.n[0])*uint64(val.n[7]) + 991 2*uint64(val.n[1])*uint64(val.n[6]) + 992 2*uint64(val.n[2])*uint64(val.n[5]) + 993 2*uint64(val.n[3])*uint64(val.n[4]) 994 t7 := m & fieldBaseMask 995 996 // Terms for 2^(fieldBase*8). 997 m = (m >> fieldBase) + 998 2*uint64(val.n[0])*uint64(val.n[8]) + 999 2*uint64(val.n[1])*uint64(val.n[7]) + 1000 2*uint64(val.n[2])*uint64(val.n[6]) + 1001 2*uint64(val.n[3])*uint64(val.n[5]) + 1002 uint64(val.n[4])*uint64(val.n[4]) 1003 t8 := m & fieldBaseMask 1004 1005 // Terms for 2^(fieldBase*9). 1006 m = (m >> fieldBase) + 1007 2*uint64(val.n[0])*uint64(val.n[9]) + 1008 2*uint64(val.n[1])*uint64(val.n[8]) + 1009 2*uint64(val.n[2])*uint64(val.n[7]) + 1010 2*uint64(val.n[3])*uint64(val.n[6]) + 1011 2*uint64(val.n[4])*uint64(val.n[5]) 1012 t9 := m & fieldBaseMask 1013 1014 // Terms for 2^(fieldBase*10). 
1015 m = (m >> fieldBase) + 1016 2*uint64(val.n[1])*uint64(val.n[9]) + 1017 2*uint64(val.n[2])*uint64(val.n[8]) + 1018 2*uint64(val.n[3])*uint64(val.n[7]) + 1019 2*uint64(val.n[4])*uint64(val.n[6]) + 1020 uint64(val.n[5])*uint64(val.n[5]) 1021 t10 := m & fieldBaseMask 1022 1023 // Terms for 2^(fieldBase*11). 1024 m = (m >> fieldBase) + 1025 2*uint64(val.n[2])*uint64(val.n[9]) + 1026 2*uint64(val.n[3])*uint64(val.n[8]) + 1027 2*uint64(val.n[4])*uint64(val.n[7]) + 1028 2*uint64(val.n[5])*uint64(val.n[6]) 1029 t11 := m & fieldBaseMask 1030 1031 // Terms for 2^(fieldBase*12). 1032 m = (m >> fieldBase) + 1033 2*uint64(val.n[3])*uint64(val.n[9]) + 1034 2*uint64(val.n[4])*uint64(val.n[8]) + 1035 2*uint64(val.n[5])*uint64(val.n[7]) + 1036 uint64(val.n[6])*uint64(val.n[6]) 1037 t12 := m & fieldBaseMask 1038 1039 // Terms for 2^(fieldBase*13). 1040 m = (m >> fieldBase) + 1041 2*uint64(val.n[4])*uint64(val.n[9]) + 1042 2*uint64(val.n[5])*uint64(val.n[8]) + 1043 2*uint64(val.n[6])*uint64(val.n[7]) 1044 t13 := m & fieldBaseMask 1045 1046 // Terms for 2^(fieldBase*14). 1047 m = (m >> fieldBase) + 1048 2*uint64(val.n[5])*uint64(val.n[9]) + 1049 2*uint64(val.n[6])*uint64(val.n[8]) + 1050 uint64(val.n[7])*uint64(val.n[7]) 1051 t14 := m & fieldBaseMask 1052 1053 // Terms for 2^(fieldBase*15). 1054 m = (m >> fieldBase) + 1055 2*uint64(val.n[6])*uint64(val.n[9]) + 1056 2*uint64(val.n[7])*uint64(val.n[8]) 1057 t15 := m & fieldBaseMask 1058 1059 // Terms for 2^(fieldBase*16). 1060 m = (m >> fieldBase) + 1061 2*uint64(val.n[7])*uint64(val.n[9]) + 1062 uint64(val.n[8])*uint64(val.n[8]) 1063 t16 := m & fieldBaseMask 1064 1065 // Terms for 2^(fieldBase*17). 1066 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9]) 1067 t17 := m & fieldBaseMask 1068 1069 // Terms for 2^(fieldBase*18). 1070 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9]) 1071 t18 := m & fieldBaseMask 1072 1073 // What's left is for 2^(fieldBase*19). 
1074 t19 := m >> fieldBase 1075 1076 // At this point, all of the terms are grouped into their respective 1077 // base. 1078 // 1079 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 1080 // when the modulus is of the special form m = b^t - c, highly efficient 1081 // reduction can be achieved per the provided algorithm. 1082 // 1083 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 1084 // this criteria. 1085 // 1086 // 4294968273 in field representation (base 2^26) is: 1087 // n[0] = 977 1088 // n[1] = 64 1089 // That is to say (2^26 * 64) + 977 = 4294968273 1090 // 1091 // Since each word is in base 26, the upper terms (t10 and up) start 1092 // at 260 bits (versus the final desired range of 256 bits), so the 1093 // field representation of 'c' from above needs to be adjusted for the 1094 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 1095 // 68719492368. Thus, the adjusted field representation of 'c' is: 1096 // n[0] = 977 * 16 = 15632 1097 // n[1] = 64 * 16 = 1024 1098 // That is to say (2^26 * 1024) + 15632 = 68719492368 1099 // 1100 // To reduce the final term, t19, the entire 'c' value is needed instead 1101 // of only n[0] because there are no more terms left to handle n[1]. 1102 // This means there might be some magnitude left in the upper bits that 1103 // is handled below. 
1104 m = t0 + t10*15632 1105 t0 = m & fieldBaseMask 1106 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 1107 t1 = m & fieldBaseMask 1108 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 1109 t2 = m & fieldBaseMask 1110 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 1111 t3 = m & fieldBaseMask 1112 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 1113 t4 = m & fieldBaseMask 1114 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 1115 t5 = m & fieldBaseMask 1116 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 1117 t6 = m & fieldBaseMask 1118 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 1119 t7 = m & fieldBaseMask 1120 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 1121 t8 = m & fieldBaseMask 1122 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 1123 t9 = m & fieldMSBMask 1124 m = m >> fieldMSBBits 1125 1126 // At this point, if the magnitude is greater than 0, the overall value 1127 // is greater than the max possible 256-bit value. In particular, it is 1128 // "how many times larger" than the max value it is. 1129 // 1130 // The algorithm presented in [HAC] section 14.3.4 repeats until the 1131 // quotient is zero. However, due to the above, we already know at 1132 // least how many times we would need to repeat as it's the value 1133 // currently in m. Thus we can simply multiply the magnitude by the 1134 // field representation of the prime and do a single iteration. Notice 1135 // that nothing will be changed when the magnitude is zero, so we could 1136 // skip this in that case, however always running regardless allows it 1137 // to run in constant time. The final result will be in the range 1138 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 1139 // magnitude of 1, but it is denormalized. 
1140 n := t0 + m*977 1141 f.n[0] = uint32(n & fieldBaseMask) 1142 n = (n >> fieldBase) + t1 + m*64 1143 f.n[1] = uint32(n & fieldBaseMask) 1144 f.n[2] = uint32((n >> fieldBase) + t2) 1145 f.n[3] = uint32(t3) 1146 f.n[4] = uint32(t4) 1147 f.n[5] = uint32(t5) 1148 f.n[6] = uint32(t6) 1149 f.n[7] = uint32(t7) 1150 f.n[8] = uint32(t8) 1151 f.n[9] = uint32(t9) 1152 1153 return f 1154 } 1155 1156 // Inverse finds the modular multiplicative inverse of the field value. The 1157 // existing field value is modified. 1158 // 1159 // The field value is returned to support chaining. This enables syntax like: 1160 // f.Inverse().Mul(f2) so that f = f^-1 * f2. 1161 func (f *fieldVal) Inverse() *fieldVal { 1162 // Fermat's little theorem states that for a nonzero number a and prime 1163 // prime p, a^(p-1) = 1 (mod p). Since the multipliciative inverse is 1164 // a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p). 1165 // Thus, a^(p-2) is the multiplicative inverse. 1166 // 1167 // In order to efficiently compute a^(p-2), p-2 needs to be split into 1168 // a sequence of squares and multipications that minimizes the number of 1169 // multiplications needed (since they are more costly than squarings). 1170 // Intermediate results are saved and reused as well. 1171 // 1172 // The secp256k1 prime - 2 is 2^256 - 4294968275. 1173 // 1174 // This has a cost of 258 field squarings and 33 field multiplications. 
1175 var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal 1176 a2.SquareVal(f) 1177 a3.Mul2(&a2, f) 1178 a4.SquareVal(&a2) 1179 a10.SquareVal(&a4).Mul(&a2) 1180 a11.Mul2(&a10, f) 1181 a21.Mul2(&a10, &a11) 1182 a42.SquareVal(&a21) 1183 a45.Mul2(&a42, &a3) 1184 a63.Mul2(&a42, &a21) 1185 a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11) 1186 a1023.Mul2(&a1019, &a4) 1187 f.Set(&a63) // f = a^(2^6 - 1) 1188 f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32) 1189 f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024) 1190 f.Mul(&a1023) // f = a^(2^16 - 1) 1191 f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32) 1192 f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024) 1193 f.Mul(&a1023) // f = a^(2^26 - 1) 1194 f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32) 1195 f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024) 1196 f.Mul(&a1023) // f = a^(2^36 - 1) 1197 f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32) 1198 f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024) 1199 f.Mul(&a1023) // f = a^(2^46 - 1) 1200 f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32) 1201 f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024) 1202 f.Mul(&a1023) // f = a^(2^56 - 1) 1203 f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32) 1204 f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024) 1205 f.Mul(&a1023) // f = a^(2^66 - 1) 1206 f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32) 1207 f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024) 1208 f.Mul(&a1023) // f = a^(2^76 - 1) 1209 f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32) 1210 f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024) 1211 f.Mul(&a1023) // f = a^(2^86 - 1) 1212 f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32) 1213 
f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024) 1214 f.Mul(&a1023) // f = a^(2^96 - 1) 1215 f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32) 1216 f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024) 1217 f.Mul(&a1023) // f = a^(2^106 - 1) 1218 f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32) 1219 f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024) 1220 f.Mul(&a1023) // f = a^(2^116 - 1) 1221 f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32) 1222 f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024) 1223 f.Mul(&a1023) // f = a^(2^126 - 1) 1224 f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32) 1225 f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024) 1226 f.Mul(&a1023) // f = a^(2^136 - 1) 1227 f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32) 1228 f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024) 1229 f.Mul(&a1023) // f = a^(2^146 - 1) 1230 f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32) 1231 f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024) 1232 f.Mul(&a1023) // f = a^(2^156 - 1) 1233 f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32) 1234 f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024) 1235 f.Mul(&a1023) // f = a^(2^166 - 1) 1236 f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32) 1237 f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024) 1238 f.Mul(&a1023) // f = a^(2^176 - 1) 1239 f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32) 1240 f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024) 1241 f.Mul(&a1023) // f = a^(2^186 - 1) 1242 f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32) 1243 f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024) 1244 f.Mul(&a1023) // f = 
a^(2^196 - 1) 1245 f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32) 1246 f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024) 1247 f.Mul(&a1023) // f = a^(2^206 - 1) 1248 f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32) 1249 f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024) 1250 f.Mul(&a1023) // f = a^(2^216 - 1) 1251 f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32) 1252 f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024) 1253 f.Mul(&a1019) // f = a^(2^226 - 5) 1254 f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160) 1255 f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120) 1256 f.Mul(&a1023) // f = a^(2^236 - 4097) 1257 f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104) 1258 f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328) 1259 f.Mul(&a1023) // f = a^(2^246 - 4194305) 1260 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760) 1261 f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320) 1262 return f.Mul(&a45) // f = a^(2^256 - 4294968275) = a^(p-2) 1263 }