github.com/dashpay/godash@v0.0.0-20160726055534-e038a21e0e3d/btcec/field.go (about) 1 // Copyright (c) 2013-2014 The btcsuite developers 2 // Copyright (c) 2016 The Dash developers 3 // Copyright (c) 2013-2014 Dave Collins 4 // Use of this source code is governed by an ISC 5 // license that can be found in the LICENSE file. 6 7 package btcec 8 9 // References: 10 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone. 11 // http://cacr.uwaterloo.ca/hac/ 12 13 // All elliptic curve operations for secp256k1 are done in a finite field 14 // characterized by a 256-bit prime. Given this precision is larger than the 15 // biggest available native type, obviously some form of bignum math is needed. 16 // This package implements specialized fixed-precision field arithmetic rather 17 // than relying on an arbitrary-precision arithmetic package such as math/big 18 // for dealing with the field math since the size is known. As a result, rather 19 // large performance gains are achieved by taking advantage of many 20 // optimizations not available to arbitrary-precision arithmetic and generic 21 // modular arithmetic algorithms. 22 // 23 // There are various ways to internally represent each finite field element. 24 // For example, the most obvious representation would be to use an array of 4 25 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from 26 // a couple of issues. First, there is no native Go type large enough to handle 27 // the intermediate results while adding or multiplying two 64-bit numbers, and 28 // second there is no space left for overflows when performing the intermediate 29 // arithmetic between each array element which would lead to expensive carry 30 // propagation. 31 // 32 // Given the above, this implementation represents the field elements as 33 // 10 uint32s with each word (array entry) treated as base 2^26. 
// This was chosen for the following reasons:
// 1) Most systems at the current time are 64-bit (or at least have 64-bit
//    registers available for specialized purposes such as MMX) so the
//    intermediate results can typically be done using a native register (and
//    using uint64s to avoid the need for additional half-word arithmetic)
// 2) In order to allow addition of the internal words without having to
//    propagate the carry, the max normalized value for each register must
//    be less than the number of bits available in the register
// 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
//    reasonable choice for #2
// 4) Given the need for 256-bits of precision and the properties stated in #1,
//    #2, and #3, the representation which best accommodates this is 10 uint32s
//    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
//    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
//    overflow
//
// Since it is so important that the field arithmetic is extremely fast for
// high performance crypto, this package does not perform any validation where
// it ordinarily would.  For example, some functions only give the correct
// result if the field is normalized and there is no checking to ensure it is.
// While I typically prefer to ensure all state and input is valid for most
// packages, this code is really only used internally and every extra check
// counts.

import (
	"encoding/hex"
)

// Constants used to make the code more readable.
const (
	twoBitsMask   = 0x3
	fourBitsMask  = 0xf
	sixBitsMask   = 0x3f
	eightBitsMask = 0xff
)

// Constants related to the field representation.
const (
	// fieldWords is the number of words used to internally represent the
	// 256-bit value.
	fieldWords = 10

	// fieldBase is the exponent used to form the numeric base of each word.
	// 2^(fieldBase*i) where i is the word position.
	fieldBase = 26

	// fieldOverflowBits is the minimum number of "overflow" bits for each
	// word in the field value.
	fieldOverflowBits = 32 - fieldBase

	// fieldBaseMask is the mask for the bits in each word needed to
	// represent the numeric base of each word (except the most significant
	// word).
	fieldBaseMask = (1 << fieldBase) - 1

	// fieldMSBBits is the number of bits in the most significant word used
	// to represent the value.
	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))

	// fieldMSBMask is the mask for the bits in the most significant word
	// needed to represent the value.
	fieldMSBMask = (1 << fieldMSBBits) - 1

	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.
	fieldPrimeWordZero = 0x3fffc2f

	// fieldPrimeWordOne is word one of the secp256k1 prime in the
	// internal field representation.  It is used during modular reduction
	// and negation.
	fieldPrimeWordOne = 0x3ffffbf
)

// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field.  This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
// represents each 256-bit value as 10 32-bit integers in base 2^26.  This
// provides 6 bits of overflow in each word (10 bits in the most significant
// word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
// the arithmetic needed for elliptic curve operations.
//
// The following depicts the internal representation:
//	 -----------------------------------------------------------------
//	|        n[9]       |        n[8]       | ... |        n[0]       |
//	| 32 bits available | 32 bits available | ... | 32 bits available |
//	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
//	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
//	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
//	 -----------------------------------------------------------------
//
// For example, consider the number 2^49 + 1.  It would be represented as:
//	n[0] = 1
//	n[1] = 2^23
//	n[2..9] = 0
//
// The full 256-bit value is then calculated by looping i from 9..0 and
// doing sum(n[i] * 2^(26i)) like so:
//	n[9] * 2^(26*9) = 0    * 2^234 = 0
//	n[8] * 2^(26*8) = 0    * 2^208 = 0
//	...
//	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
//	n[0] * 2^(26*0) = 1    * 2^0   = 1
//	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
type fieldVal struct {
	n [10]uint32
}

// String returns the field value as a human-readable hex string.
//
// The receiver is a copy, and a further normalized copy is what gets encoded,
// so the caller's value is left untouched.
func (f fieldVal) String() string {
	t := new(fieldVal).Set(&f).Normalize()
	return hex.EncodeToString(t.Bytes()[:])
}

// Zero sets the field value to zero.  A newly created field value is already
// set to zero.  This function can be useful to clear an existing field value
// for reuse.
func (f *fieldVal) Zero() {
	f.n[0] = 0
	f.n[1] = 0
	f.n[2] = 0
	f.n[3] = 0
	f.n[4] = 0
	f.n[5] = 0
	f.n[6] = 0
	f.n[7] = 0
	f.n[8] = 0
	f.n[9] = 0
}

// Set sets the field value equal to the passed value.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).Set(f2).AddInt(1) so that f = f2 + 1 where f2 is not
// modified.
func (f *fieldVal) Set(val *fieldVal) *fieldVal {
	*f = *val
	return f
}

// SetInt sets the field value to the passed integer.  This is a convenience
// function since it is fairly common to perform some arithmetic with small
// native integers.
//
// The integer is stored in word zero after conversion to uint32, so it is only
// intended for small values: anything that does not fit in 32 bits is
// truncated by that conversion.
//
// The field value is returned to support chaining.  This enables syntax such
// as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
func (f *fieldVal) SetInt(ui uint) *fieldVal {
	f.Zero()
	f.n[0] = uint32(ui)
	return f
}

// SetBytes packs the passed 32-byte big-endian value into the internal field
// value representation.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
	// Pack the 256 total bits across the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is significantly faster.  Benchmarks show
	// this is about 34 times faster than the variant which uses loops.
	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
		(uint32(b[28])&twoBitsMask)<<24
	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
		(uint32(b[25])&fourBitsMask)<<22
	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
		(uint32(b[22])&sixBitsMask)<<20
	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
		uint32(b[19])<<18
	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
		(uint32(b[15])&twoBitsMask)<<24
	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
		(uint32(b[12])&fourBitsMask)<<22
	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
		(uint32(b[9])&sixBitsMask)<<20
	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
		uint32(b[6])<<18
	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
		(uint32(b[2])&twoBitsMask)<<24
	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
	return f
}

// SetByteSlice packs the passed big-endian value into the internal field value
// representation.  Only the first 32-bytes are used.  As a result, it is up to
// the caller to ensure numbers of the appropriate size are used or the value
// will be truncated.
//
// Slices shorter than 32 bytes are treated as if they were left-padded with
// zero bytes, and a nil or empty slice yields zero.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetByteSlice(byteSlice)
func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
	// Honor the documented truncation.  Without this, right-aligning an
	// oversized slice would compute a negative destination index and
	// panic.
	if len(b) > 32 {
		b = b[:32]
	}

	// Right-align the (possibly short) big-endian input in a zeroed
	// 32-byte buffer.
	var b32 [32]byte
	copy(b32[32-len(b):], b)
	return f.SetBytes(&b32)
}

// SetHex decodes the passed big-endian hex string into the internal field value
// representation.  Only the first 32-bytes are used.
//
// A string with an odd number of digits is left-padded with a single "0"
// before decoding.  The signature has no way to report a decoding failure, so
// malformed input is not rejected: the value is set from whatever bytes
// hex.DecodeString returned alongside its error (possibly none).  Callers are
// expected to pass valid hex.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetHex("0abc").AddInt(1) so that f = 0x0abc + 1
func (f *fieldVal) SetHex(hexString string) *fieldVal {
	if len(hexString)%2 != 0 {
		hexString = "0" + hexString
	}
	// The error is intentionally discarded; see the function comment.
	decoded, _ := hex.DecodeString(hexString)
	return f.SetByteSlice(decoded)
}

// Normalize normalizes the internal field words into the desired range and
// performs fast modular reduction over the secp256k1 prime by making use of the
// special form of the prime.
//
// On return every word holds at most fieldBase bits (fieldMSBBits for the most
// significant word) and the represented value is strictly less than the prime.
// No overflow checking is performed on the intermediate uint32 additions, so
// the words are expected to carry only the limited overflow described in the
// comments at the top of this file.
//
// The field value is modified in place and returned to support chaining.
func (f *fieldVal) Normalize() *fieldVal {
	// The field representation leaves 6 bits of overflow in each word so
	// intermediate calculations can be performed without needing to
	// propagate the carry to each higher word during the calculations.  In
	// order to normalize, the full 256-bit value is "compacted" to the
	// right while propagating any carries through to the high order word.
	//
	// Since this field is doing arithmetic modulo the secp256k1 prime, we
	// also need to perform modular reduction over the prime.
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, highly efficient
	// reduction can be achieved.
	//
	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
	// this criteria.
	//
	// 4294968273 in field representation (base 2^26) is:
	// n[0] = 977
	// n[1] = 64
	// That is to say (2^26 * 64) + 977 = 4294968273
	//
	// The bits of the high order word above fieldMSBBits are the multiple
	// of 2^256 (the magnitude) currently held by the value.  Since 2^256 is
	// congruent to 4294968273 modulo the prime, that multiple is removed
	// from the top and folded back in at the bottom as m*977 in word zero
	// and m*64 (m << 6) in word one while the carries are propagated.
	// After this step there might be one additional carry into bit 256
	// (bit 22 of the high order word), which is handled below.
	t9 := f.n[9]
	m := t9 >> fieldMSBBits
	t9 = t9 & fieldMSBMask
	t0 := f.n[0] + m*977
	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 := (t1 >> fieldBase) + f.n[2]
	t1 = t1 & fieldBaseMask
	t3 := (t2 >> fieldBase) + f.n[3]
	t2 = t2 & fieldBaseMask
	t4 := (t3 >> fieldBase) + f.n[4]
	t3 = t3 & fieldBaseMask
	t5 := (t4 >> fieldBase) + f.n[5]
	t4 = t4 & fieldBaseMask
	t6 := (t5 >> fieldBase) + f.n[6]
	t5 = t5 & fieldBaseMask
	t7 := (t6 >> fieldBase) + f.n[7]
	t6 = t6 & fieldBaseMask
	t8 := (t7 >> fieldBase) + f.n[8]
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask

	// At this point the value can still be greater than or equal to the
	// prime in two ways: there was a carry into bit 256, or the low 256
	// bits are themselves >= prime.  The latter is only possible when words
	// two through nine are all saturated AND words one and zero, taken
	// together as a single 52-bit quantity, are at least the low 52 bits of
	// the prime.  That joint condition is detected by checking whether
	// adding 4294968273 to the low two words carries out of word one.
	// (Comparing word zero and word one independently is not sufficient:
	// for example 2^256 - 2^26 has a word zero of 0 yet exceeds the prime.)
	//
	// Note that the if/else statements here intentionally perform the
	// bitwise operation in both branches, even when it will not change the
	// value, to keep the work the same between the branches.  'm' ends up
	// as one when a final reduction is required and zero otherwise.
	m = 1
	if t9 == fieldMSBMask {
		m &= 1
	} else {
		m &= 0
	}
	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if t9>>fieldMSBBits != 0 {
		m |= 1
	} else {
		m |= 0
	}

	// Subtracting the prime is the same as adding 4294968273 and discarding
	// the resulting 2^256, so conditionally (via m) add it, propagate the
	// carries once more, and mask off bit 256 and above.  Nothing changes
	// when m is zero.
	t0 = t0 + m*977
	t1 = (t0 >> fieldBase) + t1 + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 = (t1 >> fieldBase) + t2
	t1 = t1 & fieldBaseMask
	t3 = (t2 >> fieldBase) + t3
	t2 = t2 & fieldBaseMask
	t4 = (t3 >> fieldBase) + t4
	t3 = t3 & fieldBaseMask
	t5 = (t4 >> fieldBase) + t5
	t4 = t4 & fieldBaseMask
	t6 = (t5 >> fieldBase) + t6
	t5 = t5 & fieldBaseMask
	t7 = (t6 >> fieldBase) + t7
	t6 = t6 & fieldBaseMask
	t8 = (t7 >> fieldBase) + t8
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask
	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.

	// Finally, set the normalized and reduced words.
	f.n[0] = t0
	f.n[1] = t1
	f.n[2] = t2
	f.n[3] = t3
	f.n[4] = t4
	f.n[5] = t5
	f.n[6] = t6
	f.n[7] = t7
	f.n[8] = t8
	f.n[9] = t9
	return f
}

// PutBytes unpacks the field value to a 32-byte big-endian value using the
// passed byte array.  There is a similar function, Bytes, which unpacks the
// field value into a new array and returns that.  This version is provided
// since it can be useful to cut down on the number of allocations by allowing
// the caller to reuse a buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) PutBytes(b *[32]byte) {
	// Unpack the 256 total bits from the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is a bit faster.  Benchmarks show this is
	// about 10 times faster than the variant which uses loops.
	b[31] = byte(f.n[0] & eightBitsMask)
	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
	b[18] = byte(f.n[4] & eightBitsMask)
	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
	b[5] = byte(f.n[8] & eightBitsMask)
	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
}

// Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
// for a variant that allows a buffer to be passed which can be useful to
// cut down on the number of allocations by allowing the caller to reuse a
// buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) Bytes() *[32]byte {
	b := new([32]byte)
	f.PutBytes(b)
	return b
}

// IsZero returns whether or not the field value is equal to zero.
//
// Only the raw words are inspected, so a denormalized value that is merely
// congruent to zero (for example the prime itself) is reported as non-zero.
// Normalize first when that distinction matters.
func (f *fieldVal) IsZero() bool {
	// The value can only be zero if no bits are set in any of the words.
	// This is a constant time implementation.
	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]

	return bits == 0
}

// IsOdd returns whether or not the field value is an odd number.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) IsOdd() bool {
	// Only odd numbers have the bottom bit set.
	return f.n[0]&1 == 1
}

// Equals returns whether or not the two field values are the same.  Both
// field values being compared must be normalized for this function to return
// the correct result.
func (f *fieldVal) Equals(val *fieldVal) bool {
	// Xor only sets bits when they are different, so the two field values
	// can only be the same if no bits are set after xoring each word.
	// This is a constant time implementation.
	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
		(f.n[9] ^ val.n[9])

	return bits == 0
}

// NegateVal negates the passed value and stores the result in f.  The caller
// must provide the magnitude of the passed value for a correct result.
//
// The field value is returned to support chaining.
This enables syntax like: 508 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1. 509 func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal { 510 // Negation in the field is just the prime minus the value. However, 511 // in order to allow negation against a field value without having to 512 // normalize/reduce it first, multiply by the magnitude (that is how 513 // "far" away it is from the normalized value) to adjust. Also, since 514 // negating a value pushes it one more order of magnitude away from the 515 // normalized range, add 1 to compensate. 516 // 517 // For some intuition here, imagine you're performing mod 12 arithmetic 518 // (picture a clock) and you are negating the number 7. So you start at 519 // 12 (which is of course 0 under mod 12) and count backwards (left on 520 // the clock) 7 times to arrive at 5. Notice this is just 12-7 = 5. 521 // Now, assume you're starting with 19, which is a number that is 522 // already larger than the modulus and congruent to 7 (mod 12). When a 523 // value is already in the desired range, its magnitude is 1. Since 19 524 // is an additional "step", its magnitude (mod 12) is 2. Since any 525 // multiple of the modulus is conguent to zero (mod m), the answer can 526 // be shortcut by simply mulplying the magnitude by the modulus and 527 // subtracting. Keeping with the example, this would be (2*12)-19 = 5. 528 f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0] 529 f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1] 530 f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2] 531 f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3] 532 f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4] 533 f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5] 534 f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6] 535 f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7] 536 f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8] 537 f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9] 538 539 return f 540 } 541 542 // Negate negates the field value. 
The existing field value is modified. The 543 // caller must provide the magnitude of the field value for a correct result. 544 // 545 // The field value is returned to support chaining. This enables syntax like: 546 // f.Negate().AddInt(1) so that f = -f + 1. 547 func (f *fieldVal) Negate(magnitude uint32) *fieldVal { 548 return f.NegateVal(f, magnitude) 549 } 550 551 // AddInt adds the passed integer to the existing field value and stores the 552 // result in f. This is a convenience function since it is fairly common to 553 // perform some arithemetic with small native integers. 554 // 555 // The field value is returned to support chaining. This enables syntax like: 556 // f.AddInt(1).Add(f2) so that f = f + 1 + f2. 557 func (f *fieldVal) AddInt(ui uint) *fieldVal { 558 // Since the field representation intentionally provides overflow bits, 559 // it's ok to use carryless addition as the carry bit is safely part of 560 // the word and will be normalized out. 561 f.n[0] += uint32(ui) 562 563 return f 564 } 565 566 // Add adds the passed value to the existing field value and stores the result 567 // in f. 568 // 569 // The field value is returned to support chaining. This enables syntax like: 570 // f.Add(f2).AddInt(1) so that f = f + f2 + 1. 571 func (f *fieldVal) Add(val *fieldVal) *fieldVal { 572 // Since the field representation intentionally provides overflow bits, 573 // it's ok to use carryless addition as the carry bit is safely part of 574 // each word and will be normalized out. This could obviously be done 575 // in a loop, but the unrolled version is faster. 576 f.n[0] += val.n[0] 577 f.n[1] += val.n[1] 578 f.n[2] += val.n[2] 579 f.n[3] += val.n[3] 580 f.n[4] += val.n[4] 581 f.n[5] += val.n[5] 582 f.n[6] += val.n[6] 583 f.n[7] += val.n[7] 584 f.n[8] += val.n[8] 585 f.n[9] += val.n[9] 586 587 return f 588 } 589 590 // Add2 adds the passed two field values together and stores the result in f. 591 // 592 // The field value is returned to support chaining. 
This enables syntax like: 593 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1. 594 func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal { 595 // Since the field representation intentionally provides overflow bits, 596 // it's ok to use carryless addition as the carry bit is safely part of 597 // each word and will be normalized out. This could obviously be done 598 // in a loop, but the unrolled version is faster. 599 f.n[0] = val.n[0] + val2.n[0] 600 f.n[1] = val.n[1] + val2.n[1] 601 f.n[2] = val.n[2] + val2.n[2] 602 f.n[3] = val.n[3] + val2.n[3] 603 f.n[4] = val.n[4] + val2.n[4] 604 f.n[5] = val.n[5] + val2.n[5] 605 f.n[6] = val.n[6] + val2.n[6] 606 f.n[7] = val.n[7] + val2.n[7] 607 f.n[8] = val.n[8] + val2.n[8] 608 f.n[9] = val.n[9] + val2.n[9] 609 610 return f 611 } 612 613 // MulInt multiplies the field value by the passed int and stores the result in 614 // f. Note that this function can overflow if multiplying the value by any of 615 // the individual words exceeds a max uint32. Therefore it is important that 616 // the caller ensures no overflows will occur before using this function. 617 // 618 // The field value is returned to support chaining. This enables syntax like: 619 // f.MulInt(2).Add(f2) so that f = 2 * f + f2. 620 func (f *fieldVal) MulInt(val uint) *fieldVal { 621 // Since each word of the field representation can hold up to 622 // fieldOverflowBits extra bits which will be normalized out, it's safe 623 // to multiply each word without using a larger type or carry 624 // propagation so long as the values won't overflow a uint32. This 625 // could obviously be done in a loop, but the unrolled version is 626 // faster. 
627 ui := uint32(val) 628 f.n[0] *= ui 629 f.n[1] *= ui 630 f.n[2] *= ui 631 f.n[3] *= ui 632 f.n[4] *= ui 633 f.n[5] *= ui 634 f.n[6] *= ui 635 f.n[7] *= ui 636 f.n[8] *= ui 637 f.n[9] *= ui 638 639 return f 640 } 641 642 // Mul multiplies the passed value to the existing field value and stores the 643 // result in f. Note that this function can overflow if multiplying any 644 // of the individual words exceeds a max uint32. In practice, this means the 645 // magnitude of either value involved in the multiplication must be a max of 646 // 8. 647 // 648 // The field value is returned to support chaining. This enables syntax like: 649 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1. 650 func (f *fieldVal) Mul(val *fieldVal) *fieldVal { 651 return f.Mul2(f, val) 652 } 653 654 // Mul2 multiplies the passed two field values together and stores the result 655 // result in f. Note that this function can overflow if multiplying any of 656 // the individual words exceeds a max uint32. In practice, this means the 657 // magnitude of either value involved in the multiplication must be a max of 658 // 8. 659 // 660 // The field value is returned to support chaining. This enables syntax like: 661 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1. 662 func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal { 663 // This could be done with a couple of for loops and an array to store 664 // the intermediate terms, but this unrolled version is significantly 665 // faster. 666 667 // Terms for 2^(fieldBase*0). 668 m := uint64(val.n[0]) * uint64(val2.n[0]) 669 t0 := m & fieldBaseMask 670 671 // Terms for 2^(fieldBase*1). 672 m = (m >> fieldBase) + 673 uint64(val.n[0])*uint64(val2.n[1]) + 674 uint64(val.n[1])*uint64(val2.n[0]) 675 t1 := m & fieldBaseMask 676 677 // Terms for 2^(fieldBase*2). 
678 m = (m >> fieldBase) + 679 uint64(val.n[0])*uint64(val2.n[2]) + 680 uint64(val.n[1])*uint64(val2.n[1]) + 681 uint64(val.n[2])*uint64(val2.n[0]) 682 t2 := m & fieldBaseMask 683 684 // Terms for 2^(fieldBase*3). 685 m = (m >> fieldBase) + 686 uint64(val.n[0])*uint64(val2.n[3]) + 687 uint64(val.n[1])*uint64(val2.n[2]) + 688 uint64(val.n[2])*uint64(val2.n[1]) + 689 uint64(val.n[3])*uint64(val2.n[0]) 690 t3 := m & fieldBaseMask 691 692 // Terms for 2^(fieldBase*4). 693 m = (m >> fieldBase) + 694 uint64(val.n[0])*uint64(val2.n[4]) + 695 uint64(val.n[1])*uint64(val2.n[3]) + 696 uint64(val.n[2])*uint64(val2.n[2]) + 697 uint64(val.n[3])*uint64(val2.n[1]) + 698 uint64(val.n[4])*uint64(val2.n[0]) 699 t4 := m & fieldBaseMask 700 701 // Terms for 2^(fieldBase*5). 702 m = (m >> fieldBase) + 703 uint64(val.n[0])*uint64(val2.n[5]) + 704 uint64(val.n[1])*uint64(val2.n[4]) + 705 uint64(val.n[2])*uint64(val2.n[3]) + 706 uint64(val.n[3])*uint64(val2.n[2]) + 707 uint64(val.n[4])*uint64(val2.n[1]) + 708 uint64(val.n[5])*uint64(val2.n[0]) 709 t5 := m & fieldBaseMask 710 711 // Terms for 2^(fieldBase*6). 712 m = (m >> fieldBase) + 713 uint64(val.n[0])*uint64(val2.n[6]) + 714 uint64(val.n[1])*uint64(val2.n[5]) + 715 uint64(val.n[2])*uint64(val2.n[4]) + 716 uint64(val.n[3])*uint64(val2.n[3]) + 717 uint64(val.n[4])*uint64(val2.n[2]) + 718 uint64(val.n[5])*uint64(val2.n[1]) + 719 uint64(val.n[6])*uint64(val2.n[0]) 720 t6 := m & fieldBaseMask 721 722 // Terms for 2^(fieldBase*7). 723 m = (m >> fieldBase) + 724 uint64(val.n[0])*uint64(val2.n[7]) + 725 uint64(val.n[1])*uint64(val2.n[6]) + 726 uint64(val.n[2])*uint64(val2.n[5]) + 727 uint64(val.n[3])*uint64(val2.n[4]) + 728 uint64(val.n[4])*uint64(val2.n[3]) + 729 uint64(val.n[5])*uint64(val2.n[2]) + 730 uint64(val.n[6])*uint64(val2.n[1]) + 731 uint64(val.n[7])*uint64(val2.n[0]) 732 t7 := m & fieldBaseMask 733 734 // Terms for 2^(fieldBase*8). 
735 m = (m >> fieldBase) + 736 uint64(val.n[0])*uint64(val2.n[8]) + 737 uint64(val.n[1])*uint64(val2.n[7]) + 738 uint64(val.n[2])*uint64(val2.n[6]) + 739 uint64(val.n[3])*uint64(val2.n[5]) + 740 uint64(val.n[4])*uint64(val2.n[4]) + 741 uint64(val.n[5])*uint64(val2.n[3]) + 742 uint64(val.n[6])*uint64(val2.n[2]) + 743 uint64(val.n[7])*uint64(val2.n[1]) + 744 uint64(val.n[8])*uint64(val2.n[0]) 745 t8 := m & fieldBaseMask 746 747 // Terms for 2^(fieldBase*9). 748 m = (m >> fieldBase) + 749 uint64(val.n[0])*uint64(val2.n[9]) + 750 uint64(val.n[1])*uint64(val2.n[8]) + 751 uint64(val.n[2])*uint64(val2.n[7]) + 752 uint64(val.n[3])*uint64(val2.n[6]) + 753 uint64(val.n[4])*uint64(val2.n[5]) + 754 uint64(val.n[5])*uint64(val2.n[4]) + 755 uint64(val.n[6])*uint64(val2.n[3]) + 756 uint64(val.n[7])*uint64(val2.n[2]) + 757 uint64(val.n[8])*uint64(val2.n[1]) + 758 uint64(val.n[9])*uint64(val2.n[0]) 759 t9 := m & fieldBaseMask 760 761 // Terms for 2^(fieldBase*10). 762 m = (m >> fieldBase) + 763 uint64(val.n[1])*uint64(val2.n[9]) + 764 uint64(val.n[2])*uint64(val2.n[8]) + 765 uint64(val.n[3])*uint64(val2.n[7]) + 766 uint64(val.n[4])*uint64(val2.n[6]) + 767 uint64(val.n[5])*uint64(val2.n[5]) + 768 uint64(val.n[6])*uint64(val2.n[4]) + 769 uint64(val.n[7])*uint64(val2.n[3]) + 770 uint64(val.n[8])*uint64(val2.n[2]) + 771 uint64(val.n[9])*uint64(val2.n[1]) 772 t10 := m & fieldBaseMask 773 774 // Terms for 2^(fieldBase*11). 775 m = (m >> fieldBase) + 776 uint64(val.n[2])*uint64(val2.n[9]) + 777 uint64(val.n[3])*uint64(val2.n[8]) + 778 uint64(val.n[4])*uint64(val2.n[7]) + 779 uint64(val.n[5])*uint64(val2.n[6]) + 780 uint64(val.n[6])*uint64(val2.n[5]) + 781 uint64(val.n[7])*uint64(val2.n[4]) + 782 uint64(val.n[8])*uint64(val2.n[3]) + 783 uint64(val.n[9])*uint64(val2.n[2]) 784 t11 := m & fieldBaseMask 785 786 // Terms for 2^(fieldBase*12). 
787 m = (m >> fieldBase) + 788 uint64(val.n[3])*uint64(val2.n[9]) + 789 uint64(val.n[4])*uint64(val2.n[8]) + 790 uint64(val.n[5])*uint64(val2.n[7]) + 791 uint64(val.n[6])*uint64(val2.n[6]) + 792 uint64(val.n[7])*uint64(val2.n[5]) + 793 uint64(val.n[8])*uint64(val2.n[4]) + 794 uint64(val.n[9])*uint64(val2.n[3]) 795 t12 := m & fieldBaseMask 796 797 // Terms for 2^(fieldBase*13). 798 m = (m >> fieldBase) + 799 uint64(val.n[4])*uint64(val2.n[9]) + 800 uint64(val.n[5])*uint64(val2.n[8]) + 801 uint64(val.n[6])*uint64(val2.n[7]) + 802 uint64(val.n[7])*uint64(val2.n[6]) + 803 uint64(val.n[8])*uint64(val2.n[5]) + 804 uint64(val.n[9])*uint64(val2.n[4]) 805 t13 := m & fieldBaseMask 806 807 // Terms for 2^(fieldBase*14). 808 m = (m >> fieldBase) + 809 uint64(val.n[5])*uint64(val2.n[9]) + 810 uint64(val.n[6])*uint64(val2.n[8]) + 811 uint64(val.n[7])*uint64(val2.n[7]) + 812 uint64(val.n[8])*uint64(val2.n[6]) + 813 uint64(val.n[9])*uint64(val2.n[5]) 814 t14 := m & fieldBaseMask 815 816 // Terms for 2^(fieldBase*15). 817 m = (m >> fieldBase) + 818 uint64(val.n[6])*uint64(val2.n[9]) + 819 uint64(val.n[7])*uint64(val2.n[8]) + 820 uint64(val.n[8])*uint64(val2.n[7]) + 821 uint64(val.n[9])*uint64(val2.n[6]) 822 t15 := m & fieldBaseMask 823 824 // Terms for 2^(fieldBase*16). 825 m = (m >> fieldBase) + 826 uint64(val.n[7])*uint64(val2.n[9]) + 827 uint64(val.n[8])*uint64(val2.n[8]) + 828 uint64(val.n[9])*uint64(val2.n[7]) 829 t16 := m & fieldBaseMask 830 831 // Terms for 2^(fieldBase*17). 832 m = (m >> fieldBase) + 833 uint64(val.n[8])*uint64(val2.n[9]) + 834 uint64(val.n[9])*uint64(val2.n[8]) 835 t17 := m & fieldBaseMask 836 837 // Terms for 2^(fieldBase*18). 838 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9]) 839 t18 := m & fieldBaseMask 840 841 // What's left is for 2^(fieldBase*19). 842 t19 := m >> fieldBase 843 844 // At this point, all of the terms are grouped into their respective 845 // base. 
846 // 847 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 848 // when the modulus is of the special form m = b^t - c, highly efficient 849 // reduction can be achieved per the provided algorithm. 850 // 851 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 852 // this criterion. 853 // 854 // 4294968273 in field representation (base 2^26) is: 855 // n[0] = 977 856 // n[1] = 64 857 // That is to say (2^26 * 64) + 977 = 4294968273 858 // 859 // Since each word is in base 2^26, the upper terms (t10 and up) start 860 // at 260 bits (versus the final desired range of 256 bits), so the 861 // field representation of 'c' from above needs to be adjusted for the 862 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 863 // 68719492368. Thus, the adjusted field representation of 'c' is: 864 // n[0] = 977 * 16 = 15632 865 // n[1] = 64 * 16 = 1024 866 // That is to say (2^26 * 1024) + 15632 = 68719492368 867 // 868 // To reduce the final term, t19, the entire 'c' value is needed instead 869 // of only n[0] because there are no more terms left to handle n[1]. 870 // This means there might be some magnitude left in the upper bits that 871 // is handled below.
872 m = t0 + t10*15632 873 t0 = m & fieldBaseMask 874 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 875 t1 = m & fieldBaseMask 876 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 877 t2 = m & fieldBaseMask 878 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 879 t3 = m & fieldBaseMask 880 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 881 t4 = m & fieldBaseMask 882 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 883 t5 = m & fieldBaseMask 884 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 885 t6 = m & fieldBaseMask 886 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 887 t7 = m & fieldBaseMask 888 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 889 t8 = m & fieldBaseMask 890 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 891 t9 = m & fieldMSBMask 892 m = m >> fieldMSBBits 893 894 // At this point, if the magnitude is greater than 0, the overall value 895 // is greater than the max possible 256-bit value. In particular, it is 896 // "how many times larger" than the max value it is. 897 // 898 // The algorithm presented in [HAC] section 14.3.4 repeats until the 899 // quotient is zero. However, due to the above, we already know at 900 // least how many times we would need to repeat as it's the value 901 // currently in m. Thus we can simply multiply the magnitude by the 902 // field representation of the prime and do a single iteration. Notice 903 // that nothing will be changed when the magnitude is zero, so we could 904 // skip this in that case, however always running regardless allows it 905 // to run in constant time. The final result will be in the range 906 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 907 // magnitude of 1, but it is denormalized. 
908 d := t0 + m*977 909 f.n[0] = uint32(d & fieldBaseMask) 910 d = (d >> fieldBase) + t1 + m*64 911 f.n[1] = uint32(d & fieldBaseMask) 912 f.n[2] = uint32((d >> fieldBase) + t2) 913 f.n[3] = uint32(t3) 914 f.n[4] = uint32(t4) 915 f.n[5] = uint32(t5) 916 f.n[6] = uint32(t6) 917 f.n[7] = uint32(t7) 918 f.n[8] = uint32(t8) 919 f.n[9] = uint32(t9) 920 921 return f 922 }

// Square replaces the field value with its own square.  The intermediate
// word products are accumulated in 64 bits and can overflow when the words
// are too large; in practice the magnitude of the field value must be at most
// 8 to prevent that.
//
// The receiver is returned so calls can be chained, for example
// f.Square().Mul(f2) leaves f = f^2 * f2.
func (f *fieldVal) Square() *fieldVal {
	return f.SquareVal(f)
}

// SquareVal stores the square of val in f.  The intermediate word products
// are accumulated in 64 bits and can overflow when the words are too large;
// in practice the magnitude of the value being squared must be at most 8 to
// prevent that.
//
// The receiver is returned so calls can be chained, for example
// f3.SquareVal(f).Mul(f) leaves f3 = f^2 * f = f^3.
func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
	// The schoolbook product is written out term by term rather than with
	// loops over an array of intermediate terms because the unrolled form is
	// significantly faster.

	// Widen every word exactly once.  All reads of val happen here, before
	// anything is written to f, so val may alias f.
	n0 := uint64(val.n[0])
	n1 := uint64(val.n[1])
	n2 := uint64(val.n[2])
	n3 := uint64(val.n[3])
	n4 := uint64(val.n[4])
	n5 := uint64(val.n[5])
	n6 := uint64(val.n[6])
	n7 := uint64(val.n[7])
	n8 := uint64(val.n[8])
	n9 := uint64(val.n[9])

	// Each cross product n[i]*n[j] with i < j appears twice in the square,
	// so keep a doubled copy of every word that can be the lower index.
	d0 := 2 * n0
	d1 := 2 * n1
	d2 := 2 * n2
	d3 := 2 * n3
	d4 := 2 * n4
	d5 := 2 * n5
	d6 := 2 * n6
	d7 := 2 * n7
	d8 := 2 * n8

	// Column k gathers every product whose word indices sum to k, plus the
	// carry out of column k-1.  The low fieldBase bits become term tk.

	// Column 0.
	acc := n0 * n0
	t0 := acc & fieldBaseMask

	// Column 1.
	acc = (acc >> fieldBase) + d0*n1
	t1 := acc & fieldBaseMask

	// Column 2.
	acc = (acc >> fieldBase) + d0*n2 + n1*n1
	t2 := acc & fieldBaseMask

	// Column 3.
	acc = (acc >> fieldBase) + d0*n3 + d1*n2
	t3 := acc & fieldBaseMask

	// Column 4.
	acc = (acc >> fieldBase) + d0*n4 + d1*n3 + n2*n2
	t4 := acc & fieldBaseMask

	// Column 5.
	acc = (acc >> fieldBase) + d0*n5 + d1*n4 + d2*n3
	t5 := acc & fieldBaseMask

	// Column 6.
	acc = (acc >> fieldBase) + d0*n6 + d1*n5 + d2*n4 + n3*n3
	t6 := acc & fieldBaseMask

	// Column 7.
	acc = (acc >> fieldBase) + d0*n7 + d1*n6 + d2*n5 + d3*n4
	t7 := acc & fieldBaseMask

	// Column 8.
	acc = (acc >> fieldBase) + d0*n8 + d1*n7 + d2*n6 + d3*n5 + n4*n4
	t8 := acc & fieldBaseMask

	// Column 9.
	acc = (acc >> fieldBase) + d0*n9 + d1*n8 + d2*n7 + d3*n6 + d4*n5
	t9 := acc & fieldBaseMask

	// Column 10.
	acc = (acc >> fieldBase) + d1*n9 + d2*n8 + d3*n7 + d4*n6 + n5*n5
	t10 := acc & fieldBaseMask

	// Column 11.
	acc = (acc >> fieldBase) + d2*n9 + d3*n8 + d4*n7 + d5*n6
	t11 := acc & fieldBaseMask

	// Column 12.
	acc = (acc >> fieldBase) + d3*n9 + d4*n8 + d5*n7 + n6*n6
	t12 := acc & fieldBaseMask

	// Column 13.
	acc = (acc >> fieldBase) + d4*n9 + d5*n8 + d6*n7
	t13 := acc & fieldBaseMask

	// Column 14.
	acc = (acc >> fieldBase) + d5*n9 + d6*n8 + n7*n7
	t14 := acc & fieldBaseMask

	// Column 15.
	acc = (acc >> fieldBase) + d6*n9 + d7*n8
	t15 := acc & fieldBaseMask

	// Column 16.
	acc = (acc >> fieldBase) + d7*n9 + n8*n8
	t16 := acc & fieldBaseMask

	// Column 17.
	acc = (acc >> fieldBase) + d8*n9
	t17 := acc & fieldBaseMask

	// Column 18.
	acc = (acc >> fieldBase) + n9*n9
	t18 := acc & fieldBaseMask

	// Whatever carry remains belongs to column 19.
	t19 := acc >> fieldBase

	// Reduction follows [HAC] section 14.3.4 (moduli of the special form
	// b^t - c).  The secp256k1 prime is 2^256 - 4294968273, and
	//   4294968273 = (2^26 * 64) + 977
	// so in base 2^26 the constant c has words 977 and 64.
	//
	// The upper terms (t10 and above) start at bit 260 rather than bit 256,
	// so c is scaled by 2^4 = 16 before being folded in:
	//   68719492368 = 4294968273 * 16 = (2^26 * 1024) + 15632
	//
	// The last term, t19, has no higher column left to absorb the second
	// word, so it is multiplied by the whole scaled constant.  That can
	// leave magnitude above bit 256, which the final step below deals with.
	const (
		cLow  = 15632       // 977 * 16
		cHigh = 1024        // 64 * 16
		cFull = 68719492368 // 4294968273 * 16
	)

	acc = t0 + t10*cLow
	t0 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t1 + t10*cHigh + t11*cLow
	t1 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t2 + t11*cHigh + t12*cLow
	t2 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t3 + t12*cHigh + t13*cLow
	t3 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t4 + t13*cHigh + t14*cLow
	t4 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t5 + t14*cHigh + t15*cLow
	t5 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t6 + t15*cHigh + t16*cLow
	t6 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t7 + t16*cHigh + t17*cLow
	t7 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t8 + t17*cHigh + t18*cLow
	t8 = acc & fieldBaseMask
	acc = (acc >> fieldBase) + t9 + t18*cHigh + t19*cFull
	t9 = acc & fieldMSBMask
	acc >>= fieldMSBBits

	// acc now holds the part of the value above 256 bits.  [HAC] 14.3.4
	// iterates until that quotient is zero; here the quotient is folded
	// back in once by multiplying it by c (words 977 and 64).  The step is
	// a no-op when acc is zero but is always executed so the function does
	// the same work regardless of the value.  The result is in the range
	// 0 <= result <= prime + (2^64 - c): magnitude 1, but not normalized.
	r := t0 + acc*977
	f.n[0] = uint32(r & fieldBaseMask)
	r = (r >> fieldBase) + t1 + acc*64
	f.n[1] = uint32(r & fieldBaseMask)
	f.n[2] = uint32((r >> fieldBase) + t2)
	f.n[3] = uint32(t3)
	f.n[4] = uint32(t4)
	f.n[5] = uint32(t5)
	f.n[6] = uint32(t6)
	f.n[7] = uint32(t7)
	f.n[8] = uint32(t8)
	f.n[9] = uint32(t9)

	return f
}

// Inverse replaces the field value with its modular multiplicative inverse.
//
// The receiver is returned so calls can be chained, for example
// f.Inverse().Mul(f2) leaves f = f^-1 * f2.
func (f *fieldVal) Inverse() *fieldVal {
	// Fermat's little theorem: for a prime p and a nonzero a,
	// a^(p-1) = 1 (mod p).  Writing that as a * a^(p-2) = 1 (mod p) shows
	// that a^(p-2) is the multiplicative inverse of a.
	//
	// For secp256k1, p - 2 = 2^256 - 4294968275.  The exponentiation below
	// is an addition chain that reuses a handful of small powers so that as
	// few multiplications as possible are needed (they cost more than
	// squarings).
	//
	// Total cost: 258 field squarings and 33 field multiplications.

	// Small powers of the input; xN holds x^N.  All of them are derived
	// before f itself is overwritten.
	var x2, x3, x4, x10, x11, x21, x42, x45, x63, x1019, x1023 fieldVal
	x2.SquareVal(f)
	x3.Mul2(&x2, f)
	x4.SquareVal(&x2)
	x10.SquareVal(&x4).Mul(&x2)
	x11.Mul2(&x10, f)
	x21.Mul2(&x10, &x11)
	x42.SquareVal(&x21)
	x45.Mul2(&x42, &x3)
	x63.Mul2(&x42, &x21)
	x1019.SquareVal(&x63)            // x^126
	x1019.Square().Square().Square() // x^1008
	x1019.Mul(&x11)                  // x^1019
	x1023.Mul2(&x1019, &x4)

	// squareTen squares f ten times, i.e. multiplies its exponent by 2^10.
	squareTen := func() *fieldVal {
		for i := 0; i < 10; i++ {
			f.Square()
		}
		return f
	}

	f.Set(&x63) // f = x^(2^6 - 1)

	// Each round turns x^(2^k - 1) into x^(2^(k+10) - 1); 21 rounds take
	// the exponent from 2^6 - 1 to 2^216 - 1.
	for round := 0; round < 21; round++ {
		squareTen().Mul(&x1023)
	}

	squareTen().Mul(&x1019)        // f = x^(2^226 - 5)
	squareTen().Mul(&x1023)        // f = x^(2^236 - 4097)
	squareTen().Mul(&x1023)        // f = x^(2^246 - 4194305)
	return squareTen().Mul(&x45) // f = x^(2^256 - 4294968275) = x^(p-2)
}