github.com/chain5j/chain5j-pkg@v1.0.7/crypto/signature/secp256k1/btcecv1/field.go (about) 1 // Copyright (c) 2013-2016 The btcsuite developers 2 // Copyright (c) 2013-2016 Dave Collins 3 // Use of this source code is governed by an ISC 4 // license that can be found in the LICENSE file. 5 6 package btcecv1 7 8 // References: 9 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone. 10 // http://cacr.uwaterloo.ca/hac/ 11 12 // All elliptic curve operations for secp256k1 are done in a finite field 13 // characterized by a 256-bit prime. Given this precision is larger than the 14 // biggest available native type, obviously some form of bignum math is needed. 15 // This package implements specialized fixed-precision field arithmetic rather 16 // than relying on an arbitrary-precision arithmetic package such as math/big 17 // for dealing with the field math since the size is known. As a result, rather 18 // large performance gains are achieved by taking advantage of many 19 // optimizations not available to arbitrary-precision arithmetic and generic 20 // modular arithmetic algorithms. 21 // 22 // There are various ways to internally represent each finite field element. 23 // For example, the most obvious representation would be to use an array of 4 24 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from 25 // a couple of issues. First, there is no native Go type large enough to handle 26 // the intermediate results while adding or multiplying two 64-bit numbers, and 27 // second there is no space left for overflows when performing the intermediate 28 // arithmetic between each array element which would lead to expensive carry 29 // propagation. 30 // 31 // Given the above, this implementation represents the field elements as 32 // 10 uint32s with each word (array entry) treated as base 2^26. 
// This was chosen for the following reasons:
// 1) Most systems at the current time are 64-bit (or at least have 64-bit
//    registers available for specialized purposes such as MMX) so the
//    intermediate results can typically be done using a native register (and
//    using uint64s to avoid the need for additional half-word arithmetic)
// 2) In order to allow addition of the internal words without having to
//    propagate the carry, the max normalized value for each register must
//    be less than the number of bits available in the register
// 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
//    reasonable choice for #2
// 4) Given the need for 256-bits of precision and the properties stated in #1,
//    #2, and #3, the representation which best accommodates this is 10 uint32s
//    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
//    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
//    overflow
//
// Since it is so important that the field arithmetic is extremely fast for
// high performance crypto, this package does not perform any validation where
// it ordinarily would.  For example, some functions only give the correct
// result if the field is normalized and there is no checking to ensure it is.
// While I typically prefer to ensure all state and input is valid for most
// packages, this code is really only used internally and every extra check
// counts.

import (
	"encoding/hex"
)

// Constants used to make the code more readable.
const (
	twoBitsMask   = 0x3
	fourBitsMask  = 0xf
	sixBitsMask   = 0x3f
	eightBitsMask = 0xff
)

// Constants related to the field representation.
const (
	// fieldWords is the number of words used to internally represent the
	// 256-bit value.
	fieldWords = 10

	// fieldBase is the exponent used to form the numeric base of each word.
	// 2^(fieldBase*i) where i is the word position.
	fieldBase = 26

	// fieldOverflowBits is the minimum number of "overflow" bits for each
	// word in the field value.
	fieldOverflowBits = 32 - fieldBase

	// fieldBaseMask is the mask for the bits in each word needed to
	// represent the numeric base of each word (except the most significant
	// word).
	fieldBaseMask = (1 << fieldBase) - 1

	// fieldMSBBits is the number of bits in the most significant word used
	// to represent the value.
	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))

	// fieldMSBMask is the mask for the bits in the most significant word
	// needed to represent the value.
	fieldMSBMask = (1 << fieldMSBBits) - 1

	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
	// internal field representation.  It is used during negation.
	fieldPrimeWordZero = 0x3fffc2f

	// fieldPrimeWordOne is word one of the secp256k1 prime in the
	// internal field representation.  It is used during negation.
	fieldPrimeWordOne = 0x3ffffbf
)

var (
	// fieldQBytes is the value Q = (P+1)/4 for the secp256k1 prime P. This
	// value is used to efficiently compute the square root of values in the
	// field via exponentiation. The value of Q in hex is:
	//
	//   Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
	fieldQBytes = []byte{
		0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
	}
)

// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field.  This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
// represents each 256-bit value as 10 32-bit integers in base 2^26.  This
// provides 6 bits of overflow in each word (10 bits in the most significant
// word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
// the arithmetic needed for elliptic curve operations.
//
// The following depicts the internal representation:
//
//	 -----------------------------------------------------------------
//	|        n[9]       |        n[8]       | ... |        n[0]       |
//	| 32 bits available | 32 bits available | ... | 32 bits available |
//	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
//	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
//	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
//	 -----------------------------------------------------------------
//
// For example, consider the number 2^49 + 1.  It would be represented as:
//
//	n[0] = 1
//	n[1] = 2^23
//	n[2..9] = 0
//
// The full 256-bit value is then calculated by looping i from 9..0 and
// doing sum(n[i] * 2^(26i)) like so:
//
//	n[9] * 2^(26*9) = 0    * 2^234 = 0
//	n[8] * 2^(26*8) = 0    * 2^208 = 0
//	...
//	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
//	n[0] * 2^(26*0) = 1    * 2^0   = 1
//	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
type fieldVal struct {
	n [fieldWords]uint32
}

// String returns the field value as a human-readable hex string.  The
// receiver is copied and the copy is normalized, so the original value is
// left untouched.
func (f fieldVal) String() string {
	t := new(fieldVal).Set(&f).Normalize()
	return hex.EncodeToString(t.Bytes()[:])
}

// Zero sets the field value to zero.  A newly created field value is already
// set to zero.  This function can be useful to clear an existing field value
// for reuse.
func (f *fieldVal) Zero() {
	f.n = [fieldWords]uint32{}
}

// Set sets the field value equal to the passed value.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).Set(f2).AddInt(1) so that f = f2 + 1 where f2 is not
// modified.
func (f *fieldVal) Set(val *fieldVal) *fieldVal {
	*f = *val
	return f
}

// SetInt sets the field value to the passed integer.  This is a convenience
// function since it is fairly common to perform some arithmetic with small
// native integers.  The integer is stored in the lowest word only and is
// converted to a uint32, so any bits above the low 32 are discarded.
//
// The field value is returned to support chaining.  This enables syntax such
// as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
func (f *fieldVal) SetInt(ui uint) *fieldVal {
	f.Zero()
	f.n[0] = uint32(ui)
	return f
}

// SetBytes packs the passed 32-byte big-endian value into the internal field
// value representation.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
	// Pack the 256 total bits across the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is significantly faster.  Benchmarks show
	// this is about 34 times faster than the variant which uses loops.
	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
		(uint32(b[28])&twoBitsMask)<<24
	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
		(uint32(b[25])&fourBitsMask)<<22
	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
		(uint32(b[22])&sixBitsMask)<<20
	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
		uint32(b[19])<<18
	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
		(uint32(b[15])&twoBitsMask)<<24
	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
		(uint32(b[12])&fourBitsMask)<<22
	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
		(uint32(b[9])&sixBitsMask)<<20
	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
		uint32(b[6])<<18
	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
		(uint32(b[2])&twoBitsMask)<<24
	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
	return f
}

// SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
// integer (meaning it is truncated to the first 32 bytes), packs it into the
// internal field value representation, and returns the updated field value.
// Slices shorter than 32 bytes are left-padded with zeros.
//
// Note that since passing a slice with more than 32 bytes is truncated, it is
// possible that the truncated value is less than the field prime.  It is up to
// the caller to decide whether it needs to provide numbers of the appropriate
// size or if it is acceptable to use this function with the described
// truncation behavior.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetByteSlice(byteSlice)
func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
	var b32 [32]byte
	if len(b) > 32 {
		b = b[:32]
	}
	copy(b32[32-len(b):], b)
	return f.SetBytes(&b32)
}

// SetHex decodes the passed big-endian hex string into the internal field value
// representation.  Only the first 32-bytes are used.  A string with an odd
// number of digits is treated as if it had a leading zero.
//
// The string is expected to be valid hex.  No error is reported for malformed
// input: only the bytes that decoded successfully before the first invalid
// character are used.
//
// The field value is returned to support chaining.  This enables syntax like:
// f := new(fieldVal).SetHex("0abc").AddInt(1) so that f = 0x0abc + 1
func (f *fieldVal) SetHex(hexString string) *fieldVal {
	if len(hexString)%2 != 0 {
		hexString = "0" + hexString
	}
	// The decode error is deliberately ignored: the signature has no error
	// return, and hex.DecodeString still hands back the bytes decoded
	// before any malformed input.
	decoded, _ := hex.DecodeString(hexString)
	return f.SetByteSlice(decoded)
}

// Normalize normalizes the internal field words into the desired range and
// performs fast modular reduction over the secp256k1 prime by making use of the
// special form of the prime.
//
// The field value is modified in place and returned to support chaining.
func (f *fieldVal) Normalize() *fieldVal {
	// The field representation leaves 6 bits of overflow in each word so
	// intermediate calculations can be performed without needing to
	// propagate the carry to each higher word during the calculations.  In
	// order to normalize, we need to "compact" the full 256-bit value to
	// the right while propagating any carries through to the high order
	// word.
	//
	// Since this field is doing arithmetic modulo the secp256k1 prime, we
	// also need to perform modular reduction over the prime.
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, highly efficient
	// reduction can be achieved.
	//
	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
	// this criterion.
	//
	// 4294968273 in field representation (base 2^26) is:
	// n[0] = 977
	// n[1] = 64
	// That is to say (2^26 * 64) + 977 = 4294968273
	//
	// The algorithm presented in the referenced section typically repeats
	// until the quotient is zero.  However, due to our field representation
	// we already know to within one reduction how many times we would need
	// to repeat as it's the uppermost bits of the high order word.  Thus we
	// can simply multiply the magnitude by the field representation of the
	// prime and do a single iteration.  After this step there might be an
	// additional carry to bit 256 (bit 22 of the high order word).
	t9 := f.n[9]
	m := t9 >> fieldMSBBits
	t9 = t9 & fieldMSBMask
	t0 := f.n[0] + m*977
	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 := (t1 >> fieldBase) + f.n[2]
	t1 = t1 & fieldBaseMask
	t3 := (t2 >> fieldBase) + f.n[3]
	t2 = t2 & fieldBaseMask
	t4 := (t3 >> fieldBase) + f.n[4]
	t3 = t3 & fieldBaseMask
	t5 := (t4 >> fieldBase) + f.n[5]
	t4 = t4 & fieldBaseMask
	t6 := (t5 >> fieldBase) + f.n[6]
	t5 = t5 & fieldBaseMask
	t7 := (t6 >> fieldBase) + f.n[7]
	t6 = t6 & fieldBaseMask
	t8 := (t7 >> fieldBase) + f.n[8]
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask

	// At this point, the magnitude is guaranteed to be one, however, the
	// value could still be greater than the prime if there was either a
	// carry through to bit 256 (bit 22 of the higher order word) or the
	// value is greater than or equal to the field characteristic.  The
	// following determines if either of these conditions are true and does
	// the final reduction in constant time.
	//
	// Note that the if/else statements here intentionally do the bitwise
	// operators even when it won't change the value to ensure constant time
	// between the branches.  Also note that 'm' will be zero when neither
	// of the aforementioned conditions are true and the value will not be
	// changed when 'm' is zero.
	m = 1
	if t9 == fieldMSBMask {
		m &= 1
	} else {
		m &= 0
	}
	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if t9>>fieldMSBBits != 0 {
		m |= 1
	} else {
		m |= 0
	}
	t0 = t0 + m*977
	t1 = (t0 >> fieldBase) + t1 + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 = (t1 >> fieldBase) + t2
	t1 = t1 & fieldBaseMask
	t3 = (t2 >> fieldBase) + t3
	t2 = t2 & fieldBaseMask
	t4 = (t3 >> fieldBase) + t4
	t3 = t3 & fieldBaseMask
	t5 = (t4 >> fieldBase) + t5
	t4 = t4 & fieldBaseMask
	t6 = (t5 >> fieldBase) + t6
	t5 = t5 & fieldBaseMask
	t7 = (t6 >> fieldBase) + t7
	t6 = t6 & fieldBaseMask
	t8 = (t7 >> fieldBase) + t8
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask
	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.

	// Finally, set the normalized and reduced words.
	f.n[0] = t0
	f.n[1] = t1
	f.n[2] = t2
	f.n[3] = t3
	f.n[4] = t4
	f.n[5] = t5
	f.n[6] = t6
	f.n[7] = t7
	f.n[8] = t8
	f.n[9] = t9
	return f
}

// PutBytes unpacks the field value to a 32-byte big-endian value using the
// passed byte array.  There is a similar function, Bytes, which unpacks the
// field value into a new array and returns that.  This version is provided
// since it can be useful to cut down on the number of allocations by allowing
// the caller to reuse a buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) PutBytes(b *[32]byte) {
	// Unpack the 256 total bits from the 10 uint32 words with a max of
	// 26-bits per word.  This could be done with a couple of for loops,
	// but this unrolled version is a bit faster.  Benchmarks show this is
	// about 10 times faster than the variant which uses loops.
	b[31] = byte(f.n[0] & eightBitsMask)
	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
	b[18] = byte(f.n[4] & eightBitsMask)
	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
	b[5] = byte(f.n[8] & eightBitsMask)
	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
}

// Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
// for a variant that allows a buffer to be passed which can be useful to cut
// down on the number of allocations by allowing the caller to reuse a buffer.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) Bytes() *[32]byte {
	b := new([32]byte)
	f.PutBytes(b)
	return b
}

// IsZero returns whether or not the field value is equal to zero.
//
// Only the raw words are inspected, so the field value must be normalized for
// this function to return the correct result.
func (f *fieldVal) IsZero() bool {
	// The value can only be zero if no bits are set in any of the words.
	// This is a constant time implementation.
	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]

	return bits == 0
}

// IsOdd returns whether or not the field value is an odd number.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) IsOdd() bool {
	// Only odd numbers have the bottom bit set.
	return f.n[0]&1 == 1
}

// Equals returns whether or not the two field values are the same.  Both
// field values being compared must be normalized for this function to return
// the correct result.
func (f *fieldVal) Equals(val *fieldVal) bool {
	// Xor only sets bits when they are different, so the two field values
	// can only be the same if no bits are set after xoring each word.
	// This is a constant time implementation.
	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
		(f.n[9] ^ val.n[9])

	return bits == 0
}

// NegateVal negates the passed value and stores the result in f.  The caller
// must provide the magnitude of the passed value for a correct result.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.NegateVal(f2, 1).AddInt(1) so that f = -f2 + 1.
func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
	// Negation in the field is just the prime minus the value.  However,
	// in order to allow negation against a field value without having to
	// normalize/reduce it first, multiply by the magnitude (that is how
	// "far" away it is from the normalized value) to adjust.  Also, since
	// negating a value pushes it one more order of magnitude away from the
	// normalized range, add 1 to compensate.
	//
	// For some intuition here, imagine you're performing mod 12 arithmetic
	// (picture a clock) and you are negating the number 7.  So you start at
	// 12 (which is of course 0 under mod 12) and count backwards (left on
	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
	// Now, assume you're starting with 19, which is a number that is
	// already larger than the modulus and congruent to 7 (mod 12).  When a
	// value is already in the desired range, its magnitude is 1.  Since 19
	// is an additional "step", its magnitude (mod 12) is 2.  Since any
	// multiple of the modulus is congruent to zero (mod m), the answer can
	// be shortcut by simply multiplying the magnitude by the modulus and
	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]

	return f
}

// Negate negates the field value.  The existing field value is modified.  The
// caller must provide the magnitude of the field value for a correct result.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Negate(1).AddInt(1) so that f = -f + 1.
func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
	return f.NegateVal(f, magnitude)
}

// AddInt adds the passed integer to the existing field value and stores the
// result in f.  This is a convenience function since it is fairly common to
// perform some arithmetic with small native integers.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.AddInt(1).Add(f2) so that f = f + 1 + f2.
func (f *fieldVal) AddInt(ui uint) *fieldVal {
	// Since the field representation intentionally provides overflow bits,
	// it's ok to use carryless addition as the carry bit is safely part of
	// the word and will be normalized out.
	f.n[0] += uint32(ui)

	return f
}

// Add adds the passed value to the existing field value and stores the result
// in f.
//
// The field value is returned to support chaining.  This enables syntax like:
// f.Add(f2).AddInt(1) so that f = f + f2 + 1.
func (f *fieldVal) Add(val *fieldVal) *fieldVal {
	// Since the field representation intentionally provides overflow bits,
	// it's ok to use carryless addition as the carry bit is safely part of
	// each word and will be normalized out.  This could obviously be done
	// in a loop, but the unrolled version is faster.
	f.n[0] += val.n[0]
	f.n[1] += val.n[1]
	f.n[2] += val.n[2]
	f.n[3] += val.n[3]
	f.n[4] += val.n[4]
	f.n[5] += val.n[5]
	f.n[6] += val.n[6]
	f.n[7] += val.n[7]
	f.n[8] += val.n[8]
	f.n[9] += val.n[9]

	return f
}

// Add2 adds the passed two field values together and stores the result in f.
//
// The field value is returned to support chaining.
This enables syntax like: 573 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1. 574 func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal { 575 // Since the field representation intentionally provides overflow bits, 576 // it's ok to use carryless addition as the carry bit is safely part of 577 // each word and will be normalized out. This could obviously be done 578 // in a loop, but the unrolled version is faster. 579 f.n[0] = val.n[0] + val2.n[0] 580 f.n[1] = val.n[1] + val2.n[1] 581 f.n[2] = val.n[2] + val2.n[2] 582 f.n[3] = val.n[3] + val2.n[3] 583 f.n[4] = val.n[4] + val2.n[4] 584 f.n[5] = val.n[5] + val2.n[5] 585 f.n[6] = val.n[6] + val2.n[6] 586 f.n[7] = val.n[7] + val2.n[7] 587 f.n[8] = val.n[8] + val2.n[8] 588 f.n[9] = val.n[9] + val2.n[9] 589 590 return f 591 } 592 593 // MulInt multiplies the field value by the passed int and stores the result in 594 // f. Note that this function can overflow if multiplying the value by any of 595 // the individual words exceeds a max uint32. Therefore it is important that 596 // the caller ensures no overflows will occur before using this function. 597 // 598 // The field value is returned to support chaining. This enables syntax like: 599 // f.MulInt(2).Add(f2) so that f = 2 * f + f2. 600 func (f *fieldVal) MulInt(val uint) *fieldVal { 601 // Since each word of the field representation can hold up to 602 // fieldOverflowBits extra bits which will be normalized out, it's safe 603 // to multiply each word without using a larger type or carry 604 // propagation so long as the values won't overflow a uint32. This 605 // could obviously be done in a loop, but the unrolled version is 606 // faster. 
607 ui := uint32(val) 608 f.n[0] *= ui 609 f.n[1] *= ui 610 f.n[2] *= ui 611 f.n[3] *= ui 612 f.n[4] *= ui 613 f.n[5] *= ui 614 f.n[6] *= ui 615 f.n[7] *= ui 616 f.n[8] *= ui 617 f.n[9] *= ui 618 619 return f 620 } 621 622 // Mul multiplies the passed value to the existing field value and stores the 623 // result in f. Note that this function can overflow if multiplying any 624 // of the individual words exceeds a max uint32. In practice, this means the 625 // magnitude of either value involved in the multiplication must be a max of 626 // 8. 627 // 628 // The field value is returned to support chaining. This enables syntax like: 629 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1. 630 func (f *fieldVal) Mul(val *fieldVal) *fieldVal { 631 return f.Mul2(f, val) 632 } 633 634 // Mul2 multiplies the passed two field values together and stores the result 635 // result in f. Note that this function can overflow if multiplying any of 636 // the individual words exceeds a max uint32. In practice, this means the 637 // magnitude of either value involved in the multiplication must be a max of 638 // 8. 639 // 640 // The field value is returned to support chaining. This enables syntax like: 641 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1. 642 func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal { 643 // This could be done with a couple of for loops and an array to store 644 // the intermediate terms, but this unrolled version is significantly 645 // faster. 646 647 // Terms for 2^(fieldBase*0). 648 m := uint64(val.n[0]) * uint64(val2.n[0]) 649 t0 := m & fieldBaseMask 650 651 // Terms for 2^(fieldBase*1). 652 m = (m >> fieldBase) + 653 uint64(val.n[0])*uint64(val2.n[1]) + 654 uint64(val.n[1])*uint64(val2.n[0]) 655 t1 := m & fieldBaseMask 656 657 // Terms for 2^(fieldBase*2). 
658 m = (m >> fieldBase) + 659 uint64(val.n[0])*uint64(val2.n[2]) + 660 uint64(val.n[1])*uint64(val2.n[1]) + 661 uint64(val.n[2])*uint64(val2.n[0]) 662 t2 := m & fieldBaseMask 663 664 // Terms for 2^(fieldBase*3). 665 m = (m >> fieldBase) + 666 uint64(val.n[0])*uint64(val2.n[3]) + 667 uint64(val.n[1])*uint64(val2.n[2]) + 668 uint64(val.n[2])*uint64(val2.n[1]) + 669 uint64(val.n[3])*uint64(val2.n[0]) 670 t3 := m & fieldBaseMask 671 672 // Terms for 2^(fieldBase*4). 673 m = (m >> fieldBase) + 674 uint64(val.n[0])*uint64(val2.n[4]) + 675 uint64(val.n[1])*uint64(val2.n[3]) + 676 uint64(val.n[2])*uint64(val2.n[2]) + 677 uint64(val.n[3])*uint64(val2.n[1]) + 678 uint64(val.n[4])*uint64(val2.n[0]) 679 t4 := m & fieldBaseMask 680 681 // Terms for 2^(fieldBase*5). 682 m = (m >> fieldBase) + 683 uint64(val.n[0])*uint64(val2.n[5]) + 684 uint64(val.n[1])*uint64(val2.n[4]) + 685 uint64(val.n[2])*uint64(val2.n[3]) + 686 uint64(val.n[3])*uint64(val2.n[2]) + 687 uint64(val.n[4])*uint64(val2.n[1]) + 688 uint64(val.n[5])*uint64(val2.n[0]) 689 t5 := m & fieldBaseMask 690 691 // Terms for 2^(fieldBase*6). 692 m = (m >> fieldBase) + 693 uint64(val.n[0])*uint64(val2.n[6]) + 694 uint64(val.n[1])*uint64(val2.n[5]) + 695 uint64(val.n[2])*uint64(val2.n[4]) + 696 uint64(val.n[3])*uint64(val2.n[3]) + 697 uint64(val.n[4])*uint64(val2.n[2]) + 698 uint64(val.n[5])*uint64(val2.n[1]) + 699 uint64(val.n[6])*uint64(val2.n[0]) 700 t6 := m & fieldBaseMask 701 702 // Terms for 2^(fieldBase*7). 703 m = (m >> fieldBase) + 704 uint64(val.n[0])*uint64(val2.n[7]) + 705 uint64(val.n[1])*uint64(val2.n[6]) + 706 uint64(val.n[2])*uint64(val2.n[5]) + 707 uint64(val.n[3])*uint64(val2.n[4]) + 708 uint64(val.n[4])*uint64(val2.n[3]) + 709 uint64(val.n[5])*uint64(val2.n[2]) + 710 uint64(val.n[6])*uint64(val2.n[1]) + 711 uint64(val.n[7])*uint64(val2.n[0]) 712 t7 := m & fieldBaseMask 713 714 // Terms for 2^(fieldBase*8). 
715 m = (m >> fieldBase) + 716 uint64(val.n[0])*uint64(val2.n[8]) + 717 uint64(val.n[1])*uint64(val2.n[7]) + 718 uint64(val.n[2])*uint64(val2.n[6]) + 719 uint64(val.n[3])*uint64(val2.n[5]) + 720 uint64(val.n[4])*uint64(val2.n[4]) + 721 uint64(val.n[5])*uint64(val2.n[3]) + 722 uint64(val.n[6])*uint64(val2.n[2]) + 723 uint64(val.n[7])*uint64(val2.n[1]) + 724 uint64(val.n[8])*uint64(val2.n[0]) 725 t8 := m & fieldBaseMask 726 727 // Terms for 2^(fieldBase*9). 728 m = (m >> fieldBase) + 729 uint64(val.n[0])*uint64(val2.n[9]) + 730 uint64(val.n[1])*uint64(val2.n[8]) + 731 uint64(val.n[2])*uint64(val2.n[7]) + 732 uint64(val.n[3])*uint64(val2.n[6]) + 733 uint64(val.n[4])*uint64(val2.n[5]) + 734 uint64(val.n[5])*uint64(val2.n[4]) + 735 uint64(val.n[6])*uint64(val2.n[3]) + 736 uint64(val.n[7])*uint64(val2.n[2]) + 737 uint64(val.n[8])*uint64(val2.n[1]) + 738 uint64(val.n[9])*uint64(val2.n[0]) 739 t9 := m & fieldBaseMask 740 741 // Terms for 2^(fieldBase*10). 742 m = (m >> fieldBase) + 743 uint64(val.n[1])*uint64(val2.n[9]) + 744 uint64(val.n[2])*uint64(val2.n[8]) + 745 uint64(val.n[3])*uint64(val2.n[7]) + 746 uint64(val.n[4])*uint64(val2.n[6]) + 747 uint64(val.n[5])*uint64(val2.n[5]) + 748 uint64(val.n[6])*uint64(val2.n[4]) + 749 uint64(val.n[7])*uint64(val2.n[3]) + 750 uint64(val.n[8])*uint64(val2.n[2]) + 751 uint64(val.n[9])*uint64(val2.n[1]) 752 t10 := m & fieldBaseMask 753 754 // Terms for 2^(fieldBase*11). 755 m = (m >> fieldBase) + 756 uint64(val.n[2])*uint64(val2.n[9]) + 757 uint64(val.n[3])*uint64(val2.n[8]) + 758 uint64(val.n[4])*uint64(val2.n[7]) + 759 uint64(val.n[5])*uint64(val2.n[6]) + 760 uint64(val.n[6])*uint64(val2.n[5]) + 761 uint64(val.n[7])*uint64(val2.n[4]) + 762 uint64(val.n[8])*uint64(val2.n[3]) + 763 uint64(val.n[9])*uint64(val2.n[2]) 764 t11 := m & fieldBaseMask 765 766 // Terms for 2^(fieldBase*12). 
767 m = (m >> fieldBase) + 768 uint64(val.n[3])*uint64(val2.n[9]) + 769 uint64(val.n[4])*uint64(val2.n[8]) + 770 uint64(val.n[5])*uint64(val2.n[7]) + 771 uint64(val.n[6])*uint64(val2.n[6]) + 772 uint64(val.n[7])*uint64(val2.n[5]) + 773 uint64(val.n[8])*uint64(val2.n[4]) + 774 uint64(val.n[9])*uint64(val2.n[3]) 775 t12 := m & fieldBaseMask 776 777 // Terms for 2^(fieldBase*13). 778 m = (m >> fieldBase) + 779 uint64(val.n[4])*uint64(val2.n[9]) + 780 uint64(val.n[5])*uint64(val2.n[8]) + 781 uint64(val.n[6])*uint64(val2.n[7]) + 782 uint64(val.n[7])*uint64(val2.n[6]) + 783 uint64(val.n[8])*uint64(val2.n[5]) + 784 uint64(val.n[9])*uint64(val2.n[4]) 785 t13 := m & fieldBaseMask 786 787 // Terms for 2^(fieldBase*14). 788 m = (m >> fieldBase) + 789 uint64(val.n[5])*uint64(val2.n[9]) + 790 uint64(val.n[6])*uint64(val2.n[8]) + 791 uint64(val.n[7])*uint64(val2.n[7]) + 792 uint64(val.n[8])*uint64(val2.n[6]) + 793 uint64(val.n[9])*uint64(val2.n[5]) 794 t14 := m & fieldBaseMask 795 796 // Terms for 2^(fieldBase*15). 797 m = (m >> fieldBase) + 798 uint64(val.n[6])*uint64(val2.n[9]) + 799 uint64(val.n[7])*uint64(val2.n[8]) + 800 uint64(val.n[8])*uint64(val2.n[7]) + 801 uint64(val.n[9])*uint64(val2.n[6]) 802 t15 := m & fieldBaseMask 803 804 // Terms for 2^(fieldBase*16). 805 m = (m >> fieldBase) + 806 uint64(val.n[7])*uint64(val2.n[9]) + 807 uint64(val.n[8])*uint64(val2.n[8]) + 808 uint64(val.n[9])*uint64(val2.n[7]) 809 t16 := m & fieldBaseMask 810 811 // Terms for 2^(fieldBase*17). 812 m = (m >> fieldBase) + 813 uint64(val.n[8])*uint64(val2.n[9]) + 814 uint64(val.n[9])*uint64(val2.n[8]) 815 t17 := m & fieldBaseMask 816 817 // Terms for 2^(fieldBase*18). 818 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9]) 819 t18 := m & fieldBaseMask 820 821 // What's left is for 2^(fieldBase*19). 822 t19 := m >> fieldBase 823 824 // At this point, all of the terms are grouped into their respective 825 // base. 
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, highly efficient
	// reduction can be achieved per the provided algorithm.
	//
	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
	// this criterion.
	//
	// 4294968273 in field representation (base 2^26) is:
	// n[0] = 977
	// n[1] = 64
	// That is to say (2^26 * 64) + 977 = 4294968273
	//
	// Since each word is in base 2^26, the upper terms (t10 and up) start
	// at 260 bits (versus the final desired range of 256 bits), so the
	// field representation of 'c' from above needs to be adjusted for the
	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
	// 68719492368.  Thus, the adjusted field representation of 'c' is:
	// n[0] = 977 * 16 = 15632
	// n[1] = 64 * 16 = 1024
	// That is to say (2^26 * 1024) + 15632 = 68719492368
	//
	// To reduce the final term, t19, the entire 'c' value is needed instead
	// of only n[0] because there are no more terms left to handle n[1].
	// This means there might be some magnitude left in the upper bits that
	// is handled below.
852 m = t0 + t10*15632 853 t0 = m & fieldBaseMask 854 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 855 t1 = m & fieldBaseMask 856 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 857 t2 = m & fieldBaseMask 858 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 859 t3 = m & fieldBaseMask 860 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 861 t4 = m & fieldBaseMask 862 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 863 t5 = m & fieldBaseMask 864 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 865 t6 = m & fieldBaseMask 866 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 867 t7 = m & fieldBaseMask 868 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 869 t8 = m & fieldBaseMask 870 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 871 t9 = m & fieldMSBMask 872 m = m >> fieldMSBBits 873 874 // At this point, if the magnitude is greater than 0, the overall value 875 // is greater than the max possible 256-bit value. In particular, it is 876 // "how many times larger" than the max value it is. 877 // 878 // The algorithm presented in [HAC] section 14.3.4 repeats until the 879 // quotient is zero. However, due to the above, we already know at 880 // least how many times we would need to repeat as it's the value 881 // currently in m. Thus we can simply multiply the magnitude by the 882 // field representation of the prime and do a single iteration. Notice 883 // that nothing will be changed when the magnitude is zero, so we could 884 // skip this in that case, however always running regardless allows it 885 // to run in constant time. The final result will be in the range 886 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 887 // magnitude of 1, but it is denormalized. 
888 d := t0 + m*977 889 f.n[0] = uint32(d & fieldBaseMask) 890 d = (d >> fieldBase) + t1 + m*64 891 f.n[1] = uint32(d & fieldBaseMask) 892 f.n[2] = uint32((d >> fieldBase) + t2) 893 f.n[3] = uint32(t3) 894 f.n[4] = uint32(t4) 895 f.n[5] = uint32(t5) 896 f.n[6] = uint32(t6) 897 f.n[7] = uint32(t7) 898 f.n[8] = uint32(t8) 899 f.n[9] = uint32(t9) 900 901 return f 902 } 903 904 // Square squares the field value. The existing field value is modified. Note 905 // that this function can overflow if multiplying any of the individual words 906 // exceeds a max uint32. In practice, this means the magnitude of the field 907 // must be a max of 8 to prevent overflow. 908 // 909 // The field value is returned to support chaining. This enables syntax like: 910 // f.Square().Mul(f2) so that f = f^2 * f2. 911 func (f *fieldVal) Square() *fieldVal { 912 return f.SquareVal(f) 913 } 914 915 // SquareVal squares the passed value and stores the result in f. Note that 916 // this function can overflow if multiplying any of the individual words 917 // exceeds a max uint32. In practice, this means the magnitude of the field 918 // being squred must be a max of 8 to prevent overflow. 919 // 920 // The field value is returned to support chaining. This enables syntax like: 921 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3. 922 func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal { 923 // This could be done with a couple of for loops and an array to store 924 // the intermediate terms, but this unrolled version is significantly 925 // faster. 926 927 // Terms for 2^(fieldBase*0). 928 m := uint64(val.n[0]) * uint64(val.n[0]) 929 t0 := m & fieldBaseMask 930 931 // Terms for 2^(fieldBase*1). 932 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1]) 933 t1 := m & fieldBaseMask 934 935 // Terms for 2^(fieldBase*2). 936 m = (m >> fieldBase) + 937 2*uint64(val.n[0])*uint64(val.n[2]) + 938 uint64(val.n[1])*uint64(val.n[1]) 939 t2 := m & fieldBaseMask 940 941 // Terms for 2^(fieldBase*3). 
942 m = (m >> fieldBase) + 943 2*uint64(val.n[0])*uint64(val.n[3]) + 944 2*uint64(val.n[1])*uint64(val.n[2]) 945 t3 := m & fieldBaseMask 946 947 // Terms for 2^(fieldBase*4). 948 m = (m >> fieldBase) + 949 2*uint64(val.n[0])*uint64(val.n[4]) + 950 2*uint64(val.n[1])*uint64(val.n[3]) + 951 uint64(val.n[2])*uint64(val.n[2]) 952 t4 := m & fieldBaseMask 953 954 // Terms for 2^(fieldBase*5). 955 m = (m >> fieldBase) + 956 2*uint64(val.n[0])*uint64(val.n[5]) + 957 2*uint64(val.n[1])*uint64(val.n[4]) + 958 2*uint64(val.n[2])*uint64(val.n[3]) 959 t5 := m & fieldBaseMask 960 961 // Terms for 2^(fieldBase*6). 962 m = (m >> fieldBase) + 963 2*uint64(val.n[0])*uint64(val.n[6]) + 964 2*uint64(val.n[1])*uint64(val.n[5]) + 965 2*uint64(val.n[2])*uint64(val.n[4]) + 966 uint64(val.n[3])*uint64(val.n[3]) 967 t6 := m & fieldBaseMask 968 969 // Terms for 2^(fieldBase*7). 970 m = (m >> fieldBase) + 971 2*uint64(val.n[0])*uint64(val.n[7]) + 972 2*uint64(val.n[1])*uint64(val.n[6]) + 973 2*uint64(val.n[2])*uint64(val.n[5]) + 974 2*uint64(val.n[3])*uint64(val.n[4]) 975 t7 := m & fieldBaseMask 976 977 // Terms for 2^(fieldBase*8). 978 m = (m >> fieldBase) + 979 2*uint64(val.n[0])*uint64(val.n[8]) + 980 2*uint64(val.n[1])*uint64(val.n[7]) + 981 2*uint64(val.n[2])*uint64(val.n[6]) + 982 2*uint64(val.n[3])*uint64(val.n[5]) + 983 uint64(val.n[4])*uint64(val.n[4]) 984 t8 := m & fieldBaseMask 985 986 // Terms for 2^(fieldBase*9). 987 m = (m >> fieldBase) + 988 2*uint64(val.n[0])*uint64(val.n[9]) + 989 2*uint64(val.n[1])*uint64(val.n[8]) + 990 2*uint64(val.n[2])*uint64(val.n[7]) + 991 2*uint64(val.n[3])*uint64(val.n[6]) + 992 2*uint64(val.n[4])*uint64(val.n[5]) 993 t9 := m & fieldBaseMask 994 995 // Terms for 2^(fieldBase*10). 
996 m = (m >> fieldBase) + 997 2*uint64(val.n[1])*uint64(val.n[9]) + 998 2*uint64(val.n[2])*uint64(val.n[8]) + 999 2*uint64(val.n[3])*uint64(val.n[7]) + 1000 2*uint64(val.n[4])*uint64(val.n[6]) + 1001 uint64(val.n[5])*uint64(val.n[5]) 1002 t10 := m & fieldBaseMask 1003 1004 // Terms for 2^(fieldBase*11). 1005 m = (m >> fieldBase) + 1006 2*uint64(val.n[2])*uint64(val.n[9]) + 1007 2*uint64(val.n[3])*uint64(val.n[8]) + 1008 2*uint64(val.n[4])*uint64(val.n[7]) + 1009 2*uint64(val.n[5])*uint64(val.n[6]) 1010 t11 := m & fieldBaseMask 1011 1012 // Terms for 2^(fieldBase*12). 1013 m = (m >> fieldBase) + 1014 2*uint64(val.n[3])*uint64(val.n[9]) + 1015 2*uint64(val.n[4])*uint64(val.n[8]) + 1016 2*uint64(val.n[5])*uint64(val.n[7]) + 1017 uint64(val.n[6])*uint64(val.n[6]) 1018 t12 := m & fieldBaseMask 1019 1020 // Terms for 2^(fieldBase*13). 1021 m = (m >> fieldBase) + 1022 2*uint64(val.n[4])*uint64(val.n[9]) + 1023 2*uint64(val.n[5])*uint64(val.n[8]) + 1024 2*uint64(val.n[6])*uint64(val.n[7]) 1025 t13 := m & fieldBaseMask 1026 1027 // Terms for 2^(fieldBase*14). 1028 m = (m >> fieldBase) + 1029 2*uint64(val.n[5])*uint64(val.n[9]) + 1030 2*uint64(val.n[6])*uint64(val.n[8]) + 1031 uint64(val.n[7])*uint64(val.n[7]) 1032 t14 := m & fieldBaseMask 1033 1034 // Terms for 2^(fieldBase*15). 1035 m = (m >> fieldBase) + 1036 2*uint64(val.n[6])*uint64(val.n[9]) + 1037 2*uint64(val.n[7])*uint64(val.n[8]) 1038 t15 := m & fieldBaseMask 1039 1040 // Terms for 2^(fieldBase*16). 1041 m = (m >> fieldBase) + 1042 2*uint64(val.n[7])*uint64(val.n[9]) + 1043 uint64(val.n[8])*uint64(val.n[8]) 1044 t16 := m & fieldBaseMask 1045 1046 // Terms for 2^(fieldBase*17). 1047 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9]) 1048 t17 := m & fieldBaseMask 1049 1050 // Terms for 2^(fieldBase*18). 1051 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9]) 1052 t18 := m & fieldBaseMask 1053 1054 // What's left is for 2^(fieldBase*19). 
1055 t19 := m >> fieldBase 1056 1057 // At this point, all of the terms are grouped into their respective 1058 // base. 1059 // 1060 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 1061 // when the modulus is of the special form m = b^t - c, highly efficient 1062 // reduction can be achieved per the provided algorithm. 1063 // 1064 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 1065 // this criteria. 1066 // 1067 // 4294968273 in field representation (base 2^26) is: 1068 // n[0] = 977 1069 // n[1] = 64 1070 // That is to say (2^26 * 64) + 977 = 4294968273 1071 // 1072 // Since each word is in base 26, the upper terms (t10 and up) start 1073 // at 260 bits (versus the final desired range of 256 bits), so the 1074 // field representation of 'c' from above needs to be adjusted for the 1075 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 1076 // 68719492368. Thus, the adjusted field representation of 'c' is: 1077 // n[0] = 977 * 16 = 15632 1078 // n[1] = 64 * 16 = 1024 1079 // That is to say (2^26 * 1024) + 15632 = 68719492368 1080 // 1081 // To reduce the final term, t19, the entire 'c' value is needed instead 1082 // of only n[0] because there are no more terms left to handle n[1]. 1083 // This means there might be some magnitude left in the upper bits that 1084 // is handled below. 
1085 m = t0 + t10*15632 1086 t0 = m & fieldBaseMask 1087 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 1088 t1 = m & fieldBaseMask 1089 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 1090 t2 = m & fieldBaseMask 1091 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 1092 t3 = m & fieldBaseMask 1093 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 1094 t4 = m & fieldBaseMask 1095 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 1096 t5 = m & fieldBaseMask 1097 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 1098 t6 = m & fieldBaseMask 1099 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 1100 t7 = m & fieldBaseMask 1101 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 1102 t8 = m & fieldBaseMask 1103 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 1104 t9 = m & fieldMSBMask 1105 m = m >> fieldMSBBits 1106 1107 // At this point, if the magnitude is greater than 0, the overall value 1108 // is greater than the max possible 256-bit value. In particular, it is 1109 // "how many times larger" than the max value it is. 1110 // 1111 // The algorithm presented in [HAC] section 14.3.4 repeats until the 1112 // quotient is zero. However, due to the above, we already know at 1113 // least how many times we would need to repeat as it's the value 1114 // currently in m. Thus we can simply multiply the magnitude by the 1115 // field representation of the prime and do a single iteration. Notice 1116 // that nothing will be changed when the magnitude is zero, so we could 1117 // skip this in that case, however always running regardless allows it 1118 // to run in constant time. The final result will be in the range 1119 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 1120 // magnitude of 1, but it is denormalized. 
1121 n := t0 + m*977 1122 f.n[0] = uint32(n & fieldBaseMask) 1123 n = (n >> fieldBase) + t1 + m*64 1124 f.n[1] = uint32(n & fieldBaseMask) 1125 f.n[2] = uint32((n >> fieldBase) + t2) 1126 f.n[3] = uint32(t3) 1127 f.n[4] = uint32(t4) 1128 f.n[5] = uint32(t5) 1129 f.n[6] = uint32(t6) 1130 f.n[7] = uint32(t7) 1131 f.n[8] = uint32(t8) 1132 f.n[9] = uint32(t9) 1133 1134 return f 1135 } 1136 1137 // Inverse finds the modular multiplicative inverse of the field value. The 1138 // existing field value is modified. 1139 // 1140 // The field value is returned to support chaining. This enables syntax like: 1141 // f.Inverse().Mul(f2) so that f = f^-1 * f2. 1142 func (f *fieldVal) Inverse() *fieldVal { 1143 // Fermat's little theorem states that for a nonzero number a and prime 1144 // prime p, a^(p-1) = 1 (mod p). Since the multipliciative inverse is 1145 // a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p). 1146 // Thus, a^(p-2) is the multiplicative inverse. 1147 // 1148 // In order to efficiently compute a^(p-2), p-2 needs to be split into 1149 // a sequence of squares and multipications that minimizes the number of 1150 // multiplications needed (since they are more costly than squarings). 1151 // Intermediate results are saved and reused as well. 1152 // 1153 // The secp256k1 prime - 2 is 2^256 - 4294968275. 1154 // 1155 // This has a cost of 258 field squarings and 33 field multiplications. 
1156 var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal 1157 a2.SquareVal(f) 1158 a3.Mul2(&a2, f) 1159 a4.SquareVal(&a2) 1160 a10.SquareVal(&a4).Mul(&a2) 1161 a11.Mul2(&a10, f) 1162 a21.Mul2(&a10, &a11) 1163 a42.SquareVal(&a21) 1164 a45.Mul2(&a42, &a3) 1165 a63.Mul2(&a42, &a21) 1166 a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11) 1167 a1023.Mul2(&a1019, &a4) 1168 f.Set(&a63) // f = a^(2^6 - 1) 1169 f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32) 1170 f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024) 1171 f.Mul(&a1023) // f = a^(2^16 - 1) 1172 f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32) 1173 f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024) 1174 f.Mul(&a1023) // f = a^(2^26 - 1) 1175 f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32) 1176 f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024) 1177 f.Mul(&a1023) // f = a^(2^36 - 1) 1178 f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32) 1179 f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024) 1180 f.Mul(&a1023) // f = a^(2^46 - 1) 1181 f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32) 1182 f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024) 1183 f.Mul(&a1023) // f = a^(2^56 - 1) 1184 f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32) 1185 f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024) 1186 f.Mul(&a1023) // f = a^(2^66 - 1) 1187 f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32) 1188 f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024) 1189 f.Mul(&a1023) // f = a^(2^76 - 1) 1190 f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32) 1191 f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024) 1192 f.Mul(&a1023) // f = a^(2^86 - 1) 1193 f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32) 1194 
f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024) 1195 f.Mul(&a1023) // f = a^(2^96 - 1) 1196 f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32) 1197 f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024) 1198 f.Mul(&a1023) // f = a^(2^106 - 1) 1199 f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32) 1200 f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024) 1201 f.Mul(&a1023) // f = a^(2^116 - 1) 1202 f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32) 1203 f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024) 1204 f.Mul(&a1023) // f = a^(2^126 - 1) 1205 f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32) 1206 f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024) 1207 f.Mul(&a1023) // f = a^(2^136 - 1) 1208 f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32) 1209 f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024) 1210 f.Mul(&a1023) // f = a^(2^146 - 1) 1211 f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32) 1212 f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024) 1213 f.Mul(&a1023) // f = a^(2^156 - 1) 1214 f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32) 1215 f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024) 1216 f.Mul(&a1023) // f = a^(2^166 - 1) 1217 f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32) 1218 f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024) 1219 f.Mul(&a1023) // f = a^(2^176 - 1) 1220 f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32) 1221 f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024) 1222 f.Mul(&a1023) // f = a^(2^186 - 1) 1223 f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32) 1224 f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024) 1225 f.Mul(&a1023) // f = 
a^(2^196 - 1) 1226 f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32) 1227 f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024) 1228 f.Mul(&a1023) // f = a^(2^206 - 1) 1229 f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32) 1230 f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024) 1231 f.Mul(&a1023) // f = a^(2^216 - 1) 1232 f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32) 1233 f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024) 1234 f.Mul(&a1019) // f = a^(2^226 - 5) 1235 f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160) 1236 f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120) 1237 f.Mul(&a1023) // f = a^(2^236 - 4097) 1238 f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104) 1239 f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328) 1240 f.Mul(&a1023) // f = a^(2^246 - 4194305) 1241 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760) 1242 f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320) 1243 return f.Mul(&a45) // f = a^(2^256 - 4294968275) = a^(p-2) 1244 } 1245 1246 // SqrtVal computes the square root of x modulo the curve's prime, and stores 1247 // the result in f. The square root is computed via exponentiation of x by the 1248 // value Q = (P+1)/4 using the curve's precomputed big-endian representation of 1249 // the Q. This method uses a modified version of square-and-multiply 1250 // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits, 1251 // which offers better performance over both big.Int exponentiation and bit-wise 1252 // square-and-multiply. 1253 // 1254 // NOTE: This method only works when P is intended to be the secp256k1 prime and 1255 // is not constant time. The returned value is of magnitude 1, but is 1256 // denormalized. 
1257 func (f *fieldVal) SqrtVal(x *fieldVal) *fieldVal { 1258 // The following computation iteratively computes x^((P+1)/4) = x^Q 1259 // using the recursive, piece-wise definition: 1260 // 1261 // x^n = (x^2)^(n/2) mod P if n is even 1262 // x^n = x(x^2)^(n-1/2) mod P if n is odd 1263 // 1264 // Given n in its big-endian representation b_k, ..., b_0, x^n can be 1265 // computed by defining the sequence r_k+1, ..., r_0, where: 1266 // 1267 // r_k+1 = 1 1268 // r_i = (r_i+1)^2 * x^b_i for i = k, ..., 0 1269 // 1270 // The final value r_0 = x^n. 1271 // 1272 // See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more 1273 // details. 1274 // 1275 // This can be further optimized, by observing that the value of Q in 1276 // secp256k1 has the value: 1277 // 1278 // Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c 1279 // 1280 // We can unroll the typical bit-wise interpretation of the 1281 // exponentiation algorithm above to instead operate on bytes. 1282 // This reduces the number of comparisons by an order of magnitude, 1283 // reducing the overhead of failed branch predictions and additional 1284 // comparisons in this method. 1285 // 1286 // Since there there are only 4 unique bytes of Q, this keeps the jump 1287 // table small without the need to handle all possible 8-bit values. 1288 // Further, we observe that 29 of the 32 bytes are 0xff; making the 1289 // first case handle 0xff therefore optimizes the hot path. 1290 f.SetInt(1) 1291 for _, b := range fieldQBytes { 1292 switch b { 1293 1294 // Most common case, where all 8 bits are set. 1295 case 0xff: 1296 f.Square().Mul(x) 1297 f.Square().Mul(x) 1298 f.Square().Mul(x) 1299 f.Square().Mul(x) 1300 f.Square().Mul(x) 1301 f.Square().Mul(x) 1302 f.Square().Mul(x) 1303 f.Square().Mul(x) 1304 1305 // First byte of Q (0x3f), where all but the top two bits are 1306 // set. 
Note that this case only applies six operations, since 1307 // the highest bit of Q resides in bit six of the first byte. We 1308 // ignore the first two bits, since squaring for these bits will 1309 // result in an invalid result. We forgo squaring f before the 1310 // first multiply, since 1^2 = 1. 1311 case 0x3f: 1312 f.Mul(x) 1313 f.Square().Mul(x) 1314 f.Square().Mul(x) 1315 f.Square().Mul(x) 1316 f.Square().Mul(x) 1317 f.Square().Mul(x) 1318 1319 // Byte 28 of Q (0xbf), where only bit 7 is unset. 1320 case 0xbf: 1321 f.Square().Mul(x) 1322 f.Square() 1323 f.Square().Mul(x) 1324 f.Square().Mul(x) 1325 f.Square().Mul(x) 1326 f.Square().Mul(x) 1327 f.Square().Mul(x) 1328 f.Square().Mul(x) 1329 1330 // Byte 31 of Q (0x0c), where only bits 3 and 4 are set. 1331 default: 1332 f.Square() 1333 f.Square() 1334 f.Square() 1335 f.Square() 1336 f.Square().Mul(x) 1337 f.Square().Mul(x) 1338 f.Square() 1339 f.Square() 1340 } 1341 } 1342 1343 return f 1344 } 1345 1346 // Sqrt computes the square root of f modulo the curve's prime, and stores the 1347 // result in f. The square root is computed via exponentiation of x by the value 1348 // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q. 1349 // This method uses a modified version of square-and-multiply exponentiation 1350 // over secp256k1 fieldVals to operate on bytes instead of bits, which offers 1351 // better performance over both big.Int exponentiation and bit-wise 1352 // square-and-multiply. 1353 // 1354 // NOTE: This method only works when P is intended to be the secp256k1 prime and 1355 // is not constant time. The returned value is of magnitude 1, but is 1356 // denormalized. 1357 func (f *fieldVal) Sqrt() *fieldVal { 1358 return f.SqrtVal(f) 1359 }