// github.com/palcoin-project/palcd@v1.0.0/btcec/field.go

// Copyright (c) 2013-2016 The btcsuite developers
// Copyright (c) 2013-2016 Dave Collins
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

package btcec

// References:
//   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
//     http://cacr.uwaterloo.ca/hac/

// All elliptic curve operations for secp256k1 are done in a finite field
// characterized by a 256-bit prime. Given this precision is larger than the
// biggest available native type, obviously some form of bignum math is needed.
// This package implements specialized fixed-precision field arithmetic rather
// than relying on an arbitrary-precision arithmetic package such as math/big
// for dealing with the field math since the size is known. As a result, rather
// large performance gains are achieved by taking advantage of many
// optimizations not available to arbitrary-precision arithmetic and generic
// modular arithmetic algorithms.
//
// There are various ways to internally represent each finite field element.
// For example, the most obvious representation would be to use an array of 4
// uint64s (64 bits * 4 = 256 bits). However, that representation suffers from
// a couple of issues. First, there is no native Go type large enough to handle
// the intermediate results while adding or multiplying two 64-bit numbers, and
// second there is no space left for overflows when performing the intermediate
// arithmetic between each array element which would lead to expensive carry
// propagation.
//
// Given the above, this implementation represents the field elements as
// 10 uint32s with each word (array entry) treated as base 2^26.
// This was chosen for the following reasons:
// 1) Most systems at the current time are 64-bit (or at least have 64-bit
//    registers available for specialized purposes such as MMX) so the
//    intermediate results can typically be done using a native register (and
//    using uint64s to avoid the need for additional half-word arithmetic)
// 2) In order to allow addition of the internal words without having to
//    propagate the carry, the max normalized value for each register must
//    be less than the number of bits available in the register
// 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
//    reasonable choice for #2
// 4) Given the need for 256-bits of precision and the properties stated in #1,
//    #2, and #3, the representation which best accommodates this is 10 uint32s
//    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
//    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
//    overflow
//
// Since it is so important that the field arithmetic is extremely fast for
// high performance crypto, this package does not perform any validation where
// it ordinarily would. For example, some functions only give the correct
// result if the field is normalized and there is no checking to ensure it is.
// While I typically prefer to ensure all state and input is valid for most
// packages, this code is really only used internally and every extra check
// counts.

import (
	"encoding/hex"
)

// Constants used to make the code more readable. Each one masks off the
// low-order bits named by its identifier.
const (
	twoBitsMask   = 0x3
	fourBitsMask  = 0xf
	sixBitsMask   = 0x3f
	eightBitsMask = 0xff
)

// Constants related to the field representation.
const (
	// fieldWords is the number of words used to internally represent the
	// 256-bit value.
	fieldWords = 10

	// fieldBase is the exponent used to form the numeric base of each word.
	// 2^(fieldBase*i) where i is the word position.
	fieldBase = 26

	// fieldOverflowBits is the minimum number of "overflow" bits for each
	// word in the field value (32 - 26 = 6).
	fieldOverflowBits = 32 - fieldBase

	// fieldBaseMask is the mask for the bits in each word needed to
	// represent the numeric base of each word (except the most significant
	// word). It evaluates to 2^26 - 1 = 0x3ffffff.
	fieldBaseMask = (1 << fieldBase) - 1

	// fieldMSBBits is the number of bits in the most significant word used
	// to represent the value (256 - 26*9 = 22).
	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))

	// fieldMSBMask is the mask for the bits in the most significant word
	// needed to represent the value. It evaluates to 2^22 - 1 = 0x3fffff.
	fieldMSBMask = (1 << fieldMSBBits) - 1

	// fieldPrimeWordZero is word zero of the secp256k1 prime in the
	// internal field representation. It is used during negation.
	fieldPrimeWordZero = 0x3fffc2f

	// fieldPrimeWordOne is word one of the secp256k1 prime in the
	// internal field representation. It is used during negation.
	fieldPrimeWordOne = 0x3ffffbf
)

var (
	// fieldQBytes is the value Q = (P+1)/4 for the secp256k1 prime P. This
	// value is used to efficiently compute the square root of values in the
	// field via exponentiation. The value of Q in hex is:
	//
	//  Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c
	//
	// The bytes are stored most significant byte first.
	fieldQBytes = []byte{
		0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x0c,
	}
)

// fieldVal implements optimized fixed-precision arithmetic over the
// secp256k1 finite field. This means all arithmetic is performed modulo
// 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f. It
// represents each 256-bit value as 10 32-bit integers in base 2^26.
This 123 // provides 6 bits of overflow in each word (10 bits in the most significant 124 // word) for a total of 64 bits of overflow (9*6 + 10 = 64). It only implements 125 // the arithmetic needed for elliptic curve operations. 126 // 127 // The following depicts the internal representation: 128 // ----------------------------------------------------------------- 129 // | n[9] | n[8] | ... | n[0] | 130 // | 32 bits available | 32 bits available | ... | 32 bits available | 131 // | 22 bits for value | 26 bits for value | ... | 26 bits for value | 132 // | 10 bits overflow | 6 bits overflow | ... | 6 bits overflow | 133 // | Mult: 2^(26*9) | Mult: 2^(26*8) | ... | Mult: 2^(26*0) | 134 // ----------------------------------------------------------------- 135 // 136 // For example, consider the number 2^49 + 1. It would be represented as: 137 // n[0] = 1 138 // n[1] = 2^23 139 // n[2..9] = 0 140 // 141 // The full 256-bit value is then calculated by looping i from 9..0 and 142 // doing sum(n[i] * 2^(26i)) like so: 143 // n[9] * 2^(26*9) = 0 * 2^234 = 0 144 // n[8] * 2^(26*8) = 0 * 2^208 = 0 145 // ... 146 // n[1] * 2^(26*1) = 2^23 * 2^26 = 2^49 147 // n[0] * 2^(26*0) = 1 * 2^0 = 1 148 // Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1 149 type fieldVal struct { 150 n [10]uint32 151 } 152 153 // String returns the field value as a human-readable hex string. 154 func (f fieldVal) String() string { 155 t := new(fieldVal).Set(&f).Normalize() 156 return hex.EncodeToString(t.Bytes()[:]) 157 } 158 159 // Zero sets the field value to zero. A newly created field value is already 160 // set to zero. This function can be useful to clear an existing field value 161 // for reuse. 162 func (f *fieldVal) Zero() { 163 f.n[0] = 0 164 f.n[1] = 0 165 f.n[2] = 0 166 f.n[3] = 0 167 f.n[4] = 0 168 f.n[5] = 0 169 f.n[6] = 0 170 f.n[7] = 0 171 f.n[8] = 0 172 f.n[9] = 0 173 } 174 175 // Set sets the field value equal to the passed value. 176 // 177 // The field value is returned to support chaining. 
This enables syntax like: 178 // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not 179 // modified. 180 func (f *fieldVal) Set(val *fieldVal) *fieldVal { 181 *f = *val 182 return f 183 } 184 185 // SetInt sets the field value to the passed integer. This is a convenience 186 // function since it is fairly common to perform some arithemetic with small 187 // native integers. 188 // 189 // The field value is returned to support chaining. This enables syntax such 190 // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2. 191 func (f *fieldVal) SetInt(ui uint) *fieldVal { 192 f.Zero() 193 f.n[0] = uint32(ui) 194 return f 195 } 196 197 // SetBytes packs the passed 32-byte big-endian value into the internal field 198 // value representation. 199 // 200 // The field value is returned to support chaining. This enables syntax like: 201 // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2. 202 func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal { 203 // Pack the 256 total bits across the 10 uint32 words with a max of 204 // 26-bits per word. This could be done with a couple of for loops, 205 // but this unrolled version is significantly faster. Benchmarks show 206 // this is about 34 times faster than the variant which uses loops. 
207 f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 | 208 (uint32(b[28])&twoBitsMask)<<24 209 f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 | 210 (uint32(b[25])&fourBitsMask)<<22 211 f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 | 212 (uint32(b[22])&sixBitsMask)<<20 213 f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 | 214 uint32(b[19])<<18 215 f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 | 216 (uint32(b[15])&twoBitsMask)<<24 217 f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 | 218 (uint32(b[12])&fourBitsMask)<<22 219 f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 | 220 (uint32(b[9])&sixBitsMask)<<20 221 f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 | 222 uint32(b[6])<<18 223 f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 | 224 (uint32(b[2])&twoBitsMask)<<24 225 f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14 226 return f 227 } 228 229 // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned 230 // integer (meaning it is truncated to the first 32 bytes), packs it into the 231 // internal field value representation, and returns the updated field value. 232 // 233 // Note that since passing a slice with more than 32 bytes is truncated, it is 234 // possible that the truncated value is less than the field prime. It is up to 235 // the caller to decide whether it needs to provide numbers of the appropriate 236 // size or if it is acceptable to use this function with the described 237 // truncation behavior. 238 // 239 // The field value is returned to support chaining. 
// This enables syntax like:
// f := new(fieldVal).SetByteSlice(byteSlice)
func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
	// Right-align the (possibly truncated) input in a zeroed 32-byte buffer
	// so that shorter slices are treated as having leading zero bytes.
	var b32 [32]byte
	if len(b) > 32 {
		b = b[:32]
	}
	copy(b32[32-len(b):], b)
	return f.SetBytes(&b32)
}

// SetHex decodes the passed big-endian hex string into the internal field value
// representation. Only the first 32-bytes are used. A string with an odd number
// of digits is left-padded with a single "0" before decoding.
//
// NOTE(review): the error from hex.DecodeString is discarded, so an invalid
// string does not fail; whatever bytes DecodeString returned are packed
// instead. Confirm callers only pass trusted, well-formed hex.
//
// The field value is returned to support chaining. This enables syntax like:
// f := new(fieldVal).SetHex("0abc").AddInt(1) so that f = 0x0abc + 1
func (f *fieldVal) SetHex(hexString string) *fieldVal {
	if len(hexString)%2 != 0 {
		hexString = "0" + hexString
	}
	bytes, _ := hex.DecodeString(hexString)
	return f.SetByteSlice(bytes)
}

// Normalize normalizes the internal field words into the desired range and
// performs fast modular reduction over the secp256k1 prime by making use of the
// special form of the prime.
//
// The receiver is modified in place and returned to support chaining.
func (f *fieldVal) Normalize() *fieldVal {
	// The field representation leaves 6 bits of overflow in each word so
	// intermediate calculations can be performed without needing to
	// propagate the carry to each higher word during the calculations. In
	// order to normalize, we need to "compact" the full 256-bit value to
	// the right while propagating any carries through to the high order
	// word.
	//
	// Since this field is doing arithmetic modulo the secp256k1 prime, we
	// also need to perform modular reduction over the prime.
	//
	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
	// when the modulus is of the special form m = b^t - c, highly efficient
	// reduction can be achieved.
	//
	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
	// this criteria.
	//
	// 4294968273 in field representation (base 2^26) is:
	// n[0] = 977
	// n[1] = 64
	// That is to say (2^26 * 64) + 977 = 4294968273
	//
	// The algorithm presented in the referenced section typically repeats
	// until the quotient is zero. However, due to our field representation
	// we already know to within one reduction how many times we would need
	// to repeat as it's the uppermost bits of the high order word. Thus we
	// can simply multiply the magnitude by the field representation of the
	// prime and do a single iteration. After this step there might be an
	// additional carry to bit 256 (bit 22 of the high order word).
	//
	// In the code below m is that magnitude (the bits of n[9] above bit 21);
	// m*977 is added to word zero and m<<6 (that is, m*64) to word one, and
	// each word's bits above bit 25 are carried into the next word.
	t9 := f.n[9]
	m := t9 >> fieldMSBBits
	t9 = t9 & fieldMSBMask
	t0 := f.n[0] + m*977
	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 := (t1 >> fieldBase) + f.n[2]
	t1 = t1 & fieldBaseMask
	t3 := (t2 >> fieldBase) + f.n[3]
	t2 = t2 & fieldBaseMask
	t4 := (t3 >> fieldBase) + f.n[4]
	t3 = t3 & fieldBaseMask
	t5 := (t4 >> fieldBase) + f.n[5]
	t4 = t4 & fieldBaseMask
	t6 := (t5 >> fieldBase) + f.n[6]
	t5 = t5 & fieldBaseMask
	t7 := (t6 >> fieldBase) + f.n[7]
	t6 = t6 & fieldBaseMask
	t8 := (t7 >> fieldBase) + f.n[8]
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask

	// At this point, the magnitude is guaranteed to be one, however, the
	// value could still be greater than the prime if there was either a
	// carry through to bit 256 (bit 22 of the higher order word) or the
	// value is greater than or equal to the field characteristic. The
	// following determines if either of these conditions are true and does
	// the final reduction in constant time.
	//
	// Note that the if/else statements here intentionally do the bitwise
	// operators even when it won't change the value to ensure constant time
	// between the branches. Also note that 'm' will be zero when neither
	// of the aforementioned conditions are true and the value will not be
	// changed when 'm' is zero.
	//
	// NOTE(review): the branches below are still data-dependent at the
	// source level; whether the generated code is actually constant time
	// depends on the compiler -- confirm if timing side channels matter.
	//
	// The first three tests together check value >= prime: the top word is
	// all ones, words 2..8 are all ones, and adding 977 / 64 to words zero
	// and one would carry out of word one. The fourth test covers a carry
	// into bit 22 of the top word.
	m = 1
	if t9 == fieldMSBMask {
		m &= 1
	} else {
		m &= 0
	}
	if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
		m &= 1
	} else {
		m &= 0
	}
	if t9>>fieldMSBBits != 0 {
		m |= 1
	} else {
		m |= 0
	}
	// Second pass: add m times (977 + 64*2^26) and propagate the carries
	// again. With m == 0 this leaves every word unchanged.
	t0 = t0 + m*977
	t1 = (t0 >> fieldBase) + t1 + (m << 6)
	t0 = t0 & fieldBaseMask
	t2 = (t1 >> fieldBase) + t2
	t1 = t1 & fieldBaseMask
	t3 = (t2 >> fieldBase) + t3
	t2 = t2 & fieldBaseMask
	t4 = (t3 >> fieldBase) + t4
	t3 = t3 & fieldBaseMask
	t5 = (t4 >> fieldBase) + t5
	t4 = t4 & fieldBaseMask
	t6 = (t5 >> fieldBase) + t6
	t5 = t5 & fieldBaseMask
	t7 = (t6 >> fieldBase) + t7
	t6 = t6 & fieldBaseMask
	t8 = (t7 >> fieldBase) + t8
	t7 = t7 & fieldBaseMask
	t9 = (t8 >> fieldBase) + t9
	t8 = t8 & fieldBaseMask
	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.

	// Finally, set the normalized and reduced words.
	f.n[0] = t0
	f.n[1] = t1
	f.n[2] = t2
	f.n[3] = t3
	f.n[4] = t4
	f.n[5] = t5
	f.n[6] = t6
	f.n[7] = t7
	f.n[8] = t8
	f.n[9] = t9
	return f
}

// PutBytes unpacks the field value to a 32-byte big-endian value using the
// passed byte array. There is a similar function, Bytes, which unpacks the
// field value into a new array and returns that. This version is provided
// since it can be useful to cut down on the number of allocations by allowing
// the caller to reuse a buffer.
//
// All 32 bytes of b are overwritten.
//
// The field value must be normalized for this function to return the correct
// result.
func (f *fieldVal) PutBytes(b *[32]byte) {
	// Unpack the 256 total bits from the 10 uint32 words with a max of
	// 26-bits per word. This could be done with a couple of for loops,
	// but this unrolled version is a bit faster. Benchmarks show this is
	// about 10 times faster than the variant which uses loops.
	//
	// b[31] is the least significant byte. Bytes that straddle two words
	// (b[28], b[25], b[22], ...) combine the top bits of the lower word
	// with the bottom bits of the next one.
	b[31] = byte(f.n[0] & eightBitsMask)
	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
	b[18] = byte(f.n[4] & eightBitsMask)
	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
	b[5] = byte(f.n[8] & eightBitsMask)
	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
}

// Bytes unpacks the field value to a 32-byte big-endian value.
See PutBytes 435 // for a variant that allows the a buffer to be passed which can be useful to 436 // to cut down on the number of allocations by allowing the caller to reuse a 437 // buffer. 438 // 439 // The field value must be normalized for this function to return correct 440 // result. 441 func (f *fieldVal) Bytes() *[32]byte { 442 b := new([32]byte) 443 f.PutBytes(b) 444 return b 445 } 446 447 // IsZero returns whether or not the field value is equal to zero. 448 func (f *fieldVal) IsZero() bool { 449 // The value can only be zero if no bits are set in any of the words. 450 // This is a constant time implementation. 451 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] | 452 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9] 453 454 return bits == 0 455 } 456 457 // IsOdd returns whether or not the field value is an odd number. 458 // 459 // The field value must be normalized for this function to return correct 460 // result. 461 func (f *fieldVal) IsOdd() bool { 462 // Only odd numbers have the bottom bit set. 463 return f.n[0]&1 == 1 464 } 465 466 // Equals returns whether or not the two field values are the same. Both 467 // field values being compared must be normalized for this function to return 468 // the correct result. 469 func (f *fieldVal) Equals(val *fieldVal) bool { 470 // Xor only sets bits when they are different, so the two field values 471 // can only be the same if no bits are set after xoring each word. 472 // This is a constant time implementation. 473 bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) | 474 (f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) | 475 (f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) | 476 (f.n[9] ^ val.n[9]) 477 478 return bits == 0 479 } 480 481 // NegateVal negates the passed value and stores the result in f. The caller 482 // must provide the magnitude of the passed value for a correct result. 483 // 484 // The field value is returned to support chaining. 
This enables syntax like: 485 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1. 486 func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal { 487 // Negation in the field is just the prime minus the value. However, 488 // in order to allow negation against a field value without having to 489 // normalize/reduce it first, multiply by the magnitude (that is how 490 // "far" away it is from the normalized value) to adjust. Also, since 491 // negating a value pushes it one more order of magnitude away from the 492 // normalized range, add 1 to compensate. 493 // 494 // For some intuition here, imagine you're performing mod 12 arithmetic 495 // (picture a clock) and you are negating the number 7. So you start at 496 // 12 (which is of course 0 under mod 12) and count backwards (left on 497 // the clock) 7 times to arrive at 5. Notice this is just 12-7 = 5. 498 // Now, assume you're starting with 19, which is a number that is 499 // already larger than the modulus and congruent to 7 (mod 12). When a 500 // value is already in the desired range, its magnitude is 1. Since 19 501 // is an additional "step", its magnitude (mod 12) is 2. Since any 502 // multiple of the modulus is conguent to zero (mod m), the answer can 503 // be shortcut by simply mulplying the magnitude by the modulus and 504 // subtracting. Keeping with the example, this would be (2*12)-19 = 5. 505 f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0] 506 f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1] 507 f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2] 508 f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3] 509 f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4] 510 f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5] 511 f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6] 512 f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7] 513 f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8] 514 f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9] 515 516 return f 517 } 518 519 // Negate negates the field value. 
The existing field value is modified. The 520 // caller must provide the magnitude of the field value for a correct result. 521 // 522 // The field value is returned to support chaining. This enables syntax like: 523 // f.Negate().AddInt(1) so that f = -f + 1. 524 func (f *fieldVal) Negate(magnitude uint32) *fieldVal { 525 return f.NegateVal(f, magnitude) 526 } 527 528 // AddInt adds the passed integer to the existing field value and stores the 529 // result in f. This is a convenience function since it is fairly common to 530 // perform some arithemetic with small native integers. 531 // 532 // The field value is returned to support chaining. This enables syntax like: 533 // f.AddInt(1).Add(f2) so that f = f + 1 + f2. 534 func (f *fieldVal) AddInt(ui uint) *fieldVal { 535 // Since the field representation intentionally provides overflow bits, 536 // it's ok to use carryless addition as the carry bit is safely part of 537 // the word and will be normalized out. 538 f.n[0] += uint32(ui) 539 540 return f 541 } 542 543 // Add adds the passed value to the existing field value and stores the result 544 // in f. 545 // 546 // The field value is returned to support chaining. This enables syntax like: 547 // f.Add(f2).AddInt(1) so that f = f + f2 + 1. 548 func (f *fieldVal) Add(val *fieldVal) *fieldVal { 549 // Since the field representation intentionally provides overflow bits, 550 // it's ok to use carryless addition as the carry bit is safely part of 551 // each word and will be normalized out. This could obviously be done 552 // in a loop, but the unrolled version is faster. 553 f.n[0] += val.n[0] 554 f.n[1] += val.n[1] 555 f.n[2] += val.n[2] 556 f.n[3] += val.n[3] 557 f.n[4] += val.n[4] 558 f.n[5] += val.n[5] 559 f.n[6] += val.n[6] 560 f.n[7] += val.n[7] 561 f.n[8] += val.n[8] 562 f.n[9] += val.n[9] 563 564 return f 565 } 566 567 // Add2 adds the passed two field values together and stores the result in f. 568 // 569 // The field value is returned to support chaining. 
This enables syntax like: 570 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1. 571 func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal { 572 // Since the field representation intentionally provides overflow bits, 573 // it's ok to use carryless addition as the carry bit is safely part of 574 // each word and will be normalized out. This could obviously be done 575 // in a loop, but the unrolled version is faster. 576 f.n[0] = val.n[0] + val2.n[0] 577 f.n[1] = val.n[1] + val2.n[1] 578 f.n[2] = val.n[2] + val2.n[2] 579 f.n[3] = val.n[3] + val2.n[3] 580 f.n[4] = val.n[4] + val2.n[4] 581 f.n[5] = val.n[5] + val2.n[5] 582 f.n[6] = val.n[6] + val2.n[6] 583 f.n[7] = val.n[7] + val2.n[7] 584 f.n[8] = val.n[8] + val2.n[8] 585 f.n[9] = val.n[9] + val2.n[9] 586 587 return f 588 } 589 590 // MulInt multiplies the field value by the passed int and stores the result in 591 // f. Note that this function can overflow if multiplying the value by any of 592 // the individual words exceeds a max uint32. Therefore it is important that 593 // the caller ensures no overflows will occur before using this function. 594 // 595 // The field value is returned to support chaining. This enables syntax like: 596 // f.MulInt(2).Add(f2) so that f = 2 * f + f2. 597 func (f *fieldVal) MulInt(val uint) *fieldVal { 598 // Since each word of the field representation can hold up to 599 // fieldOverflowBits extra bits which will be normalized out, it's safe 600 // to multiply each word without using a larger type or carry 601 // propagation so long as the values won't overflow a uint32. This 602 // could obviously be done in a loop, but the unrolled version is 603 // faster. 
604 ui := uint32(val) 605 f.n[0] *= ui 606 f.n[1] *= ui 607 f.n[2] *= ui 608 f.n[3] *= ui 609 f.n[4] *= ui 610 f.n[5] *= ui 611 f.n[6] *= ui 612 f.n[7] *= ui 613 f.n[8] *= ui 614 f.n[9] *= ui 615 616 return f 617 } 618 619 // Mul multiplies the passed value to the existing field value and stores the 620 // result in f. Note that this function can overflow if multiplying any 621 // of the individual words exceeds a max uint32. In practice, this means the 622 // magnitude of either value involved in the multiplication must be a max of 623 // 8. 624 // 625 // The field value is returned to support chaining. This enables syntax like: 626 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1. 627 func (f *fieldVal) Mul(val *fieldVal) *fieldVal { 628 return f.Mul2(f, val) 629 } 630 631 // Mul2 multiplies the passed two field values together and stores the result 632 // result in f. Note that this function can overflow if multiplying any of 633 // the individual words exceeds a max uint32. In practice, this means the 634 // magnitude of either value involved in the multiplication must be a max of 635 // 8. 636 // 637 // The field value is returned to support chaining. This enables syntax like: 638 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1. 639 func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal { 640 // This could be done with a couple of for loops and an array to store 641 // the intermediate terms, but this unrolled version is significantly 642 // faster. 643 644 // Terms for 2^(fieldBase*0). 645 m := uint64(val.n[0]) * uint64(val2.n[0]) 646 t0 := m & fieldBaseMask 647 648 // Terms for 2^(fieldBase*1). 649 m = (m >> fieldBase) + 650 uint64(val.n[0])*uint64(val2.n[1]) + 651 uint64(val.n[1])*uint64(val2.n[0]) 652 t1 := m & fieldBaseMask 653 654 // Terms for 2^(fieldBase*2). 
655 m = (m >> fieldBase) + 656 uint64(val.n[0])*uint64(val2.n[2]) + 657 uint64(val.n[1])*uint64(val2.n[1]) + 658 uint64(val.n[2])*uint64(val2.n[0]) 659 t2 := m & fieldBaseMask 660 661 // Terms for 2^(fieldBase*3). 662 m = (m >> fieldBase) + 663 uint64(val.n[0])*uint64(val2.n[3]) + 664 uint64(val.n[1])*uint64(val2.n[2]) + 665 uint64(val.n[2])*uint64(val2.n[1]) + 666 uint64(val.n[3])*uint64(val2.n[0]) 667 t3 := m & fieldBaseMask 668 669 // Terms for 2^(fieldBase*4). 670 m = (m >> fieldBase) + 671 uint64(val.n[0])*uint64(val2.n[4]) + 672 uint64(val.n[1])*uint64(val2.n[3]) + 673 uint64(val.n[2])*uint64(val2.n[2]) + 674 uint64(val.n[3])*uint64(val2.n[1]) + 675 uint64(val.n[4])*uint64(val2.n[0]) 676 t4 := m & fieldBaseMask 677 678 // Terms for 2^(fieldBase*5). 679 m = (m >> fieldBase) + 680 uint64(val.n[0])*uint64(val2.n[5]) + 681 uint64(val.n[1])*uint64(val2.n[4]) + 682 uint64(val.n[2])*uint64(val2.n[3]) + 683 uint64(val.n[3])*uint64(val2.n[2]) + 684 uint64(val.n[4])*uint64(val2.n[1]) + 685 uint64(val.n[5])*uint64(val2.n[0]) 686 t5 := m & fieldBaseMask 687 688 // Terms for 2^(fieldBase*6). 689 m = (m >> fieldBase) + 690 uint64(val.n[0])*uint64(val2.n[6]) + 691 uint64(val.n[1])*uint64(val2.n[5]) + 692 uint64(val.n[2])*uint64(val2.n[4]) + 693 uint64(val.n[3])*uint64(val2.n[3]) + 694 uint64(val.n[4])*uint64(val2.n[2]) + 695 uint64(val.n[5])*uint64(val2.n[1]) + 696 uint64(val.n[6])*uint64(val2.n[0]) 697 t6 := m & fieldBaseMask 698 699 // Terms for 2^(fieldBase*7). 700 m = (m >> fieldBase) + 701 uint64(val.n[0])*uint64(val2.n[7]) + 702 uint64(val.n[1])*uint64(val2.n[6]) + 703 uint64(val.n[2])*uint64(val2.n[5]) + 704 uint64(val.n[3])*uint64(val2.n[4]) + 705 uint64(val.n[4])*uint64(val2.n[3]) + 706 uint64(val.n[5])*uint64(val2.n[2]) + 707 uint64(val.n[6])*uint64(val2.n[1]) + 708 uint64(val.n[7])*uint64(val2.n[0]) 709 t7 := m & fieldBaseMask 710 711 // Terms for 2^(fieldBase*8). 
712 m = (m >> fieldBase) + 713 uint64(val.n[0])*uint64(val2.n[8]) + 714 uint64(val.n[1])*uint64(val2.n[7]) + 715 uint64(val.n[2])*uint64(val2.n[6]) + 716 uint64(val.n[3])*uint64(val2.n[5]) + 717 uint64(val.n[4])*uint64(val2.n[4]) + 718 uint64(val.n[5])*uint64(val2.n[3]) + 719 uint64(val.n[6])*uint64(val2.n[2]) + 720 uint64(val.n[7])*uint64(val2.n[1]) + 721 uint64(val.n[8])*uint64(val2.n[0]) 722 t8 := m & fieldBaseMask 723 724 // Terms for 2^(fieldBase*9). 725 m = (m >> fieldBase) + 726 uint64(val.n[0])*uint64(val2.n[9]) + 727 uint64(val.n[1])*uint64(val2.n[8]) + 728 uint64(val.n[2])*uint64(val2.n[7]) + 729 uint64(val.n[3])*uint64(val2.n[6]) + 730 uint64(val.n[4])*uint64(val2.n[5]) + 731 uint64(val.n[5])*uint64(val2.n[4]) + 732 uint64(val.n[6])*uint64(val2.n[3]) + 733 uint64(val.n[7])*uint64(val2.n[2]) + 734 uint64(val.n[8])*uint64(val2.n[1]) + 735 uint64(val.n[9])*uint64(val2.n[0]) 736 t9 := m & fieldBaseMask 737 738 // Terms for 2^(fieldBase*10). 739 m = (m >> fieldBase) + 740 uint64(val.n[1])*uint64(val2.n[9]) + 741 uint64(val.n[2])*uint64(val2.n[8]) + 742 uint64(val.n[3])*uint64(val2.n[7]) + 743 uint64(val.n[4])*uint64(val2.n[6]) + 744 uint64(val.n[5])*uint64(val2.n[5]) + 745 uint64(val.n[6])*uint64(val2.n[4]) + 746 uint64(val.n[7])*uint64(val2.n[3]) + 747 uint64(val.n[8])*uint64(val2.n[2]) + 748 uint64(val.n[9])*uint64(val2.n[1]) 749 t10 := m & fieldBaseMask 750 751 // Terms for 2^(fieldBase*11). 752 m = (m >> fieldBase) + 753 uint64(val.n[2])*uint64(val2.n[9]) + 754 uint64(val.n[3])*uint64(val2.n[8]) + 755 uint64(val.n[4])*uint64(val2.n[7]) + 756 uint64(val.n[5])*uint64(val2.n[6]) + 757 uint64(val.n[6])*uint64(val2.n[5]) + 758 uint64(val.n[7])*uint64(val2.n[4]) + 759 uint64(val.n[8])*uint64(val2.n[3]) + 760 uint64(val.n[9])*uint64(val2.n[2]) 761 t11 := m & fieldBaseMask 762 763 // Terms for 2^(fieldBase*12). 
764 m = (m >> fieldBase) + 765 uint64(val.n[3])*uint64(val2.n[9]) + 766 uint64(val.n[4])*uint64(val2.n[8]) + 767 uint64(val.n[5])*uint64(val2.n[7]) + 768 uint64(val.n[6])*uint64(val2.n[6]) + 769 uint64(val.n[7])*uint64(val2.n[5]) + 770 uint64(val.n[8])*uint64(val2.n[4]) + 771 uint64(val.n[9])*uint64(val2.n[3]) 772 t12 := m & fieldBaseMask 773 774 // Terms for 2^(fieldBase*13). 775 m = (m >> fieldBase) + 776 uint64(val.n[4])*uint64(val2.n[9]) + 777 uint64(val.n[5])*uint64(val2.n[8]) + 778 uint64(val.n[6])*uint64(val2.n[7]) + 779 uint64(val.n[7])*uint64(val2.n[6]) + 780 uint64(val.n[8])*uint64(val2.n[5]) + 781 uint64(val.n[9])*uint64(val2.n[4]) 782 t13 := m & fieldBaseMask 783 784 // Terms for 2^(fieldBase*14). 785 m = (m >> fieldBase) + 786 uint64(val.n[5])*uint64(val2.n[9]) + 787 uint64(val.n[6])*uint64(val2.n[8]) + 788 uint64(val.n[7])*uint64(val2.n[7]) + 789 uint64(val.n[8])*uint64(val2.n[6]) + 790 uint64(val.n[9])*uint64(val2.n[5]) 791 t14 := m & fieldBaseMask 792 793 // Terms for 2^(fieldBase*15). 794 m = (m >> fieldBase) + 795 uint64(val.n[6])*uint64(val2.n[9]) + 796 uint64(val.n[7])*uint64(val2.n[8]) + 797 uint64(val.n[8])*uint64(val2.n[7]) + 798 uint64(val.n[9])*uint64(val2.n[6]) 799 t15 := m & fieldBaseMask 800 801 // Terms for 2^(fieldBase*16). 802 m = (m >> fieldBase) + 803 uint64(val.n[7])*uint64(val2.n[9]) + 804 uint64(val.n[8])*uint64(val2.n[8]) + 805 uint64(val.n[9])*uint64(val2.n[7]) 806 t16 := m & fieldBaseMask 807 808 // Terms for 2^(fieldBase*17). 809 m = (m >> fieldBase) + 810 uint64(val.n[8])*uint64(val2.n[9]) + 811 uint64(val.n[9])*uint64(val2.n[8]) 812 t17 := m & fieldBaseMask 813 814 // Terms for 2^(fieldBase*18). 815 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9]) 816 t18 := m & fieldBaseMask 817 818 // What's left is for 2^(fieldBase*19). 819 t19 := m >> fieldBase 820 821 // At this point, all of the terms are grouped into their respective 822 // base. 
823 // 824 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 825 // when the modulus is of the special form m = b^t - c, highly efficient 826 // reduction can be achieved per the provided algorithm. 827 // 828 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 829 // this criterion. 830 // 831 // 4294968273 in field representation (base 2^26) is: 832 // n[0] = 977 833 // n[1] = 64 834 // That is to say (2^26 * 64) + 977 = 4294968273 835 // 836 // Since each word is in base 2^26, the upper terms (t10 and up) start 837 // at 260 bits (versus the final desired range of 256 bits), so the 838 // field representation of 'c' from above needs to be adjusted for the 839 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 840 // 68719492368. Thus, the adjusted field representation of 'c' is: 841 // n[0] = 977 * 16 = 15632 842 // n[1] = 64 * 16 = 1024 843 // That is to say (2^26 * 1024) + 15632 = 68719492368 844 // 845 // To reduce the final term, t19, the entire 'c' value is needed instead 846 // of only n[0] because there are no more terms left to handle n[1]. 847 // This means there might be some magnitude left in the upper bits that 848 // is handled below. 
849 m = t0 + t10*15632 850 t0 = m & fieldBaseMask 851 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 852 t1 = m & fieldBaseMask 853 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 854 t2 = m & fieldBaseMask 855 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 856 t3 = m & fieldBaseMask 857 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 858 t4 = m & fieldBaseMask 859 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 860 t5 = m & fieldBaseMask 861 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 862 t6 = m & fieldBaseMask 863 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 864 t7 = m & fieldBaseMask 865 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 866 t8 = m & fieldBaseMask 867 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 868 t9 = m & fieldMSBMask 869 m = m >> fieldMSBBits 870 871 // At this point, if the magnitude is greater than 0, the overall value 872 // is greater than the max possible 256-bit value. In particular, it is 873 // "how many times larger" than the max value it is. 874 // 875 // The algorithm presented in [HAC] section 14.3.4 repeats until the 876 // quotient is zero. However, due to the above, we already know at 877 // least how many times we would need to repeat as it's the value 878 // currently in m. Thus we can simply multiply the magnitude by the 879 // field representation of the prime and do a single iteration. Notice 880 // that nothing will be changed when the magnitude is zero, so we could 881 // skip this in that case, however always running regardless allows it 882 // to run in constant time. The final result will be in the range 883 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 884 // magnitude of 1, but it is denormalized. 
885 d := t0 + m*977 886 f.n[0] = uint32(d & fieldBaseMask) 887 d = (d >> fieldBase) + t1 + m*64 888 f.n[1] = uint32(d & fieldBaseMask) 889 f.n[2] = uint32((d >> fieldBase) + t2) 890 f.n[3] = uint32(t3) 891 f.n[4] = uint32(t4) 892 f.n[5] = uint32(t5) 893 f.n[6] = uint32(t6) 894 f.n[7] = uint32(t7) 895 f.n[8] = uint32(t8) 896 f.n[9] = uint32(t9) 897 898 return f 899 } 900 901 // Square squares the field value. The existing field value is modified. Note 902 // that this function can overflow if multiplying any of the individual words 903 // exceeds a max uint32. In practice, this means the magnitude of the field 904 // must be a max of 8 to prevent overflow. 905 // 906 // The field value is returned to support chaining. This enables syntax like: 907 // f.Square().Mul(f2) so that f = f^2 * f2. 908 func (f *fieldVal) Square() *fieldVal { 909 return f.SquareVal(f) 910 } 911 912 // SquareVal squares the passed value and stores the result in f. Note that 913 // this function can overflow if multiplying any of the individual words 914 // exceeds a max uint32. In practice, this means the magnitude of the field 915 // being squared must be a max of 8 to prevent overflow. 916 // 917 // The field value is returned to support chaining. This enables syntax like: 918 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3. 919 func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal { 920 // This could be done with a couple of for loops and an array to store 921 // the intermediate terms, but this unrolled version is significantly 922 // faster. Every word of val is read into the terms t0..t19 before any word of f is written, so val may alias f (Square relies on this). 923 924 // Terms for 2^(fieldBase*0). 925 m := uint64(val.n[0]) * uint64(val.n[0]) 926 t0 := m & fieldBaseMask 927 928 // Terms for 2^(fieldBase*1). 929 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1]) 930 t1 := m & fieldBaseMask 931 932 // Terms for 2^(fieldBase*2). 933 m = (m >> fieldBase) + 934 2*uint64(val.n[0])*uint64(val.n[2]) + 935 uint64(val.n[1])*uint64(val.n[1]) 936 t2 := m & fieldBaseMask 937 938 // Terms for 2^(fieldBase*3). 
939 m = (m >> fieldBase) + 940 2*uint64(val.n[0])*uint64(val.n[3]) + 941 2*uint64(val.n[1])*uint64(val.n[2]) 942 t3 := m & fieldBaseMask 943 944 // Terms for 2^(fieldBase*4). 945 m = (m >> fieldBase) + 946 2*uint64(val.n[0])*uint64(val.n[4]) + 947 2*uint64(val.n[1])*uint64(val.n[3]) + 948 uint64(val.n[2])*uint64(val.n[2]) 949 t4 := m & fieldBaseMask 950 951 // Terms for 2^(fieldBase*5). 952 m = (m >> fieldBase) + 953 2*uint64(val.n[0])*uint64(val.n[5]) + 954 2*uint64(val.n[1])*uint64(val.n[4]) + 955 2*uint64(val.n[2])*uint64(val.n[3]) 956 t5 := m & fieldBaseMask 957 958 // Terms for 2^(fieldBase*6). 959 m = (m >> fieldBase) + 960 2*uint64(val.n[0])*uint64(val.n[6]) + 961 2*uint64(val.n[1])*uint64(val.n[5]) + 962 2*uint64(val.n[2])*uint64(val.n[4]) + 963 uint64(val.n[3])*uint64(val.n[3]) 964 t6 := m & fieldBaseMask 965 966 // Terms for 2^(fieldBase*7). 967 m = (m >> fieldBase) + 968 2*uint64(val.n[0])*uint64(val.n[7]) + 969 2*uint64(val.n[1])*uint64(val.n[6]) + 970 2*uint64(val.n[2])*uint64(val.n[5]) + 971 2*uint64(val.n[3])*uint64(val.n[4]) 972 t7 := m & fieldBaseMask 973 974 // Terms for 2^(fieldBase*8). 975 m = (m >> fieldBase) + 976 2*uint64(val.n[0])*uint64(val.n[8]) + 977 2*uint64(val.n[1])*uint64(val.n[7]) + 978 2*uint64(val.n[2])*uint64(val.n[6]) + 979 2*uint64(val.n[3])*uint64(val.n[5]) + 980 uint64(val.n[4])*uint64(val.n[4]) 981 t8 := m & fieldBaseMask 982 983 // Terms for 2^(fieldBase*9). 984 m = (m >> fieldBase) + 985 2*uint64(val.n[0])*uint64(val.n[9]) + 986 2*uint64(val.n[1])*uint64(val.n[8]) + 987 2*uint64(val.n[2])*uint64(val.n[7]) + 988 2*uint64(val.n[3])*uint64(val.n[6]) + 989 2*uint64(val.n[4])*uint64(val.n[5]) 990 t9 := m & fieldBaseMask 991 992 // Terms for 2^(fieldBase*10). 
993 m = (m >> fieldBase) + 994 2*uint64(val.n[1])*uint64(val.n[9]) + 995 2*uint64(val.n[2])*uint64(val.n[8]) + 996 2*uint64(val.n[3])*uint64(val.n[7]) + 997 2*uint64(val.n[4])*uint64(val.n[6]) + 998 uint64(val.n[5])*uint64(val.n[5]) 999 t10 := m & fieldBaseMask 1000 1001 // Terms for 2^(fieldBase*11). 1002 m = (m >> fieldBase) + 1003 2*uint64(val.n[2])*uint64(val.n[9]) + 1004 2*uint64(val.n[3])*uint64(val.n[8]) + 1005 2*uint64(val.n[4])*uint64(val.n[7]) + 1006 2*uint64(val.n[5])*uint64(val.n[6]) 1007 t11 := m & fieldBaseMask 1008 1009 // Terms for 2^(fieldBase*12). 1010 m = (m >> fieldBase) + 1011 2*uint64(val.n[3])*uint64(val.n[9]) + 1012 2*uint64(val.n[4])*uint64(val.n[8]) + 1013 2*uint64(val.n[5])*uint64(val.n[7]) + 1014 uint64(val.n[6])*uint64(val.n[6]) 1015 t12 := m & fieldBaseMask 1016 1017 // Terms for 2^(fieldBase*13). 1018 m = (m >> fieldBase) + 1019 2*uint64(val.n[4])*uint64(val.n[9]) + 1020 2*uint64(val.n[5])*uint64(val.n[8]) + 1021 2*uint64(val.n[6])*uint64(val.n[7]) 1022 t13 := m & fieldBaseMask 1023 1024 // Terms for 2^(fieldBase*14). 1025 m = (m >> fieldBase) + 1026 2*uint64(val.n[5])*uint64(val.n[9]) + 1027 2*uint64(val.n[6])*uint64(val.n[8]) + 1028 uint64(val.n[7])*uint64(val.n[7]) 1029 t14 := m & fieldBaseMask 1030 1031 // Terms for 2^(fieldBase*15). 1032 m = (m >> fieldBase) + 1033 2*uint64(val.n[6])*uint64(val.n[9]) + 1034 2*uint64(val.n[7])*uint64(val.n[8]) 1035 t15 := m & fieldBaseMask 1036 1037 // Terms for 2^(fieldBase*16). 1038 m = (m >> fieldBase) + 1039 2*uint64(val.n[7])*uint64(val.n[9]) + 1040 uint64(val.n[8])*uint64(val.n[8]) 1041 t16 := m & fieldBaseMask 1042 1043 // Terms for 2^(fieldBase*17). 1044 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9]) 1045 t17 := m & fieldBaseMask 1046 1047 // Terms for 2^(fieldBase*18). 1048 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9]) 1049 t18 := m & fieldBaseMask 1050 1051 // What's left is for 2^(fieldBase*19). 
1052 t19 := m >> fieldBase 1053 1054 // At this point, all of the terms are grouped into their respective 1055 // base. 1056 // 1057 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 1058 // when the modulus is of the special form m = b^t - c, highly efficient 1059 // reduction can be achieved per the provided algorithm. 1060 // 1061 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 1062 // this criterion. 1063 // 1064 // 4294968273 in field representation (base 2^26) is: 1065 // n[0] = 977 1066 // n[1] = 64 1067 // That is to say (2^26 * 64) + 977 = 4294968273 1068 // 1069 // Since each word is in base 2^26, the upper terms (t10 and up) start 1070 // at 260 bits (versus the final desired range of 256 bits), so the 1071 // field representation of 'c' from above needs to be adjusted for the 1072 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 1073 // 68719492368. Thus, the adjusted field representation of 'c' is: 1074 // n[0] = 977 * 16 = 15632 1075 // n[1] = 64 * 16 = 1024 1076 // That is to say (2^26 * 1024) + 15632 = 68719492368 1077 // 1078 // To reduce the final term, t19, the entire 'c' value is needed instead 1079 // of only n[0] because there are no more terms left to handle n[1]. 1080 // This means there might be some magnitude left in the upper bits that 1081 // is handled below. 
1082 m = t0 + t10*15632 1083 t0 = m & fieldBaseMask 1084 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 1085 t1 = m & fieldBaseMask 1086 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 1087 t2 = m & fieldBaseMask 1088 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 1089 t3 = m & fieldBaseMask 1090 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 1091 t4 = m & fieldBaseMask 1092 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 1093 t5 = m & fieldBaseMask 1094 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 1095 t6 = m & fieldBaseMask 1096 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 1097 t7 = m & fieldBaseMask 1098 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 1099 t8 = m & fieldBaseMask 1100 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 1101 t9 = m & fieldMSBMask 1102 m = m >> fieldMSBBits 1103 1104 // At this point, if the magnitude is greater than 0, the overall value 1105 // is greater than the max possible 256-bit value. In particular, it is 1106 // "how many times larger" than the max value it is. 1107 // 1108 // The algorithm presented in [HAC] section 14.3.4 repeats until the 1109 // quotient is zero. However, due to the above, we already know at 1110 // least how many times we would need to repeat as it's the value 1111 // currently in m. Thus we can simply multiply the magnitude by the 1112 // field representation of the prime and do a single iteration. Notice 1113 // that nothing will be changed when the magnitude is zero, so we could 1114 // skip this in that case, however always running regardless allows it 1115 // to run in constant time. The final result will be in the range 1116 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 1117 // magnitude of 1, but it is denormalized. 
1118 n := t0 + m*977 1119 f.n[0] = uint32(n & fieldBaseMask) 1120 n = (n >> fieldBase) + t1 + m*64 1121 f.n[1] = uint32(n & fieldBaseMask) 1122 f.n[2] = uint32((n >> fieldBase) + t2) 1123 f.n[3] = uint32(t3) 1124 f.n[4] = uint32(t4) 1125 f.n[5] = uint32(t5) 1126 f.n[6] = uint32(t6) 1127 f.n[7] = uint32(t7) 1128 f.n[8] = uint32(t8) 1129 f.n[9] = uint32(t9) 1130 1131 return f 1132 } 1133 1134 // Inverse finds the modular multiplicative inverse of the field value. The 1135 // existing field value is modified. 1136 // 1137 // The field value is returned to support chaining. This enables syntax like: 1138 // f.Inverse().Mul(f2) so that f = f^-1 * f2. 1139 func (f *fieldVal) Inverse() *fieldVal { 1140 // Fermat's little theorem states that for a nonzero number a and 1141 // prime p, a^(p-1) = 1 (mod p). Since the multiplicative inverse b satisfies 1142 // a*b = 1 (mod p), it follows from a*a^(p-2) = a^(p-1) = 1 (mod p) that 1143 // b = a^(p-2) is the multiplicative inverse. 1144 // 1145 // In order to efficiently compute a^(p-2), p-2 needs to be split into 1146 // a sequence of squares and multiplications that minimizes the number of 1147 // multiplications needed (since they are more costly than squarings). 1148 // Intermediate results are saved and reused as well. 1149 // 1150 // The secp256k1 prime - 2 is 2^256 - 4294968275. 1151 // 1152 // This has a cost of 258 field squarings and 33 field multiplications. 
1153 var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal 1154 a2.SquareVal(f) 1155 a3.Mul2(&a2, f) 1156 a4.SquareVal(&a2) 1157 a10.SquareVal(&a4).Mul(&a2) 1158 a11.Mul2(&a10, f) 1159 a21.Mul2(&a10, &a11) 1160 a42.SquareVal(&a21) 1161 a45.Mul2(&a42, &a3) 1162 a63.Mul2(&a42, &a21) 1163 a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11) 1164 a1023.Mul2(&a1019, &a4) 1165 f.Set(&a63) // f = a^(2^6 - 1) 1166 f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32) 1167 f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024) 1168 f.Mul(&a1023) // f = a^(2^16 - 1) 1169 f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32) 1170 f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024) 1171 f.Mul(&a1023) // f = a^(2^26 - 1) 1172 f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32) 1173 f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024) 1174 f.Mul(&a1023) // f = a^(2^36 - 1) 1175 f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32) 1176 f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024) 1177 f.Mul(&a1023) // f = a^(2^46 - 1) 1178 f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32) 1179 f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024) 1180 f.Mul(&a1023) // f = a^(2^56 - 1) 1181 f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32) 1182 f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024) 1183 f.Mul(&a1023) // f = a^(2^66 - 1) 1184 f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32) 1185 f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024) 1186 f.Mul(&a1023) // f = a^(2^76 - 1) 1187 f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32) 1188 f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024) 1189 f.Mul(&a1023) // f = a^(2^86 - 1) 1190 f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32) 1191 
f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024) 1192 f.Mul(&a1023) // f = a^(2^96 - 1) 1193 f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32) 1194 f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024) 1195 f.Mul(&a1023) // f = a^(2^106 - 1) 1196 f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32) 1197 f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024) 1198 f.Mul(&a1023) // f = a^(2^116 - 1) 1199 f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32) 1200 f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024) 1201 f.Mul(&a1023) // f = a^(2^126 - 1) 1202 f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32) 1203 f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024) 1204 f.Mul(&a1023) // f = a^(2^136 - 1) 1205 f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32) 1206 f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024) 1207 f.Mul(&a1023) // f = a^(2^146 - 1) 1208 f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32) 1209 f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024) 1210 f.Mul(&a1023) // f = a^(2^156 - 1) 1211 f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32) 1212 f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024) 1213 f.Mul(&a1023) // f = a^(2^166 - 1) 1214 f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32) 1215 f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024) 1216 f.Mul(&a1023) // f = a^(2^176 - 1) 1217 f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32) 1218 f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024) 1219 f.Mul(&a1023) // f = a^(2^186 - 1) 1220 f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32) 1221 f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024) 1222 f.Mul(&a1023) // f = 
a^(2^196 - 1) 1223 f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32) 1224 f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024) 1225 f.Mul(&a1023) // f = a^(2^206 - 1) 1226 f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32) 1227 f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024) 1228 f.Mul(&a1023) // f = a^(2^216 - 1) 1229 f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32) 1230 f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024) 1231 f.Mul(&a1019) // f = a^(2^226 - 5) 1232 f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160) 1233 f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120) 1234 f.Mul(&a1023) // f = a^(2^236 - 4097) 1235 f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104) 1236 f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328) 1237 f.Mul(&a1023) // f = a^(2^246 - 4194305) 1238 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760) 1239 f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320) 1240 return f.Mul(&a45) // f = a^(2^256 - 4294968275) = a^(p-2) 1241 } 1242 1243 // SqrtVal computes the square root of x modulo the curve's prime, and stores 1244 // the result in f. The square root is computed via exponentiation of x by the 1245 // value Q = (P+1)/4 using the curve's precomputed big-endian representation of 1246 // the Q. This method uses a modified version of square-and-multiply 1247 // exponentiation over secp256k1 fieldVals to operate on bytes instead of bits, 1248 // which offers better performance over both big.Int exponentiation and bit-wise 1249 // square-and-multiply. 1250 // 1251 // NOTE: This method only works when P is intended to be the secp256k1 prime and 1252 // is not constant time. The returned value is of magnitude 1, but is 1253 // denormalized. 
1254 func (f *fieldVal) SqrtVal(x *fieldVal) *fieldVal { 1255 // The following computation iteratively computes x^((P+1)/4) = x^Q 1256 // using the recursive, piece-wise definition: 1257 // 1258 // x^n = (x^2)^(n/2) mod P if n is even 1259 // x^n = x(x^2)^(n-1/2) mod P if n is odd 1260 // 1261 // Given n in its big-endian representation b_k, ..., b_0, x^n can be 1262 // computed by defining the sequence r_k+1, ..., r_0, where: 1263 // 1264 // r_k+1 = 1 1265 // r_i = (r_i+1)^2 * x^b_i for i = k, ..., 0 1266 // 1267 // The final value r_0 = x^n. 1268 // 1269 // See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more 1270 // details. 1271 // 1272 // This can be further optimized, by observing that the value of Q in 1273 // secp256k1 has the value: 1274 // 1275 // Q = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c 1276 // 1277 // We can unroll the typical bit-wise interpretation of the 1278 // exponentiation algorithm above to instead operate on bytes. 1279 // This reduces the number of comparisons by an order of magnitude, 1280 // reducing the overhead of failed branch predictions and additional 1281 // comparisons in this method. 1282 // 1283 // Since there there are only 4 unique bytes of Q, this keeps the jump 1284 // table small without the need to handle all possible 8-bit values. 1285 // Further, we observe that 29 of the 32 bytes are 0xff; making the 1286 // first case handle 0xff therefore optimizes the hot path. 1287 f.SetInt(1) 1288 for _, b := range fieldQBytes { 1289 switch b { 1290 1291 // Most common case, where all 8 bits are set. 1292 case 0xff: 1293 f.Square().Mul(x) 1294 f.Square().Mul(x) 1295 f.Square().Mul(x) 1296 f.Square().Mul(x) 1297 f.Square().Mul(x) 1298 f.Square().Mul(x) 1299 f.Square().Mul(x) 1300 f.Square().Mul(x) 1301 1302 // First byte of Q (0x3f), where all but the top two bits are 1303 // set. 
Note that this case only applies six operations, since 1304 // the highest bit of Q resides in bit six of the first byte. We 1305 // ignore the first two bits, since squaring for these bits will 1306 // result in an invalid result. We forgo squaring f before the 1307 // first multiply, since 1^2 = 1. 1308 case 0x3f: 1309 f.Mul(x) 1310 f.Square().Mul(x) 1311 f.Square().Mul(x) 1312 f.Square().Mul(x) 1313 f.Square().Mul(x) 1314 f.Square().Mul(x) 1315 1316 // Byte 28 of Q (0xbf), where only bit 7 is unset. 1317 case 0xbf: 1318 f.Square().Mul(x) 1319 f.Square() 1320 f.Square().Mul(x) 1321 f.Square().Mul(x) 1322 f.Square().Mul(x) 1323 f.Square().Mul(x) 1324 f.Square().Mul(x) 1325 f.Square().Mul(x) 1326 1327 // Byte 31 of Q (0x0c), where only bits 3 and 4 are set. 1328 default: 1329 f.Square() 1330 f.Square() 1331 f.Square() 1332 f.Square() 1333 f.Square().Mul(x) 1334 f.Square().Mul(x) 1335 f.Square() 1336 f.Square() 1337 } 1338 } 1339 1340 return f 1341 } 1342 1343 // Sqrt computes the square root of f modulo the curve's prime, and stores the 1344 // result in f. The square root is computed via exponentiation of x by the value 1345 // Q = (P+1)/4 using the curve's precomputed big-endian representation of the Q. 1346 // This method uses a modified version of square-and-multiply exponentiation 1347 // over secp256k1 fieldVals to operate on bytes instead of bits, which offers 1348 // better performance over both big.Int exponentiation and bit-wise 1349 // square-and-multiply. 1350 // 1351 // NOTE: This method only works when P is intended to be the secp256k1 prime and 1352 // is not constant time. The returned value is of magnitude 1, but is 1353 // denormalized. 1354 func (f *fieldVal) Sqrt() *fieldVal { 1355 return f.SqrtVal(f) 1356 }