github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-756/fp/element.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Code generated by consensys/gnark-crypto DO NOT EDIT 16 17 package fp 18 19 import ( 20 "crypto/rand" 21 "encoding/binary" 22 "errors" 23 "io" 24 "math/big" 25 "math/bits" 26 "reflect" 27 "strconv" 28 "strings" 29 30 "github.com/bits-and-blooms/bitset" 31 "github.com/consensys/gnark-crypto/field/hash" 32 "github.com/consensys/gnark-crypto/field/pool" 33 ) 34 35 // Element represents a field element stored on 12 words (uint64) 36 // 37 // Element are assumed to be in Montgomery form in all methods. 38 // 39 // Modulus q = 40 // 41 // q[base10] = 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 42 // q[base16] = 0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f868400000000000000000001 43 // 44 // # Warning 45 // 46 // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance. 47 type Element [12]uint64 48 49 const ( 50 Limbs = 12 // number of 64 bits words needed to represent a Element 51 Bits = 756 // number of bits needed to represent a Element 52 Bytes = 96 // number of bytes needed to represent a Element 53 ) 54 55 // Field modulus q 56 const ( 57 q0 uint64 = 1 58 q1 uint64 = 3731203976813871104 59 q2 uint64 = 15039355238879481536 60 q3 uint64 = 4828608925799409630 61 q4 uint64 = 16326337093237622437 62 q5 uint64 = 756237273905161798 63 q6 uint64 = 16934317532427647658 64 q7 uint64 = 14755673041361585881 65 q8 uint64 = 18154628166362162086 66 q9 uint64 = 6671956210750770825 67 q10 uint64 = 16333450281447942351 68 q11 uint64 = 4352613195430282 69 ) 70 71 var qElement = Element{ 72 q0, 73 q1, 74 q2, 75 q3, 76 q4, 77 q5, 78 q6, 79 q7, 80 q8, 81 q9, 82 q10, 83 q11, 84 } 85 86 var _modulus big.Int // q stored as big.Int 87 88 // Modulus returns q as a big.Int 89 // 90 // q[base10] = 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 91 // q[base16] = 0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f868400000000000000000001 92 func Modulus() *big.Int { 93 return new(big.Int).Set(&_modulus) 94 } 95 96 // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r 97 // used for Montgomery reduction 98 const qInvNeg uint64 = 18446744073709551615 99 100 func init() { 101 _modulus.SetString("f76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f868400000000000000000001", 16) 102 } 103 104 // NewElement returns a new Element from a uint64 value 105 // 106 // it is equivalent to 107 // 108 // var v Element 109 // v.SetUint64(...) 110 func NewElement(v uint64) Element { 111 z := Element{v} 112 z.Mul(&z, &rSquare) 113 return z 114 } 115 116 // SetUint64 sets z to v and returns z 117 func (z *Element) SetUint64(v uint64) *Element { 118 // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form 119 *z = Element{v} 120 return z.Mul(z, &rSquare) // z.toMont() 121 } 122 123 // SetInt64 sets z to v and returns z 124 func (z *Element) SetInt64(v int64) *Element { 125 126 // absolute value of v 127 m := v >> 63 128 z.SetUint64(uint64((v ^ m) - m)) 129 130 if m != 0 { 131 // v is negative 132 z.Neg(z) 133 } 134 135 return z 136 } 137 138 // Set z = x and returns z 139 func (z *Element) Set(x *Element) *Element { 140 z[0] = x[0] 141 z[1] = x[1] 142 z[2] = x[2] 143 z[3] = x[3] 144 z[4] = x[4] 145 z[5] = x[5] 146 z[6] = x[6] 147 z[7] = x[7] 148 z[8] = x[8] 149 z[9] = x[9] 150 z[10] = x[10] 151 z[11] = x[11] 152 return z 153 } 154 155 // SetInterface converts provided interface into Element 156 // returns an error if provided type is not supported 157 // supported types: 158 // 159 // Element 160 // *Element 161 // uint64 162 // int 163 // string (see SetString for valid formats) 164 // *big.Int 165 // big.Int 166 // []byte 167 func (z *Element) SetInterface(i1 interface{}) (*Element, error) { 168 if i1 == nil { 169 return nil, errors.New("can't set fp.Element with <nil>") 170 } 171 172 switch c1 := i1.(type) { 173 case Element: 174 return z.Set(&c1), nil 175 case *Element: 176 if c1 == nil { 177 return nil, errors.New("can't set fp.Element with <nil>") 178 } 179 return z.Set(c1), nil 180 case uint8: 181 return z.SetUint64(uint64(c1)), nil 182 case uint16: 183 return z.SetUint64(uint64(c1)), nil 184 case uint32: 185 return z.SetUint64(uint64(c1)), nil 186 case uint: 187 return z.SetUint64(uint64(c1)), nil 188 case uint64: 189 return z.SetUint64(c1), nil 190 case int8: 191 return z.SetInt64(int64(c1)), nil 192 case int16: 193 return z.SetInt64(int64(c1)), nil 194 case int32: 195 return z.SetInt64(int64(c1)), nil 196 case int64: 197 return z.SetInt64(c1), nil 198 case int: 199 return z.SetInt64(int64(c1)), nil 200 case string: 201 return z.SetString(c1) 202 case *big.Int: 203 if c1 == nil { 204 return nil, errors.New("can't set fp.Element with <nil>") 205 } 206 return z.SetBigInt(c1), nil 207 case big.Int: 208 return z.SetBigInt(&c1), nil 209 case []byte: 210 return z.SetBytes(c1), nil 211 default: 212 return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) 213 } 214 } 215 216 // SetZero z = 0 217 func (z *Element) SetZero() *Element { 218 z[0] = 0 219 z[1] = 0 220 z[2] = 0 221 z[3] = 0 222 z[4] = 0 223 z[5] = 0 224 z[6] = 0 225 z[7] = 0 226 z[8] = 0 227 z[9] = 0 228 z[10] = 0 229 z[11] = 0 230 return z 231 } 232 233 // SetOne z = 1 (in Montgomery form) 234 func (z *Element) SetOne() *Element { 235 z[0] = 18446744073709547378 236 z[1] = 14463961505609547775 237 z[2] = 15160016368967634470 238 z[3] = 12241294279704278364 239 z[4] = 2720419343484222500 240 z[5] = 4799902015386277509 241 z[6] = 8643488375494563078 242 z[7] = 18366804658688562287 243 z[8] = 2055362399696866477 244 z[9] = 3108243834975866807 245 z[10] = 9468215855567529777 246 z[11] = 369351476012747 247 return z 248 } 249 250 // Div z = x*y⁻¹ (mod q) 251 func (z *Element) Div(x, y *Element) *Element { 252 var yInv Element 253 yInv.Inverse(y) 254 z.Mul(x, &yInv) 255 return z 256 } 257 258 // Equal returns z == x; constant-time 259 func (z *Element) Equal(x *Element) bool { 260 return z.NotEqual(x) == 0 261 } 262 263 // NotEqual returns 0 if and only if z == x; constant-time 264 func (z *Element) NotEqual(x *Element) uint64 { 265 return (z[11] ^ x[11]) | (z[10] ^ x[10]) | (z[9] ^ x[9]) | (z[8] ^ x[8]) | (z[7] ^ x[7]) | (z[6] ^ x[6]) | (z[5] ^ x[5]) | (z[4] ^ x[4]) | (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0]) 266 } 267 268 // IsZero returns z == 0 269 func (z *Element) IsZero() bool { 270 return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 271 } 272 273 // IsOne returns z == 1 274 func (z *Element) IsOne() bool { 275 return ((z[11] ^ 369351476012747) | (z[10] ^ 9468215855567529777) | (z[9] ^ 3108243834975866807) | (z[8] ^ 2055362399696866477) | (z[7] ^ 18366804658688562287) | (z[6] ^ 8643488375494563078) | (z[5] ^ 4799902015386277509) | (z[4] ^ 2720419343484222500) | (z[3] ^ 12241294279704278364) | (z[2] ^ 15160016368967634470) | (z[1] ^ 14463961505609547775) | (z[0] ^ 18446744073709547378)) == 0 276 } 277 278 // IsUint64 reports whether z can be represented as an uint64. 279 func (z *Element) IsUint64() bool { 280 zz := *z 281 zz.fromMont() 282 return zz.FitsOnOneWord() 283 } 284 285 // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. 286 func (z *Element) Uint64() uint64 { 287 return z.Bits()[0] 288 } 289 290 // FitsOnOneWord reports whether z words (except the least significant word) are 0 291 // 292 // It is the responsibility of the caller to convert from Montgomery to Regular form if needed. 293 func (z *Element) FitsOnOneWord() bool { 294 return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 295 } 296 297 // Cmp compares (lexicographic order) z and x and returns: 298 // 299 // -1 if z < x 300 // 0 if z == x 301 // +1 if z > x 302 func (z *Element) Cmp(x *Element) int { 303 _z := z.Bits() 304 _x := x.Bits() 305 if _z[11] > _x[11] { 306 return 1 307 } else if _z[11] < _x[11] { 308 return -1 309 } 310 if _z[10] > _x[10] { 311 return 1 312 } else if _z[10] < _x[10] { 313 return -1 314 } 315 if _z[9] > _x[9] { 316 return 1 317 } else if _z[9] < _x[9] { 318 return -1 319 } 320 if _z[8] > _x[8] { 321 return 1 322 } else if _z[8] < _x[8] { 323 return -1 324 } 325 if _z[7] > _x[7] { 326 return 1 327 } else if _z[7] < _x[7] { 328 return -1 329 } 330 if _z[6] > _x[6] { 331 return 1 332 } else if _z[6] < _x[6] { 333 return -1 334 } 335 if _z[5] > _x[5] { 336 return 1 337 } else if _z[5] < _x[5] { 338 return -1 339 } 340 if _z[4] > _x[4] { 341 return 1 342 } else if _z[4] < _x[4] { 343 return -1 344 } 345 if _z[3] > _x[3] { 346 return 1 347 } else if _z[3] < _x[3] { 348 return -1 349 } 350 if _z[2] > _x[2] { 351 return 1 352 } else if _z[2] < _x[2] { 353 return -1 354 } 355 if _z[1] > _x[1] { 356 return 1 357 } else if _z[1] < _x[1] { 358 return -1 359 } 360 if _z[0] > _x[0] { 361 return 1 362 } else if _z[0] < _x[0] { 363 return -1 364 } 365 return 0 366 } 367 368 // LexicographicallyLargest returns true if this element is strictly lexicographically 369 // larger than its negation, false otherwise 370 func (z *Element) LexicographicallyLargest() bool { 371 // adapted from github.com/zkcrypto/bls12_381 372 // we check if the element is larger than (q-1) / 2 373 // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 374 375 _z := z.Bits() 376 377 var b uint64 378 _, b = bits.Sub64(_z[0], 1, 0) 379 _, b = bits.Sub64(_z[1], 1865601988406935552, b) 380 _, b = bits.Sub64(_z[2], 7519677619439740768, b) 381 _, b = bits.Sub64(_z[3], 11637676499754480623, b) 382 _, b = bits.Sub64(_z[4], 8163168546618811218, b) 383 _, b = bits.Sub64(_z[5], 378118636952580899, b) 384 _, b = bits.Sub64(_z[6], 17690530803068599637, b) 385 _, b = bits.Sub64(_z[7], 7377836520680792940, b) 386 _, b = bits.Sub64(_z[8], 18300686120035856851, b) 387 _, b = bits.Sub64(_z[9], 12559350142230161220, b) 388 _, b = bits.Sub64(_z[10], 8166725140723971175, b) 389 _, b = bits.Sub64(_z[11], 2176306597715141, b) 390 391 return b == 0 392 } 393 394 // SetRandom sets z to a uniform random value in [0, q). 395 // 396 // This might error only if reading from crypto/rand.Reader errors, 397 // in which case, value of z is undefined. 398 func (z *Element) SetRandom() (*Element, error) { 399 // this code is generated for all modulus 400 // and derived from go/src/crypto/rand/util.go 401 402 // l is number of limbs * 8; the number of bytes needed to reconstruct 12 uint64 403 const l = 96 404 405 // bitLen is the maximum bit length needed to encode a value < q. 406 const bitLen = 756 407 408 // k is the maximum byte length needed to encode a value < q. 409 const k = (bitLen + 7) / 8 410 411 // b is the number of bits in the most significant byte of q-1. 412 b := uint(bitLen % 8) 413 if b == 0 { 414 b = 8 415 } 416 417 var bytes [l]byte 418 419 for { 420 // note that bytes[k:l] is always 0 421 if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil { 422 return nil, err 423 } 424 425 // Clear unused bits in in the most significant byte to increase probability 426 // that the candidate is < q. 427 bytes[k-1] &= uint8(int(1<<b) - 1) 428 z[0] = binary.LittleEndian.Uint64(bytes[0:8]) 429 z[1] = binary.LittleEndian.Uint64(bytes[8:16]) 430 z[2] = binary.LittleEndian.Uint64(bytes[16:24]) 431 z[3] = binary.LittleEndian.Uint64(bytes[24:32]) 432 z[4] = binary.LittleEndian.Uint64(bytes[32:40]) 433 z[5] = binary.LittleEndian.Uint64(bytes[40:48]) 434 z[6] = binary.LittleEndian.Uint64(bytes[48:56]) 435 z[7] = binary.LittleEndian.Uint64(bytes[56:64]) 436 z[8] = binary.LittleEndian.Uint64(bytes[64:72]) 437 z[9] = binary.LittleEndian.Uint64(bytes[72:80]) 438 z[10] = binary.LittleEndian.Uint64(bytes[80:88]) 439 z[11] = binary.LittleEndian.Uint64(bytes[88:96]) 440 441 if !z.smallerThanModulus() { 442 continue // ignore the candidate and re-sample 443 } 444 445 return z, nil 446 } 447 } 448 449 // smallerThanModulus returns true if z < q 450 // This is not constant time 451 func (z *Element) smallerThanModulus() bool { 452 return (z[11] < q11 || (z[11] == q11 && (z[10] < q10 || (z[10] == q10 && (z[9] < q9 || (z[9] == q9 && (z[8] < q8 || (z[8] == q8 && (z[7] < q7 || (z[7] == q7 && (z[6] < q6 || (z[6] == q6 && (z[5] < q5 || (z[5] == q5 && (z[4] < q4 || (z[4] == q4 && (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0))))))))))))))))))))))) 453 } 454 455 // One returns 1 456 func One() Element { 457 var one Element 458 one.SetOne() 459 return one 460 } 461 462 // Halve sets z to z / 2 (mod q) 463 func (z *Element) Halve() { 464 var carry uint64 465 466 if z[0]&1 == 1 { 467 // z = z + q 468 z[0], carry = bits.Add64(z[0], q0, 0) 469 z[1], carry = bits.Add64(z[1], q1, carry) 470 z[2], carry = bits.Add64(z[2], q2, carry) 471 z[3], carry = bits.Add64(z[3], q3, carry) 472 z[4], carry = bits.Add64(z[4], q4, carry) 473 z[5], carry = bits.Add64(z[5], q5, carry) 474 z[6], carry = bits.Add64(z[6], q6, carry) 475 z[7], carry = bits.Add64(z[7], q7, carry) 476 z[8], carry = bits.Add64(z[8], q8, carry) 477 z[9], carry = bits.Add64(z[9], q9, carry) 478 z[10], carry = bits.Add64(z[10], q10, carry) 479 z[11], _ = bits.Add64(z[11], q11, carry) 480 481 } 482 // z = z >> 1 483 z[0] = z[0]>>1 | z[1]<<63 484 z[1] = z[1]>>1 | z[2]<<63 485 z[2] = z[2]>>1 | z[3]<<63 486 z[3] = z[3]>>1 | z[4]<<63 487 z[4] = z[4]>>1 | z[5]<<63 488 z[5] = z[5]>>1 | z[6]<<63 489 z[6] = z[6]>>1 | z[7]<<63 490 z[7] = z[7]>>1 | z[8]<<63 491 z[8] = z[8]>>1 | z[9]<<63 492 z[9] = z[9]>>1 | z[10]<<63 493 z[10] = z[10]>>1 | z[11]<<63 494 z[11] >>= 1 495 496 } 497 498 // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation 499 // sets and returns z = z * 1 500 func (z *Element) fromMont() *Element { 501 fromMont(z) 502 return z 503 } 504 505 // Add z = x + y (mod q) 506 func (z *Element) Add(x, y *Element) *Element { 507 508 var carry uint64 509 z[0], carry = bits.Add64(x[0], y[0], 0) 510 z[1], carry = bits.Add64(x[1], y[1], carry) 511 z[2], carry = bits.Add64(x[2], y[2], carry) 512 z[3], carry = bits.Add64(x[3], y[3], carry) 513 z[4], carry = bits.Add64(x[4], y[4], carry) 514 z[5], carry = bits.Add64(x[5], y[5], carry) 515 z[6], carry = bits.Add64(x[6], y[6], carry) 516 z[7], carry = bits.Add64(x[7], y[7], carry) 517 z[8], carry = bits.Add64(x[8], y[8], carry) 518 z[9], carry = bits.Add64(x[9], y[9], carry) 519 z[10], carry = bits.Add64(x[10], y[10], carry) 520 z[11], _ = bits.Add64(x[11], y[11], carry) 521 522 // if z ⩾ q → z -= q 523 if !z.smallerThanModulus() { 524 var b uint64 525 z[0], b = bits.Sub64(z[0], q0, 0) 526 z[1], b = bits.Sub64(z[1], q1, b) 527 z[2], b = bits.Sub64(z[2], q2, b) 528 z[3], b = bits.Sub64(z[3], q3, b) 529 z[4], b = bits.Sub64(z[4], q4, b) 530 z[5], b = bits.Sub64(z[5], q5, b) 531 z[6], b = bits.Sub64(z[6], q6, b) 532 z[7], b = bits.Sub64(z[7], q7, b) 533 z[8], b = bits.Sub64(z[8], q8, b) 534 z[9], b = bits.Sub64(z[9], q9, b) 535 z[10], b = bits.Sub64(z[10], q10, b) 536 z[11], _ = bits.Sub64(z[11], q11, b) 537 } 538 return z 539 } 540 541 // Double z = x + x (mod q), aka Lsh 1 542 func (z *Element) Double(x *Element) *Element { 543 544 var carry uint64 545 z[0], carry = bits.Add64(x[0], x[0], 0) 546 z[1], carry = bits.Add64(x[1], x[1], carry) 547 z[2], carry = bits.Add64(x[2], x[2], carry) 548 z[3], carry = bits.Add64(x[3], x[3], carry) 549 z[4], carry = bits.Add64(x[4], x[4], carry) 550 z[5], carry = bits.Add64(x[5], x[5], carry) 551 z[6], carry = bits.Add64(x[6], x[6], carry) 552 z[7], carry = bits.Add64(x[7], x[7], carry) 553 z[8], carry = bits.Add64(x[8], x[8], carry) 554 z[9], carry = bits.Add64(x[9], x[9], carry) 555 z[10], carry = bits.Add64(x[10], x[10], carry) 556 z[11], _ = bits.Add64(x[11], x[11], carry) 557 558 // if z ⩾ q → z -= q 559 if !z.smallerThanModulus() { 560 var b uint64 561 z[0], b = bits.Sub64(z[0], q0, 0) 562 z[1], b = bits.Sub64(z[1], q1, b) 563 z[2], b = bits.Sub64(z[2], q2, b) 564 z[3], b = bits.Sub64(z[3], q3, b) 565 z[4], b = bits.Sub64(z[4], q4, b) 566 z[5], b = bits.Sub64(z[5], q5, b) 567 z[6], b = bits.Sub64(z[6], q6, b) 568 z[7], b = bits.Sub64(z[7], q7, b) 569 z[8], b = bits.Sub64(z[8], q8, b) 570 z[9], b = bits.Sub64(z[9], q9, b) 571 z[10], b = bits.Sub64(z[10], q10, b) 572 z[11], _ = bits.Sub64(z[11], q11, b) 573 } 574 return z 575 } 576 577 // Sub z = x - y (mod q) 578 func (z *Element) Sub(x, y *Element) *Element { 579 var b uint64 580 z[0], b = bits.Sub64(x[0], y[0], 0) 581 z[1], b = bits.Sub64(x[1], y[1], b) 582 z[2], b = bits.Sub64(x[2], y[2], b) 583 z[3], b = bits.Sub64(x[3], y[3], b) 584 z[4], b = bits.Sub64(x[4], y[4], b) 585 z[5], b = bits.Sub64(x[5], y[5], b) 586 z[6], b = bits.Sub64(x[6], y[6], b) 587 z[7], b = bits.Sub64(x[7], y[7], b) 588 z[8], b = bits.Sub64(x[8], y[8], b) 589 z[9], b = bits.Sub64(x[9], y[9], b) 590 z[10], b = bits.Sub64(x[10], y[10], b) 591 z[11], b = bits.Sub64(x[11], y[11], b) 592 if b != 0 { 593 var c uint64 594 z[0], c = bits.Add64(z[0], q0, 0) 595 z[1], c = bits.Add64(z[1], q1, c) 596 z[2], c = bits.Add64(z[2], q2, c) 597 z[3], c = bits.Add64(z[3], q3, c) 598 z[4], c = bits.Add64(z[4], q4, c) 599 z[5], c = bits.Add64(z[5], q5, c) 600 z[6], c = bits.Add64(z[6], q6, c) 601 z[7], c = bits.Add64(z[7], q7, c) 602 z[8], c = bits.Add64(z[8], q8, c) 603 z[9], c = bits.Add64(z[9], q9, c) 604 z[10], c = bits.Add64(z[10], q10, c) 605 z[11], _ = bits.Add64(z[11], q11, c) 606 } 607 return z 608 } 609 610 // Neg z = q - x 611 func (z *Element) Neg(x *Element) *Element { 612 if x.IsZero() { 613 z.SetZero() 614 return z 615 } 616 var borrow uint64 617 z[0], borrow = bits.Sub64(q0, x[0], 0) 618 z[1], borrow = bits.Sub64(q1, x[1], borrow) 619 z[2], borrow = bits.Sub64(q2, x[2], borrow) 620 z[3], borrow = bits.Sub64(q3, x[3], borrow) 621 z[4], borrow = bits.Sub64(q4, x[4], borrow) 622 z[5], borrow = bits.Sub64(q5, x[5], borrow) 623 z[6], borrow = bits.Sub64(q6, x[6], borrow) 624 z[7], borrow = bits.Sub64(q7, x[7], borrow) 625 z[8], borrow = bits.Sub64(q8, x[8], borrow) 626 z[9], borrow = bits.Sub64(q9, x[9], borrow) 627 z[10], borrow = bits.Sub64(q10, x[10], borrow) 628 z[11], _ = bits.Sub64(q11, x[11], borrow) 629 return z 630 } 631 632 // Select is a constant-time conditional move. 633 // If c=0, z = x0. Else z = x1 634 func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element { 635 cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise 636 z[0] = x0[0] ^ cC&(x0[0]^x1[0]) 637 z[1] = x0[1] ^ cC&(x0[1]^x1[1]) 638 z[2] = x0[2] ^ cC&(x0[2]^x1[2]) 639 z[3] = x0[3] ^ cC&(x0[3]^x1[3]) 640 z[4] = x0[4] ^ cC&(x0[4]^x1[4]) 641 z[5] = x0[5] ^ cC&(x0[5]^x1[5]) 642 z[6] = x0[6] ^ cC&(x0[6]^x1[6]) 643 z[7] = x0[7] ^ cC&(x0[7]^x1[7]) 644 z[8] = x0[8] ^ cC&(x0[8]^x1[8]) 645 z[9] = x0[9] ^ cC&(x0[9]^x1[9]) 646 z[10] = x0[10] ^ cC&(x0[10]^x1[10]) 647 z[11] = x0[11] ^ cC&(x0[11]^x1[11]) 648 return z 649 } 650 651 // _mulGeneric is unoptimized textbook CIOS 652 // it is a fallback solution on x86 when ADX instruction set is not available 653 // and is used for testing purposes. 654 func _mulGeneric(z, x, y *Element) { 655 656 // Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis 657 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 658 // 659 // The algorithm: 660 // 661 // for i=0 to N-1 662 // C := 0 663 // for j=0 to N-1 664 // (C,t[j]) := t[j] + x[j]*y[i] + C 665 // (t[N+1],t[N]) := t[N] + C 666 // 667 // C := 0 668 // m := t[0]*q'[0] mod D 669 // (C,_) := t[0] + m*q[0] 670 // for j=1 to N-1 671 // (C,t[j-1]) := t[j] + m*q[j] + C 672 // 673 // (C,t[N-1]) := t[N] + C 674 // t[N] := t[N+1] + C 675 // 676 // → N is the number of machine words needed to store the modulus q 677 // → D is the word size. For example, on a 64-bit architecture D is 2 64 678 // → x[i], y[i], q[i] is the ith word of the numbers x,y,q 679 // → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs. 680 // → t is a temporary array of size N+2 681 // → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number 682 683 var t [13]uint64 684 var D uint64 685 var m, C uint64 686 // ----------------------------------- 687 // First loop 688 689 C, t[0] = bits.Mul64(y[0], x[0]) 690 C, t[1] = madd1(y[0], x[1], C) 691 C, t[2] = madd1(y[0], x[2], C) 692 C, t[3] = madd1(y[0], x[3], C) 693 C, t[4] = madd1(y[0], x[4], C) 694 C, t[5] = madd1(y[0], x[5], C) 695 C, t[6] = madd1(y[0], x[6], C) 696 C, t[7] = madd1(y[0], x[7], C) 697 C, t[8] = madd1(y[0], x[8], C) 698 C, t[9] = madd1(y[0], x[9], C) 699 C, t[10] = madd1(y[0], x[10], C) 700 C, t[11] = madd1(y[0], x[11], C) 701 702 t[12], D = bits.Add64(t[12], C, 0) 703 704 // m = t[0]n'[0] mod W 705 m = t[0] * qInvNeg 706 707 // ----------------------------------- 708 // Second loop 709 C = madd0(m, q0, t[0]) 710 C, t[0] = madd2(m, q1, t[1], C) 711 C, t[1] = madd2(m, q2, t[2], C) 712 C, t[2] = madd2(m, q3, t[3], C) 713 C, t[3] = madd2(m, q4, t[4], C) 714 C, t[4] = madd2(m, q5, t[5], C) 715 C, t[5] = madd2(m, q6, t[6], C) 716 C, t[6] = madd2(m, q7, t[7], C) 717 C, t[7] = madd2(m, q8, t[8], C) 718 C, t[8] = madd2(m, q9, t[9], C) 719 C, t[9] = madd2(m, q10, t[10], C) 720 C, t[10] = madd2(m, q11, t[11], C) 721 722 t[11], C = bits.Add64(t[12], C, 0) 723 t[12], _ = bits.Add64(0, D, C) 724 // ----------------------------------- 725 // First loop 726 727 C, t[0] = madd1(y[1], x[0], t[0]) 728 C, t[1] = madd2(y[1], x[1], t[1], C) 729 C, t[2] = madd2(y[1], x[2], t[2], C) 730 C, t[3] = madd2(y[1], x[3], t[3], C) 731 C, t[4] = madd2(y[1], x[4], t[4], C) 732 C, t[5] = madd2(y[1], x[5], t[5], C) 733 C, t[6] = madd2(y[1], x[6], t[6], C) 734 C, t[7] = madd2(y[1], x[7], t[7], C) 735 C, t[8] = madd2(y[1], x[8], t[8], C) 736 C, t[9] = madd2(y[1], x[9], t[9], C) 737 C, t[10] = madd2(y[1], x[10], t[10], C) 738 C, t[11] = madd2(y[1], x[11], t[11], C) 739 740 t[12], D = bits.Add64(t[12], C, 0) 741 742 // m = t[0]n'[0] mod W 743 m = t[0] * qInvNeg 744 745 // ----------------------------------- 746 // Second loop 747 C = madd0(m, q0, t[0]) 748 C, t[0] = madd2(m, q1, t[1], C) 749 C, t[1] = madd2(m, q2, t[2], C) 750 C, t[2] = madd2(m, q3, t[3], C) 751 C, t[3] = madd2(m, q4, t[4], C) 752 C, t[4] = madd2(m, q5, t[5], C) 753 C, t[5] = madd2(m, q6, t[6], C) 754 C, t[6] = madd2(m, q7, t[7], C) 755 C, t[7] = madd2(m, q8, t[8], C) 756 C, t[8] = madd2(m, q9, t[9], C) 757 C, t[9] = madd2(m, q10, t[10], C) 758 C, t[10] = madd2(m, q11, t[11], C) 759 760 t[11], C = bits.Add64(t[12], C, 0) 761 t[12], _ = bits.Add64(0, D, C) 762 // ----------------------------------- 763 // First loop 764 765 C, t[0] = madd1(y[2], x[0], t[0]) 766 C, t[1] = madd2(y[2], x[1], t[1], C) 767 C, t[2] = madd2(y[2], x[2], t[2], C) 768 C, t[3] = madd2(y[2], x[3], t[3], C) 769 C, t[4] = madd2(y[2], x[4], t[4], C) 770 C, t[5] = madd2(y[2], x[5], t[5], C) 771 C, t[6] = madd2(y[2], x[6], t[6], C) 772 C, t[7] = madd2(y[2], x[7], t[7], C) 773 C, t[8] = madd2(y[2], x[8], t[8], C) 774 C, t[9] = madd2(y[2], x[9], t[9], C) 775 C, t[10] = madd2(y[2], x[10], t[10], C) 776 C, t[11] = madd2(y[2], x[11], t[11], C) 777 778 t[12], D = bits.Add64(t[12], C, 0) 779 780 // m = t[0]n'[0] mod W 781 m = t[0] * qInvNeg 782 783 // ----------------------------------- 784 // Second loop 785 C = madd0(m, q0, t[0]) 786 C, t[0] = madd2(m, q1, t[1], C) 787 C, t[1] = madd2(m, q2, t[2], C) 788 C, t[2] = madd2(m, q3, t[3], C) 789 C, t[3] = madd2(m, q4, t[4], C) 790 C, t[4] = madd2(m, q5, t[5], C) 791 C, t[5] = madd2(m, q6, t[6], C) 792 C, t[6] = madd2(m, q7, t[7], C) 793 C, t[7] = madd2(m, q8, t[8], C) 794 C, t[8] = madd2(m, q9, t[9], C) 795 C, t[9] = madd2(m, q10, t[10], C) 796 C, t[10] = madd2(m, q11, t[11], C) 797 798 t[11], C = bits.Add64(t[12], C, 0) 799 t[12], _ = bits.Add64(0, D, C) 800 // ----------------------------------- 801 // First loop 802 803 C, t[0] = madd1(y[3], x[0], t[0]) 804 C, t[1] = madd2(y[3], x[1], t[1], C) 805 C, t[2] = madd2(y[3], x[2], t[2], C) 806 C, t[3] = madd2(y[3], x[3], t[3], C) 807 C, t[4] = madd2(y[3], x[4], t[4], C) 808 C, t[5] = madd2(y[3], x[5], t[5], C) 809 C, t[6] = madd2(y[3], x[6], t[6], C) 810 C, t[7] = madd2(y[3], x[7], t[7], C) 811 C, t[8] = madd2(y[3], x[8], t[8], C) 812 C, t[9] = madd2(y[3], x[9], t[9], C) 813 C, t[10] = madd2(y[3], x[10], t[10], C) 814 C, t[11] = madd2(y[3], x[11], t[11], C) 815 816 t[12], D = bits.Add64(t[12], C, 0) 817 818 // m = t[0]n'[0] mod W 819 m = t[0] * qInvNeg 820 821 // ----------------------------------- 822 // Second loop 823 C = madd0(m, q0, t[0]) 824 C, t[0] = madd2(m, q1, t[1], C) 825 C, t[1] = madd2(m, q2, t[2], C) 826 C, t[2] = madd2(m, q3, t[3], C) 827 C, t[3] = madd2(m, q4, t[4], C) 828 C, t[4] = madd2(m, q5, t[5], C) 829 C, t[5] = madd2(m, q6, t[6], C) 830 C, t[6] = madd2(m, q7, t[7], C) 831 C, t[7] = madd2(m, q8, t[8], C) 832 C, t[8] = madd2(m, q9, t[9], C) 833 C, t[9] = madd2(m, q10, t[10], C) 834 C, t[10] = madd2(m, q11, t[11], C) 835 836 t[11], C = bits.Add64(t[12], C, 0) 837 t[12], _ = bits.Add64(0, D, C) 838 // ----------------------------------- 839 // First loop 840 841 C, t[0] = madd1(y[4], x[0], t[0]) 842 C, t[1] = madd2(y[4], x[1], t[1], C) 843 C, t[2] = madd2(y[4], x[2], t[2], C) 844 C, t[3] = madd2(y[4], x[3], t[3], C) 845 C, t[4] = madd2(y[4], x[4], t[4], C) 846 C, t[5] = madd2(y[4], x[5], t[5], C) 847 C, t[6] = madd2(y[4], x[6], t[6], C) 848 C, t[7] = madd2(y[4], x[7], t[7], C) 849 C, t[8] = madd2(y[4], x[8], t[8], C) 850 C, t[9] = madd2(y[4], x[9], t[9], C) 851 C, t[10] = madd2(y[4], x[10], t[10], C) 852 C, t[11] = madd2(y[4], x[11], t[11], C) 853 854 t[12], D = bits.Add64(t[12], C, 0) 855 856 // m = t[0]n'[0] mod W 857 m = t[0] * qInvNeg 858 859 // ----------------------------------- 860 // Second loop 861 C = madd0(m, q0, t[0]) 862 C, t[0] = madd2(m, q1, t[1], C) 863 C, t[1] = madd2(m, q2, t[2], C) 864 C, t[2] = madd2(m, q3, t[3], C) 865 C, t[3] = madd2(m, q4, t[4], C) 866 C, t[4] = madd2(m, q5, t[5], C) 867 C, t[5] = madd2(m, q6, t[6], C) 868 C, t[6] = madd2(m, q7, t[7], C) 869 C, t[7] = madd2(m, q8, t[8], C) 870 C, t[8] = madd2(m, q9, t[9], C) 871 C, t[9] = madd2(m, q10, t[10], C) 872 C, t[10] = madd2(m, q11, t[11], C) 873 874 t[11], C = bits.Add64(t[12], C, 0) 875 t[12], _ = bits.Add64(0, D, C) 876 // ----------------------------------- 877 // First loop 878 879 C, t[0] = madd1(y[5], x[0], t[0]) 880 C, t[1] = madd2(y[5], x[1], t[1], C) 881 C, t[2] = madd2(y[5], x[2], t[2], C) 882 C, t[3] = madd2(y[5], x[3], t[3], C) 883 C, t[4] = madd2(y[5], x[4], t[4], C) 884 C, t[5] = madd2(y[5], x[5], t[5], C) 885 C, t[6] = madd2(y[5], x[6], t[6], C) 886 C, t[7] = madd2(y[5], x[7], t[7], C) 887 C, t[8] = madd2(y[5], x[8], t[8], C) 888 C, t[9] = madd2(y[5], x[9], t[9], C) 889 C, t[10] = madd2(y[5], x[10], t[10], C) 890 C, t[11] = madd2(y[5], x[11], t[11], C) 891 892 t[12], D = bits.Add64(t[12], C, 0) 893 894 // m = t[0]n'[0] mod W 895 m = t[0] * qInvNeg 896 897 // ----------------------------------- 898 // Second loop 899 C = madd0(m, q0, t[0]) 900 C, t[0] = madd2(m, q1, t[1], C) 901 C, t[1] = madd2(m, q2, t[2], C) 902 C, t[2] = madd2(m, q3, t[3], C) 903 C, t[3] = madd2(m, q4, t[4], C) 904 C, t[4] = madd2(m, q5, t[5], C) 905 C, t[5] = madd2(m, q6, t[6], C) 906 C, t[6] = madd2(m, q7, t[7], C) 907 C, t[7] = madd2(m, q8, t[8], C) 908 C, t[8] = madd2(m, q9, t[9], C) 909 C, t[9] = madd2(m, q10, t[10], C) 910 C, t[10] = madd2(m, q11, t[11], C) 911 912 t[11], C = bits.Add64(t[12], C, 0) 913 t[12], _ = bits.Add64(0, D, C) 914 // ----------------------------------- 915 // First loop 916 917 C, t[0] = madd1(y[6], x[0], t[0]) 918 C, t[1] = madd2(y[6], x[1], t[1], C) 919 C, t[2] = madd2(y[6], x[2], t[2], C) 920 C, t[3] = madd2(y[6], x[3], t[3], C) 921 C, t[4] = madd2(y[6], x[4], t[4], C) 922 C, t[5] = madd2(y[6], x[5], t[5], C) 923 C, t[6] = madd2(y[6], x[6], t[6], C) 924 C, t[7] = madd2(y[6], x[7], t[7], C) 925 C, t[8] = madd2(y[6], x[8], t[8], C) 926 C, t[9] = madd2(y[6], x[9], t[9], C) 927 C, t[10] = madd2(y[6], x[10], t[10], C) 928 C, t[11] = madd2(y[6], x[11], t[11], C) 929 930 t[12], D = bits.Add64(t[12], C, 0) 931 932 // m = t[0]n'[0] mod W 933 m = t[0] * qInvNeg 934 935 // ----------------------------------- 936 // Second loop 937 C = madd0(m, q0, t[0]) 938 C, t[0] = madd2(m, q1, t[1], C) 939 C, t[1] = madd2(m, q2, t[2], C) 940 C, t[2] = madd2(m, q3, t[3], C) 941 C, t[3] = madd2(m, q4, t[4], C) 942 C, t[4] = madd2(m, q5, t[5], C) 943 C, t[5] = madd2(m, q6, t[6], C) 944 C, t[6] = madd2(m, q7, t[7], C) 945 C, t[7] = madd2(m, q8, t[8], C) 946 C, t[8] = madd2(m, q9, t[9], C) 947 C, t[9] = madd2(m, q10, t[10], C) 948 C, t[10] = madd2(m, q11, t[11], C) 949 950 t[11], C = bits.Add64(t[12], C, 0) 951 t[12], _ = bits.Add64(0, D, C) 952 // ----------------------------------- 953 // First loop 954 955 C, t[0] = madd1(y[7], x[0], t[0]) 956 C, t[1] = madd2(y[7], x[1], t[1], C) 957 C, t[2] = madd2(y[7], x[2], t[2], C) 958 C, t[3] = madd2(y[7], x[3], t[3], C) 959 C, t[4] = madd2(y[7], x[4], t[4], C) 960 C, t[5] = madd2(y[7], x[5], t[5], C) 961 C, t[6] = madd2(y[7], x[6], t[6], C) 962 C, t[7] = madd2(y[7], x[7], t[7], C) 963 C, t[8] = madd2(y[7], x[8], t[8], C) 964 C, t[9] = madd2(y[7], x[9], t[9], C) 965 C, t[10] = madd2(y[7], x[10], t[10], C) 966 C, t[11] = madd2(y[7], x[11], t[11], C) 967 968 t[12], D = bits.Add64(t[12], C, 0) 969 970 // m = t[0]n'[0] mod W 971 m = t[0] * qInvNeg 972 973 // ----------------------------------- 974 // Second loop 975 C = madd0(m, q0, t[0]) 976 C, t[0] = madd2(m, q1, t[1], C) 977 C, t[1] = madd2(m, q2, t[2], C) 978 C, t[2] = madd2(m, q3, t[3], C) 979 C, t[3] = madd2(m, q4, t[4], C) 980 C, t[4] = madd2(m, q5, t[5], C) 981 C, t[5] = madd2(m, q6, t[6], C) 982 C, t[6] = madd2(m, q7, t[7], C) 983 C, t[7] = madd2(m, q8, t[8], C) 984 C, t[8] = madd2(m, q9, t[9], C) 985 C, t[9] = madd2(m, q10, t[10], C) 986 C, t[10] = madd2(m, q11, t[11], C) 987 988 t[11], C = bits.Add64(t[12], C, 0) 989 t[12], _ = bits.Add64(0, D, C) 990 // ----------------------------------- 991 // First loop 992 993 C, t[0] = madd1(y[8], x[0], t[0]) 994 C, t[1] = madd2(y[8], x[1], t[1], C) 995 C, t[2] = madd2(y[8], x[2], t[2], C) 996 C, t[3] = madd2(y[8], x[3], t[3], C) 997 C, t[4] = madd2(y[8], x[4], t[4], C) 998 C, t[5] = madd2(y[8], x[5], t[5], C) 999 C, t[6] = madd2(y[8], x[6], t[6], C) 1000 C, t[7] = madd2(y[8], x[7], t[7], C) 1001 C, t[8] = madd2(y[8], x[8], t[8], C) 1002 C, t[9] = madd2(y[8], x[9], t[9], C) 1003 C, t[10] = madd2(y[8], x[10], t[10], C) 1004 C, t[11] = madd2(y[8], x[11], t[11], C) 1005 1006 t[12], D = bits.Add64(t[12], C, 0) 1007 1008 // m = t[0]n'[0] mod W 1009 m = t[0] * qInvNeg 1010 1011 // ----------------------------------- 1012 // Second loop 1013 C = madd0(m, q0, t[0]) 1014 C, t[0] = madd2(m, q1, t[1], C) 1015 C, t[1] = madd2(m, q2, t[2], C) 1016 C, t[2] = madd2(m, q3, t[3], C) 1017 C, t[3] = madd2(m, q4, t[4], C) 1018 C, t[4] = madd2(m, q5, t[5], C) 1019 C, t[5] = madd2(m, q6, t[6], C) 1020 C, t[6] = madd2(m, q7, t[7], C) 1021 C, t[7] = madd2(m, q8, t[8], C) 1022 C, t[8] = madd2(m, q9, t[9], C) 1023 C, t[9] = madd2(m, q10, t[10], C) 1024 C, t[10] = madd2(m, q11, t[11], C) 1025 1026 t[11], C = bits.Add64(t[12], C, 0) 1027 t[12], _ = bits.Add64(0, D, C) 1028 // ----------------------------------- 1029 // First loop 1030 1031 C, t[0] = madd1(y[9], x[0], t[0]) 1032 C, t[1] = madd2(y[9], x[1], t[1], C) 1033 C, t[2] = madd2(y[9], x[2], t[2], C) 1034 C, t[3] = madd2(y[9], x[3], t[3], C) 1035 C, t[4] = madd2(y[9], x[4], t[4], C) 1036 C, t[5] = madd2(y[9], x[5], t[5], C) 1037 C, t[6] = madd2(y[9], x[6], t[6], C) 1038 C, t[7] = madd2(y[9], x[7], t[7], C) 1039 C, t[8] = madd2(y[9], x[8], t[8], C) 1040 C, t[9] = madd2(y[9], x[9], t[9], C) 1041 C, t[10] = madd2(y[9], x[10], t[10], C) 1042 C, t[11] = madd2(y[9], x[11], t[11], C) 1043 1044 t[12], D = bits.Add64(t[12], C, 0) 1045 1046 // m = t[0]n'[0] mod W 1047 m = t[0] * qInvNeg 1048 1049 // ----------------------------------- 1050 // Second loop 1051 C = madd0(m, q0, t[0]) 1052 C, t[0] = madd2(m, q1, t[1], C) 1053 C, t[1] = madd2(m, q2, t[2], C) 1054 C, t[2] = madd2(m, q3, t[3], C) 1055 C, t[3] = madd2(m, q4, t[4], C) 1056 C, t[4] = madd2(m, q5, t[5], C) 1057 C, t[5] = madd2(m, q6, t[6], C) 1058 C, t[6] = madd2(m, q7, t[7], C) 1059 C, t[7] = madd2(m, q8, t[8], C) 1060 C, t[8] = madd2(m, q9, t[9], C) 1061 C, t[9] = madd2(m, q10, t[10], C) 1062 C, t[10] = madd2(m, q11, t[11], C) 1063 1064 t[11], C = bits.Add64(t[12], C, 0) 1065 t[12], _ = bits.Add64(0, D, C) 1066 // ----------------------------------- 1067 // First loop 1068 1069 C, t[0] = madd1(y[10], x[0], t[0]) 1070 C, t[1] = madd2(y[10], x[1], t[1], C) 1071 C, t[2] = madd2(y[10], x[2], t[2], C) 1072 C, t[3] = madd2(y[10], x[3], t[3], C) 1073 C, t[4] = madd2(y[10], x[4], t[4], C) 1074 C, t[5] = madd2(y[10], x[5], t[5], C) 1075 C, t[6] = madd2(y[10], x[6], t[6], C) 1076 C, t[7] = madd2(y[10], x[7], t[7], C) 1077 C, t[8] = madd2(y[10], x[8], t[8], C) 1078 C, t[9] = madd2(y[10], x[9], t[9], C) 1079 C, t[10] = madd2(y[10], x[10], t[10], C) 1080 C, t[11] = madd2(y[10], x[11], t[11], C) 1081 1082 t[12], D = bits.Add64(t[12], C, 0) 1083 1084 // m = t[0]n'[0] mod W 1085 m = t[0] * qInvNeg 1086 1087 // ----------------------------------- 1088 // Second loop 1089 C = madd0(m, q0, t[0]) 1090 C, t[0] = madd2(m, q1, t[1], C) 1091 C, t[1] = madd2(m, q2, t[2], C) 1092 C, t[2] = madd2(m, q3, t[3], C) 1093 C, t[3] = madd2(m, q4, t[4], C) 1094 C, t[4] = madd2(m, q5, t[5], C) 1095 C, t[5] = madd2(m, q6, t[6], C) 1096 C, t[6] = madd2(m, q7, t[7], C) 1097 C, t[7] = madd2(m, q8, t[8], C) 1098 C, t[8] = madd2(m, q9, t[9], C) 1099 C, t[9] = madd2(m, q10, t[10], C) 1100 C, t[10] = madd2(m, q11, t[11], C) 1101 1102 t[11], C = bits.Add64(t[12], C, 0) 1103 t[12], _ = bits.Add64(0, D, C) 1104 // ----------------------------------- 1105 // First loop 1106 1107 C, t[0] = madd1(y[11], x[0], t[0]) 1108 C, t[1] = madd2(y[11], x[1], t[1], C) 1109 C, t[2] = madd2(y[11], x[2], t[2], C) 1110 C, t[3] = madd2(y[11], x[3], t[3], C) 1111 C, t[4] = madd2(y[11], x[4], t[4], C) 1112 C, t[5] = madd2(y[11], x[5], t[5], C) 1113 C, t[6] = madd2(y[11], x[6], t[6], C) 1114 C, t[7] = madd2(y[11], x[7], t[7], C) 1115 C, t[8] = madd2(y[11], x[8], t[8], C) 1116 C, t[9] = madd2(y[11], x[9], t[9], C) 1117 C, t[10] = madd2(y[11], x[10], t[10], C) 1118 C, t[11] = madd2(y[11], x[11], t[11], C) 1119 1120 t[12], D = bits.Add64(t[12], C, 0) 1121 1122 // m = t[0]n'[0] mod W 1123 m = t[0] * qInvNeg 1124 1125 // ----------------------------------- 1126 // Second loop 1127 C = madd0(m, q0, t[0]) 1128 C, t[0] = madd2(m, q1, t[1], C) 1129 C, t[1] = madd2(m, q2, t[2], C) 1130 C, t[2] = madd2(m, q3, t[3], C) 1131 C, t[3] = madd2(m, q4, t[4], C) 1132 C, t[4] = madd2(m, q5, t[5], C) 1133 C, t[5] = madd2(m, q6, t[6], C) 1134 C, t[6] = madd2(m, q7, t[7], C) 1135 C, t[7] = madd2(m, q8, t[8], C) 1136 C, t[8] = madd2(m, q9, t[9], C) 1137 C, t[9] = madd2(m, q10, t[10], C) 1138 C, t[10] = madd2(m, q11, t[11], C) 1139 1140 t[11], C = bits.Add64(t[12], C, 0) 1141 t[12], _ = bits.Add64(0, D, C) 1142 1143 if t[12] != 0 { 1144 // we need to reduce, we have a result on 13 words 1145 var b uint64 1146 z[0], b = bits.Sub64(t[0], q0, 0) 1147 z[1], b = bits.Sub64(t[1], q1, b) 1148 z[2], b = bits.Sub64(t[2], q2, b) 1149 z[3], b = bits.Sub64(t[3], q3, b) 1150 z[4], b = bits.Sub64(t[4], q4, b) 1151 z[5], b = bits.Sub64(t[5], q5, b) 1152 z[6], b = bits.Sub64(t[6], q6, b) 1153 z[7], b = bits.Sub64(t[7], q7, b) 1154 z[8], b = bits.Sub64(t[8], q8, b) 1155 z[9], b = bits.Sub64(t[9], q9, b) 1156 z[10], b = bits.Sub64(t[10], q10, b) 1157 z[11], _ = bits.Sub64(t[11], q11, b) 1158 return 1159 } 1160 1161 // copy t into z 1162 z[0] = t[0] 1163 z[1] = t[1] 1164 z[2] = t[2] 1165 z[3] = t[3] 1166 z[4] = t[4] 1167 z[5] = t[5] 1168 z[6] = t[6] 1169 z[7] = t[7] 1170 z[8] = t[8] 1171 z[9] = t[9] 1172 z[10] = t[10] 1173 z[11] = t[11] 1174 1175 // if z ⩾ q → z -= q 1176 if !z.smallerThanModulus() { 1177 var b uint64 1178 z[0], b = bits.Sub64(z[0], q0, 0) 1179 z[1], b = bits.Sub64(z[1], q1, b) 1180 z[2], b = bits.Sub64(z[2], q2, b) 1181 z[3], b = bits.Sub64(z[3], q3, b) 1182 z[4], b = bits.Sub64(z[4], q4, b) 1183 z[5], b = bits.Sub64(z[5], q5, b) 1184 z[6], b = bits.Sub64(z[6], q6, b) 1185 z[7], b = bits.Sub64(z[7], q7, b) 1186 z[8], b = bits.Sub64(z[8], q8, b) 1187 z[9], b = bits.Sub64(z[9], q9, b) 1188 z[10], b = bits.Sub64(z[10], q10, b) 1189 z[11], _ = bits.Sub64(z[11], q11, b) 1190 } 1191 } 1192 1193 func _fromMontGeneric(z *Element) { 1194 // the following lines implement z = z * 1 1195 // with a modified CIOS montgomery multiplication 1196 // see Mul for algorithm documentation 1197 { 1198 // m = z[0]n'[0] mod W 1199 m := z[0] * qInvNeg 1200 C := madd0(m, q0, z[0]) 1201 C, z[0] = madd2(m, q1, z[1], C) 1202 C, z[1] = madd2(m, q2, z[2], C) 1203 C, z[2] = madd2(m, q3, z[3], C) 1204 C, z[3] = madd2(m, q4, z[4], C) 1205 C, z[4] = madd2(m, q5, z[5], C) 1206 C, z[5] = madd2(m, q6, z[6], C) 1207 C, z[6] = madd2(m, q7, z[7], C) 1208 C, z[7] = madd2(m, q8, z[8], C) 1209 C, z[8] = madd2(m, q9, z[9], C) 1210 C, z[9] = madd2(m, q10, z[10], C) 1211 C, z[10] = madd2(m, q11, z[11], C) 1212 z[11] = C 1213 } 1214 { 1215 // m = z[0]n'[0] mod W 1216 m := z[0] * qInvNeg 1217 C := madd0(m, q0, z[0]) 1218 C, z[0] = madd2(m, q1, z[1], C) 1219 C, z[1] = madd2(m, q2, z[2], C) 1220 C, z[2] = madd2(m, q3, z[3], C) 1221 C, z[3] = madd2(m, q4, z[4], C) 1222 C, z[4] = madd2(m, q5, z[5], C) 1223 C, z[5] = madd2(m, q6, z[6], C) 1224 C, z[6] = madd2(m, q7, z[7], C) 1225 C, z[7] = madd2(m, q8, z[8], C) 1226 C, z[8] = madd2(m, q9, z[9], C) 1227 C, z[9] = madd2(m, q10, z[10], C) 1228 C, z[10] = madd2(m, q11, z[11], C) 1229 z[11] = C 1230 } 1231 { 1232 // m = z[0]n'[0] mod W 1233 m := z[0] * qInvNeg 1234 C := madd0(m, q0, z[0]) 1235 C, z[0] = madd2(m, q1, z[1], C) 1236 C, z[1] = madd2(m, q2, z[2], C) 1237 C, z[2] = madd2(m, q3, z[3], C) 1238 C, z[3] = madd2(m, q4, z[4], C) 1239 C, z[4] = madd2(m, q5, z[5], C) 1240 C, z[5] = madd2(m, q6, z[6], C) 1241 C, z[6] = madd2(m, q7, z[7], C) 1242 C, z[7] = madd2(m, q8, z[8], C) 1243 C, z[8] = madd2(m, q9, z[9], C) 1244 C, z[9] = madd2(m, q10, z[10], C) 1245 C, z[10] = madd2(m, q11, z[11], C) 1246 z[11] = C 1247 } 1248 { 1249 // m = z[0]n'[0] mod W 1250 m := z[0] * qInvNeg 1251 C := madd0(m, q0, z[0]) 1252 C, z[0] = madd2(m, q1, z[1], C) 1253 C, z[1] = madd2(m, q2, z[2], C) 1254 C, z[2] = madd2(m, q3, z[3], C) 1255 C, z[3] = madd2(m, q4, z[4], C) 1256 C, z[4] = madd2(m, q5, z[5], C) 1257 C, z[5] = madd2(m, q6, z[6], C) 1258 C, z[6] = madd2(m, q7, z[7], C) 1259 C, z[7] = madd2(m, q8, z[8], C) 1260 C, z[8] = madd2(m, q9, z[9], C) 1261 C, z[9] = madd2(m, q10, z[10], C) 1262 C, z[10] = madd2(m, q11, z[11], C) 1263 z[11] = C 1264 } 1265 { 1266 // m = z[0]n'[0] mod W 1267 m := z[0] * qInvNeg 1268 C := madd0(m, q0, z[0]) 1269 C, z[0] = madd2(m, q1, z[1], C) 1270 C, z[1] = madd2(m, q2, z[2], C) 1271 C, z[2] = madd2(m, q3, z[3], C) 1272 C, z[3] = madd2(m, q4, z[4], C) 1273 C, z[4] = madd2(m, q5, z[5], C) 1274 C, z[5] = madd2(m, q6, z[6], C) 1275 C, z[6] = madd2(m, q7, z[7], C) 1276 C, z[7] = madd2(m, q8, z[8], C) 1277 C, z[8] = madd2(m, q9, z[9], C) 1278 C, z[9] = madd2(m, q10, z[10], C) 1279 C, z[10] = madd2(m, q11, z[11], C) 1280 z[11] = C 1281 } 1282 { 1283 // m = z[0]n'[0] mod W 1284 m := z[0] * qInvNeg 1285 C := madd0(m, q0, z[0]) 1286 C, z[0] = madd2(m, q1, z[1], C) 1287 C, z[1] = madd2(m, q2, z[2], C) 1288 C, z[2] = madd2(m, q3, z[3], C) 1289 C, z[3] = madd2(m, q4, z[4], C) 1290 C, z[4] = madd2(m, q5, z[5], C) 1291 C, z[5] = madd2(m, q6, z[6], C) 1292 C, z[6] = madd2(m, q7, z[7], C) 1293 C, z[7] = madd2(m, q8, z[8], C) 1294 C, z[8] = madd2(m, q9, z[9], C) 1295 C, z[9] = madd2(m, q10, z[10], C) 1296 C, z[10] = madd2(m, q11, z[11], C) 1297 z[11] = C 1298 } 1299 { 1300 // m = z[0]n'[0] mod W 1301 m := z[0] * qInvNeg 1302 C := madd0(m, q0, z[0]) 1303 C, z[0] = madd2(m, q1, z[1], C) 1304 C, z[1] = madd2(m, q2, z[2], C) 1305 C, z[2] = madd2(m, q3, z[3], C) 1306 C, z[3] = madd2(m, q4, z[4], C) 1307 C, z[4] = madd2(m, q5, z[5], C) 1308 C, z[5] = madd2(m, q6, z[6], C) 1309 C, z[6] = madd2(m, q7, z[7], C) 1310 C, z[7] = madd2(m, q8, z[8], C) 1311 C, z[8] = madd2(m, q9, z[9], C) 1312 C, z[9] = madd2(m, q10, z[10], C) 1313 C, z[10] = madd2(m, q11, z[11], C) 1314 z[11] = C 1315 } 1316 { 1317 // m = z[0]n'[0] mod W 1318 m := z[0] * qInvNeg 1319 C := madd0(m, q0, z[0]) 1320 C, z[0] = madd2(m, q1, z[1], C) 1321 C, z[1] = madd2(m, q2, z[2], C) 1322 C, z[2] = madd2(m, q3, z[3], C) 1323 C, z[3] = madd2(m, q4, z[4], C) 1324 C, z[4] = madd2(m, q5, z[5], C) 1325 C, z[5] = madd2(m, q6, z[6], C) 1326 C, z[6] = madd2(m, q7, z[7], C) 1327 C, z[7] = madd2(m, q8, z[8], C) 1328 C, z[8] = madd2(m, q9, z[9], C) 1329 C, z[9] = madd2(m, q10, z[10], C) 1330 C, z[10] = madd2(m, q11, z[11], C) 1331 z[11] = C 1332 } 1333 { 1334 // m = z[0]n'[0] mod W 1335 m := z[0] * qInvNeg 1336 C := madd0(m, q0, z[0]) 1337 C, z[0] = madd2(m, q1, z[1], C) 1338 C, z[1] = madd2(m, q2, z[2], C) 1339 C, z[2] = madd2(m, q3, z[3], C) 1340 C, z[3] = madd2(m, q4, z[4], C) 1341 C, z[4] = madd2(m, q5, z[5], C) 1342 C, z[5] = madd2(m, q6, z[6], C) 1343 C, z[6] = madd2(m, q7, z[7], C) 1344 C, z[7] = madd2(m, q8, z[8], C) 1345 C, z[8] = madd2(m, q9, z[9], C) 1346 C, z[9] = madd2(m, q10, z[10], C) 1347 C, z[10] = madd2(m, q11, z[11], C) 1348 z[11] = C 1349 } 1350 { 1351 // m = z[0]n'[0] mod W 1352 m := z[0] * qInvNeg 1353 C := madd0(m, q0, z[0]) 1354 C, z[0] = madd2(m, q1, z[1], C) 1355 C, z[1] = madd2(m, q2, z[2], C) 1356 C, z[2] = madd2(m, q3, z[3], C) 1357 C, z[3] = madd2(m, q4, z[4], C) 1358 C, z[4] = madd2(m, q5, z[5], C) 1359 C, z[5] = madd2(m, q6, z[6], C) 1360 C, z[6] = madd2(m, q7, z[7], C) 1361 C, z[7] = madd2(m, q8, z[8], C) 1362 C, z[8] = madd2(m, q9, z[9], C) 1363 C, z[9] = madd2(m, q10, z[10], C) 1364 C, z[10] = madd2(m, q11, z[11], C) 1365 z[11] = C 1366 } 1367 { 1368 // m = z[0]n'[0] mod W 1369 m := z[0] * qInvNeg 1370 C := madd0(m, q0, z[0]) 1371 C, z[0] = madd2(m, q1, z[1], C) 1372 C, z[1] = madd2(m, q2, z[2], C) 1373 C, z[2] = madd2(m, q3, z[3], C) 1374 C, z[3] = madd2(m, q4, z[4], C) 1375 C, z[4] = madd2(m, q5, z[5], C) 1376 C, z[5] = madd2(m, q6, z[6], C) 1377 C, z[6] = madd2(m, q7, z[7], C) 1378 C, z[7] = madd2(m, q8, z[8], C) 1379 C, z[8] = madd2(m, q9, z[9], C) 1380 C, z[9] = madd2(m, q10, z[10], C) 1381 C, z[10] = madd2(m, q11, z[11], C) 1382 z[11] = C 1383 } 1384 { 1385 // m = z[0]n'[0] mod W 1386 m := z[0] * qInvNeg 1387 C := madd0(m, q0, z[0]) 1388 C, z[0] = madd2(m, q1, z[1], C) 1389 C, z[1] = madd2(m, q2, z[2], C) 1390 C, z[2] = madd2(m, q3, z[3], C) 1391 C, z[3] = madd2(m, q4, z[4], C) 1392 C, z[4] = madd2(m, q5, z[5], C) 1393 C, z[5] = madd2(m, q6, z[6], C) 1394 C, z[6] = madd2(m, q7, z[7], C) 1395 C, z[7] = madd2(m, q8, z[8], C) 1396 C, z[8] = madd2(m, q9, z[9], C) 1397 C, z[9] = madd2(m, q10, z[10], C) 1398 C, z[10] = madd2(m, q11, z[11], C) 1399 z[11] = C 1400 } 1401 1402 // if z ⩾ q → z -= q 1403 if !z.smallerThanModulus() { 1404 var b uint64 1405 z[0], b = bits.Sub64(z[0], q0, 0) 1406 z[1], b = bits.Sub64(z[1], q1, b) 1407 z[2], b = bits.Sub64(z[2], q2, b) 1408 z[3], b = bits.Sub64(z[3], q3, b) 1409 z[4], b = bits.Sub64(z[4], q4, b) 1410 z[5], b = bits.Sub64(z[5], q5, b) 1411 z[6], b = bits.Sub64(z[6], q6, b) 1412 z[7], b = bits.Sub64(z[7], q7, b) 1413 z[8], b = bits.Sub64(z[8], q8, b) 1414 z[9], b = bits.Sub64(z[9], q9, b) 1415 z[10], b = bits.Sub64(z[10], q10, b) 1416 z[11], _ = bits.Sub64(z[11], q11, b) 1417 } 1418 } 1419 1420 func _reduceGeneric(z *Element) { 1421 1422 // if z ⩾ q → z -= q 1423 if !z.smallerThanModulus() { 1424 var b uint64 1425 z[0], b = bits.Sub64(z[0], q0, 0) 1426 z[1], b = bits.Sub64(z[1], q1, b) 1427 z[2], b = bits.Sub64(z[2], q2, b) 1428 z[3], b = bits.Sub64(z[3], q3, b) 1429 z[4], b = bits.Sub64(z[4], q4, b) 1430 z[5], b = bits.Sub64(z[5], q5, b) 1431 z[6], b = bits.Sub64(z[6], q6, b) 1432 z[7], b = bits.Sub64(z[7], q7, b) 1433 z[8], b = bits.Sub64(z[8], q8, b) 1434 z[9], b = bits.Sub64(z[9], q9, b) 1435 z[10], b = bits.Sub64(z[10], q10, b) 1436 z[11], _ = bits.Sub64(z[11], q11, b) 1437 } 1438 } 1439 1440 // BatchInvert returns a new slice with every element inverted. 1441 // Uses Montgomery batch inversion trick 1442 func BatchInvert(a []Element) []Element { 1443 res := make([]Element, len(a)) 1444 if len(a) == 0 { 1445 return res 1446 } 1447 1448 zeroes := bitset.New(uint(len(a))) 1449 accumulator := One() 1450 1451 for i := 0; i < len(a); i++ { 1452 if a[i].IsZero() { 1453 zeroes.Set(uint(i)) 1454 continue 1455 } 1456 res[i] = accumulator 1457 accumulator.Mul(&accumulator, &a[i]) 1458 } 1459 1460 accumulator.Inverse(&accumulator) 1461 1462 for i := len(a) - 1; i >= 0; i-- { 1463 if zeroes.Test(uint(i)) { 1464 continue 1465 } 1466 res[i].Mul(&res[i], &accumulator) 1467 accumulator.Mul(&accumulator, &a[i]) 1468 } 1469 1470 return res 1471 } 1472 1473 func _butterflyGeneric(a, b *Element) { 1474 t := *a 1475 a.Add(a, b) 1476 b.Sub(&t, b) 1477 } 1478 1479 // BitLen returns the minimum number of bits needed to represent z 1480 // returns 0 if z == 0 1481 func (z *Element) BitLen() int { 1482 if z[11] != 0 { 1483 return 704 + bits.Len64(z[11]) 1484 } 1485 if z[10] != 0 { 1486 return 640 + bits.Len64(z[10]) 1487 } 1488 if z[9] != 0 { 1489 return 576 + bits.Len64(z[9]) 1490 } 1491 if z[8] != 0 { 1492 return 512 + bits.Len64(z[8]) 1493 } 1494 if z[7] != 0 { 1495 return 448 + bits.Len64(z[7]) 1496 } 1497 if z[6] != 0 { 1498 return 384 + bits.Len64(z[6]) 1499 } 1500 if z[5] != 0 { 1501 return 320 + bits.Len64(z[5]) 1502 } 1503 if z[4] != 0 { 1504 return 256 + bits.Len64(z[4]) 1505 } 1506 if z[3] != 0 { 1507 return 192 + bits.Len64(z[3]) 1508 } 1509 if z[2] != 0 { 1510 return 128 + bits.Len64(z[2]) 1511 } 1512 if z[1] != 0 { 1513 return 64 + bits.Len64(z[1]) 1514 } 1515 return bits.Len64(z[0]) 1516 } 1517 1518 // Hash msg to count prime field elements. 1519 // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 1520 func Hash(msg, dst []byte, count int) ([]Element, error) { 1521 // 128 bits of security 1522 // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 1523 const Bytes = 1 + (Bits-1)/8 1524 const L = 16 + Bytes 1525 1526 lenInBytes := count * L 1527 pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes) 1528 if err != nil { 1529 return nil, err 1530 } 1531 1532 // get temporary big int from the pool 1533 vv := pool.BigInt.Get() 1534 1535 res := make([]Element, count) 1536 for i := 0; i < count; i++ { 1537 vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) 1538 res[i].SetBigInt(vv) 1539 } 1540 1541 // release object into pool 1542 pool.BigInt.Put(vv) 1543 1544 return res, nil 1545 } 1546 1547 // Exp z = xᵏ (mod q) 1548 func (z *Element) Exp(x Element, k *big.Int) *Element { 1549 if k.IsUint64() && k.Uint64() == 0 { 1550 return z.SetOne() 1551 } 1552 1553 e := k 1554 if k.Sign() == -1 { 1555 // negative k, we invert 1556 // if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q) 1557 x.Inverse(&x) 1558 1559 // we negate k in a temp big.Int since 1560 // Int.Bit(_) of k and -k is different 1561 e = pool.BigInt.Get() 1562 defer pool.BigInt.Put(e) 1563 e.Neg(k) 1564 } 1565 1566 z.Set(&x) 1567 1568 for i := e.BitLen() - 2; i >= 0; i-- { 1569 z.Square(z) 1570 if e.Bit(i) == 1 { 1571 z.Mul(z, &x) 1572 } 1573 } 1574 1575 return z 1576 } 1577 1578 // rSquare where r is the Montgommery constant 1579 // see section 2.3.2 of Tolga Acar's thesis 1580 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 1581 var rSquare = Element{ 1582 11214533042317621956, 1583 4418601975293183768, 1584 2233550636059863627, 1585 13772400071271951950, 1586 13010224617750716256, 1587 15582310590478290871, 1588 6301429202206019695, 1589 15624904615961126890, 1590 14411832617204527559, 1591 10495912060283172777, 1592 8432856701560321958, 1593 4166778949326216, 1594 } 1595 1596 // toMont converts z to Montgomery form 1597 // sets and returns z = z * r² 1598 func (z *Element) toMont() *Element { 1599 return z.Mul(z, &rSquare) 1600 } 1601 1602 // String returns the decimal representation of z as generated by 1603 // z.Text(10). 1604 func (z *Element) String() string { 1605 return z.Text(10) 1606 } 1607 1608 // toBigInt returns z as a big.Int in Montgomery form 1609 func (z *Element) toBigInt(res *big.Int) *big.Int { 1610 var b [Bytes]byte 1611 binary.BigEndian.PutUint64(b[88:96], z[0]) 1612 binary.BigEndian.PutUint64(b[80:88], z[1]) 1613 binary.BigEndian.PutUint64(b[72:80], z[2]) 1614 binary.BigEndian.PutUint64(b[64:72], z[3]) 1615 binary.BigEndian.PutUint64(b[56:64], z[4]) 1616 binary.BigEndian.PutUint64(b[48:56], z[5]) 1617 binary.BigEndian.PutUint64(b[40:48], z[6]) 1618 binary.BigEndian.PutUint64(b[32:40], z[7]) 1619 binary.BigEndian.PutUint64(b[24:32], z[8]) 1620 binary.BigEndian.PutUint64(b[16:24], z[9]) 1621 binary.BigEndian.PutUint64(b[8:16], z[10]) 1622 binary.BigEndian.PutUint64(b[0:8], z[11]) 1623 1624 return res.SetBytes(b[:]) 1625 } 1626 1627 // Text returns the string representation of z in the given base. 1628 // Base must be between 2 and 36, inclusive. The result uses the 1629 // lower-case letters 'a' to 'z' for digit values 10 to 35. 1630 // No prefix (such as "0x") is added to the string. If z is a nil 1631 // pointer it returns "<nil>". 1632 // If base == 10 and -z fits in a uint16 prefix "-" is added to the string. 1633 func (z *Element) Text(base int) string { 1634 if base < 2 || base > 36 { 1635 panic("invalid base") 1636 } 1637 if z == nil { 1638 return "<nil>" 1639 } 1640 1641 const maxUint16 = 65535 1642 if base == 10 { 1643 var zzNeg Element 1644 zzNeg.Neg(z) 1645 zzNeg.fromMont() 1646 if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 { 1647 return "-" + strconv.FormatUint(zzNeg[0], base) 1648 } 1649 } 1650 zz := *z 1651 zz.fromMont() 1652 if zz.FitsOnOneWord() { 1653 return strconv.FormatUint(zz[0], base) 1654 } 1655 vv := pool.BigInt.Get() 1656 r := zz.toBigInt(vv).Text(base) 1657 pool.BigInt.Put(vv) 1658 return r 1659 } 1660 1661 // BigInt sets and return z as a *big.Int 1662 func (z *Element) BigInt(res *big.Int) *big.Int { 1663 _z := *z 1664 _z.fromMont() 1665 return _z.toBigInt(res) 1666 } 1667 1668 // ToBigIntRegular returns z as a big.Int in regular form 1669 // 1670 // Deprecated: use BigInt(*big.Int) instead 1671 func (z Element) ToBigIntRegular(res *big.Int) *big.Int { 1672 z.fromMont() 1673 return z.toBigInt(res) 1674 } 1675 1676 // Bits provides access to z by returning its value as a little-endian [12]uint64 array. 1677 // Bits is intended to support implementation of missing low-level Element 1678 // functionality outside this package; it should be avoided otherwise. 1679 func (z *Element) Bits() [12]uint64 { 1680 _z := *z 1681 fromMont(&_z) 1682 return _z 1683 } 1684 1685 // Bytes returns the value of z as a big-endian byte array 1686 func (z *Element) Bytes() (res [Bytes]byte) { 1687 BigEndian.PutElement(&res, *z) 1688 return 1689 } 1690 1691 // Marshal returns the value of z as a big-endian byte slice 1692 func (z *Element) Marshal() []byte { 1693 b := z.Bytes() 1694 return b[:] 1695 } 1696 1697 // Unmarshal is an alias for SetBytes, it sets z to the value of e. 1698 func (z *Element) Unmarshal(e []byte) { 1699 z.SetBytes(e) 1700 } 1701 1702 // SetBytes interprets e as the bytes of a big-endian unsigned integer, 1703 // sets z to that value, and returns z. 1704 func (z *Element) SetBytes(e []byte) *Element { 1705 if len(e) == Bytes { 1706 // fast path 1707 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1708 if err == nil { 1709 *z = v 1710 return z 1711 } 1712 } 1713 1714 // slow path. 1715 // get a big int from our pool 1716 vv := pool.BigInt.Get() 1717 vv.SetBytes(e) 1718 1719 // set big int 1720 z.SetBigInt(vv) 1721 1722 // put temporary object back in pool 1723 pool.BigInt.Put(vv) 1724 1725 return z 1726 } 1727 1728 // SetBytesCanonical interprets e as the bytes of a big-endian 96-byte integer. 1729 // If e is not a 96-byte slice or encodes a value higher than q, 1730 // SetBytesCanonical returns an error. 1731 func (z *Element) SetBytesCanonical(e []byte) error { 1732 if len(e) != Bytes { 1733 return errors.New("invalid fp.Element encoding") 1734 } 1735 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1736 if err != nil { 1737 return err 1738 } 1739 *z = v 1740 return nil 1741 } 1742 1743 // SetBigInt sets z to v and returns z 1744 func (z *Element) SetBigInt(v *big.Int) *Element { 1745 z.SetZero() 1746 1747 var zero big.Int 1748 1749 // fast path 1750 c := v.Cmp(&_modulus) 1751 if c == 0 { 1752 // v == 0 1753 return z 1754 } else if c != 1 && v.Cmp(&zero) != -1 { 1755 // 0 < v < q 1756 return z.setBigInt(v) 1757 } 1758 1759 // get temporary big int from the pool 1760 vv := pool.BigInt.Get() 1761 1762 // copy input + modular reduction 1763 vv.Mod(v, &_modulus) 1764 1765 // set big int byte value 1766 z.setBigInt(vv) 1767 1768 // release object into pool 1769 pool.BigInt.Put(vv) 1770 return z 1771 } 1772 1773 // setBigInt assumes 0 ⩽ v < q 1774 func (z *Element) setBigInt(v *big.Int) *Element { 1775 vBits := v.Bits() 1776 1777 if bits.UintSize == 64 { 1778 for i := 0; i < len(vBits); i++ { 1779 z[i] = uint64(vBits[i]) 1780 } 1781 } else { 1782 for i := 0; i < len(vBits); i++ { 1783 if i%2 == 0 { 1784 z[i/2] = uint64(vBits[i]) 1785 } else { 1786 z[i/2] |= uint64(vBits[i]) << 32 1787 } 1788 } 1789 } 1790 1791 return z.toMont() 1792 } 1793 1794 // SetString creates a big.Int with number and calls SetBigInt on z 1795 // 1796 // The number prefix determines the actual base: A prefix of 1797 // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8, 1798 // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10 1799 // and no prefix is accepted. 1800 // 1801 // For base 16, lower and upper case letters are considered the same: 1802 // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. 1803 // 1804 // An underscore character ”_” may appear between a base 1805 // prefix and an adjacent digit, and between successive digits; such 1806 // underscores do not change the value of the number. 1807 // Incorrect placement of underscores is reported as a panic if there 1808 // are no other errors. 1809 // 1810 // If the number is invalid this method leaves z unchanged and returns nil, error. 1811 func (z *Element) SetString(number string) (*Element, error) { 1812 // get temporary big int from the pool 1813 vv := pool.BigInt.Get() 1814 1815 if _, ok := vv.SetString(number, 0); !ok { 1816 return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number) 1817 } 1818 1819 z.SetBigInt(vv) 1820 1821 // release object into pool 1822 pool.BigInt.Put(vv) 1823 1824 return z, nil 1825 } 1826 1827 // MarshalJSON returns json encoding of z (z.Text(10)) 1828 // If z == nil, returns null 1829 func (z *Element) MarshalJSON() ([]byte, error) { 1830 if z == nil { 1831 return []byte("null"), nil 1832 } 1833 const maxSafeBound = 15 // we encode it as number if it's small 1834 s := z.Text(10) 1835 if len(s) <= maxSafeBound { 1836 return []byte(s), nil 1837 } 1838 var sbb strings.Builder 1839 sbb.WriteByte('"') 1840 sbb.WriteString(s) 1841 sbb.WriteByte('"') 1842 return []byte(sbb.String()), nil 1843 } 1844 1845 // UnmarshalJSON accepts numbers and strings as input 1846 // See Element.SetString for valid prefixes (0x, 0b, ...) 1847 func (z *Element) UnmarshalJSON(data []byte) error { 1848 s := string(data) 1849 if len(s) > Bits*3 { 1850 return errors.New("value too large (max = Element.Bits * 3)") 1851 } 1852 1853 // we accept numbers and strings, remove leading and trailing quotes if any 1854 if len(s) > 0 && s[0] == '"' { 1855 s = s[1:] 1856 } 1857 if len(s) > 0 && s[len(s)-1] == '"' { 1858 s = s[:len(s)-1] 1859 } 1860 1861 // get temporary big int from the pool 1862 vv := pool.BigInt.Get() 1863 1864 if _, ok := vv.SetString(s, 0); !ok { 1865 return errors.New("can't parse into a big.Int: " + s) 1866 } 1867 1868 z.SetBigInt(vv) 1869 1870 // release object into pool 1871 pool.BigInt.Put(vv) 1872 return nil 1873 } 1874 1875 // A ByteOrder specifies how to convert byte slices into a Element 1876 type ByteOrder interface { 1877 Element(*[Bytes]byte) (Element, error) 1878 PutElement(*[Bytes]byte, Element) 1879 String() string 1880 } 1881 1882 // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 1883 var BigEndian bigEndian 1884 1885 type bigEndian struct{} 1886 1887 // Element interpret b is a big-endian 96-byte slice. 1888 // If b encodes a value higher than q, Element returns error. 1889 func (bigEndian) Element(b *[Bytes]byte) (Element, error) { 1890 var z Element 1891 z[0] = binary.BigEndian.Uint64((*b)[88:96]) 1892 z[1] = binary.BigEndian.Uint64((*b)[80:88]) 1893 z[2] = binary.BigEndian.Uint64((*b)[72:80]) 1894 z[3] = binary.BigEndian.Uint64((*b)[64:72]) 1895 z[4] = binary.BigEndian.Uint64((*b)[56:64]) 1896 z[5] = binary.BigEndian.Uint64((*b)[48:56]) 1897 z[6] = binary.BigEndian.Uint64((*b)[40:48]) 1898 z[7] = binary.BigEndian.Uint64((*b)[32:40]) 1899 z[8] = binary.BigEndian.Uint64((*b)[24:32]) 1900 z[9] = binary.BigEndian.Uint64((*b)[16:24]) 1901 z[10] = binary.BigEndian.Uint64((*b)[8:16]) 1902 z[11] = binary.BigEndian.Uint64((*b)[0:8]) 1903 1904 if !z.smallerThanModulus() { 1905 return Element{}, errors.New("invalid fp.Element encoding") 1906 } 1907 1908 z.toMont() 1909 return z, nil 1910 } 1911 1912 func (bigEndian) PutElement(b *[Bytes]byte, e Element) { 1913 e.fromMont() 1914 binary.BigEndian.PutUint64((*b)[88:96], e[0]) 1915 binary.BigEndian.PutUint64((*b)[80:88], e[1]) 1916 binary.BigEndian.PutUint64((*b)[72:80], e[2]) 1917 binary.BigEndian.PutUint64((*b)[64:72], e[3]) 1918 binary.BigEndian.PutUint64((*b)[56:64], e[4]) 1919 binary.BigEndian.PutUint64((*b)[48:56], e[5]) 1920 binary.BigEndian.PutUint64((*b)[40:48], e[6]) 1921 binary.BigEndian.PutUint64((*b)[32:40], e[7]) 1922 binary.BigEndian.PutUint64((*b)[24:32], e[8]) 1923 binary.BigEndian.PutUint64((*b)[16:24], e[9]) 1924 binary.BigEndian.PutUint64((*b)[8:16], e[10]) 1925 binary.BigEndian.PutUint64((*b)[0:8], e[11]) 1926 } 1927 1928 func (bigEndian) String() string { return "BigEndian" } 1929 1930 // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder. 1931 var LittleEndian littleEndian 1932 1933 type littleEndian struct{} 1934 1935 func (littleEndian) Element(b *[Bytes]byte) (Element, error) { 1936 var z Element 1937 z[0] = binary.LittleEndian.Uint64((*b)[0:8]) 1938 z[1] = binary.LittleEndian.Uint64((*b)[8:16]) 1939 z[2] = binary.LittleEndian.Uint64((*b)[16:24]) 1940 z[3] = binary.LittleEndian.Uint64((*b)[24:32]) 1941 z[4] = binary.LittleEndian.Uint64((*b)[32:40]) 1942 z[5] = binary.LittleEndian.Uint64((*b)[40:48]) 1943 z[6] = binary.LittleEndian.Uint64((*b)[48:56]) 1944 z[7] = binary.LittleEndian.Uint64((*b)[56:64]) 1945 z[8] = binary.LittleEndian.Uint64((*b)[64:72]) 1946 z[9] = binary.LittleEndian.Uint64((*b)[72:80]) 1947 z[10] = binary.LittleEndian.Uint64((*b)[80:88]) 1948 z[11] = binary.LittleEndian.Uint64((*b)[88:96]) 1949 1950 if !z.smallerThanModulus() { 1951 return Element{}, errors.New("invalid fp.Element encoding") 1952 } 1953 1954 z.toMont() 1955 return z, nil 1956 } 1957 1958 func (littleEndian) PutElement(b *[Bytes]byte, e Element) { 1959 e.fromMont() 1960 binary.LittleEndian.PutUint64((*b)[0:8], e[0]) 1961 binary.LittleEndian.PutUint64((*b)[8:16], e[1]) 1962 binary.LittleEndian.PutUint64((*b)[16:24], e[2]) 1963 binary.LittleEndian.PutUint64((*b)[24:32], e[3]) 1964 binary.LittleEndian.PutUint64((*b)[32:40], e[4]) 1965 binary.LittleEndian.PutUint64((*b)[40:48], e[5]) 1966 binary.LittleEndian.PutUint64((*b)[48:56], e[6]) 1967 binary.LittleEndian.PutUint64((*b)[56:64], e[7]) 1968 binary.LittleEndian.PutUint64((*b)[64:72], e[8]) 1969 binary.LittleEndian.PutUint64((*b)[72:80], e[9]) 1970 binary.LittleEndian.PutUint64((*b)[80:88], e[10]) 1971 binary.LittleEndian.PutUint64((*b)[88:96], e[11]) 1972 } 1973 1974 func (littleEndian) String() string { return "LittleEndian" } 1975 1976 // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) 1977 func (z *Element) Legendre() int { 1978 var l Element 1979 // z^((q-1)/2) 1980 l.expByLegendreExp(*z) 1981 1982 if l.IsZero() { 1983 return 0 1984 } 1985 1986 // if l == 1 1987 if l.IsOne() { 1988 return 1 1989 } 1990 return -1 1991 } 1992 1993 // Sqrt z = √x (mod q) 1994 // if the square root doesn't exist (x is not a square mod q) 1995 // Sqrt leaves z unchanged and returns nil 1996 func (z *Element) Sqrt(x *Element) *Element { 1997 // q ≡ 1 (mod 4) 1998 // see modSqrtTonelliShanks in math/big/int.go 1999 // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf 2000 2001 var y, b, t, w Element 2002 // w = x^((s-1)/2)) 2003 w.expBySqrtExp(*x) 2004 2005 // y = x^((s+1)/2)) = w * x 2006 y.Mul(x, &w) 2007 2008 // b = xˢ = w * w * x = y * x 2009 b.Mul(&w, &y) 2010 2011 // g = nonResidue ^ s 2012 var g = Element{ 2013 17302715199413996045, 2014 15077845457253267709, 2015 8842885729139027579, 2016 12189878420705505575, 2017 12380986790262239346, 2018 585111498723936856, 2019 4947215576903759546, 2020 1186632482028566920, 2021 14543050817583235372, 2022 5644943604719368358, 2023 9440830989708189862, 2024 1039766423535362, 2025 } 2026 r := uint64(82) 2027 2028 // compute legendre symbol 2029 // t = x^((q-1)/2) = r-1 squaring of xˢ 2030 t = b 2031 for i := uint64(0); i < r-1; i++ { 2032 t.Square(&t) 2033 } 2034 if t.IsZero() { 2035 return z.SetZero() 2036 } 2037 if !t.IsOne() { 2038 // t != 1, we don't have a square root 2039 return nil 2040 } 2041 for { 2042 var m uint64 2043 t = b 2044 2045 // for t != 1 2046 for !t.IsOne() { 2047 t.Square(&t) 2048 m++ 2049 } 2050 2051 if m == 0 { 2052 return z.Set(&y) 2053 } 2054 // t = g^(2^(r-m-1)) (mod q) 2055 ge := int(r - m - 1) 2056 t = g 2057 for ge > 0 { 2058 t.Square(&t) 2059 ge-- 2060 } 2061 2062 g.Square(&t) 2063 y.Mul(&y, &t) 2064 b.Mul(&b, &g) 2065 r = m 2066 } 2067 } 2068 2069 const ( 2070 k = 32 // word size / 2 2071 signBitSelector = uint64(1) << 63 2072 approxLowBitsN = k - 1 2073 approxHighBitsN = k + 1 2074 ) 2075 2076 const ( 2077 inversionCorrectionFactorWord0 = 16061512306393401370 2078 inversionCorrectionFactorWord1 = 12469388396993975658 2079 inversionCorrectionFactorWord2 = 12941199289357671440 2080 inversionCorrectionFactorWord3 = 7124172912896157387 2081 inversionCorrectionFactorWord4 = 7772575019676086033 2082 inversionCorrectionFactorWord5 = 5410978411075096125 2083 inversionCorrectionFactorWord6 = 15135850590536056079 2084 inversionCorrectionFactorWord7 = 14366933837510102702 2085 inversionCorrectionFactorWord8 = 17864238268145908760 2086 inversionCorrectionFactorWord9 = 11845167622525040086 2087 inversionCorrectionFactorWord10 = 12428223085045138512 2088 inversionCorrectionFactorWord11 = 2992926161591192 2089 invIterationsN = 50 2090 ) 2091 2092 // Inverse z = x⁻¹ (mod q) 2093 // 2094 // if x == 0, sets and returns z = x 2095 func (z *Element) Inverse(x *Element) *Element { 2096 // Implements "Optimized Binary GCD for Modular Inversion" 2097 // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf 2098 2099 a := *x 2100 b := Element{ 2101 q0, 2102 q1, 2103 q2, 2104 q3, 2105 q4, 2106 q5, 2107 q6, 2108 q7, 2109 q8, 2110 q9, 2111 q10, 2112 q11, 2113 } // b := q 2114 2115 u := Element{1} 2116 2117 // Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v] 2118 // cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1) 2119 var c0, c1 int64 2120 2121 // Saved update factors to reduce the number of field multiplications 2122 var pf0, pf1, pg0, pg1 int64 2123 2124 var i uint 2125 2126 var v, s Element 2127 2128 // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations 2129 // This also lets us get away with half as many updates to u,v 2130 // To make this constant-time-ish, replace the condition with i < invIterationsN 2131 for i = 0; i&1 == 1 || !a.IsZero(); i++ { 2132 n := max(a.BitLen(), b.BitLen()) 2133 aApprox, bApprox := approximate(&a, n), approximate(&b, n) 2134 2135 // f₀, g₀, f₁, g₁ = 1, 0, 0, 1 2136 c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 2137 2138 for j := 0; j < approxLowBitsN; j++ { 2139 2140 // -2ʲ < f₀, f₁ ≤ 2ʲ 2141 // |f₀| + |f₁| < 2ʲ⁺¹ 2142 2143 if aApprox&1 == 0 { 2144 aApprox /= 2 2145 } else { 2146 s, borrow := bits.Sub64(aApprox, bApprox, 0) 2147 if borrow == 1 { 2148 s = bApprox - aApprox 2149 bApprox = aApprox 2150 c0, c1 = c1, c0 2151 // invariants unchanged 2152 } 2153 2154 aApprox = s / 2 2155 c0 = c0 - c1 2156 2157 // Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking) 2158 // Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹ 2159 // Invariants unchanged for f₁ 2160 } 2161 2162 c1 *= 2 2163 // -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹ 2164 // So now |f₀| + |f₁| < 2ʲ⁺² 2165 } 2166 2167 s = a 2168 2169 var g0 int64 2170 // from this point on c0 aliases for f0 2171 c0, g0 = updateFactorsDecompose(c0) 2172 aHi := a.linearCombNonModular(&s, c0, &b, g0) 2173 if aHi&signBitSelector != 0 { 2174 // if aHi < 0 2175 c0, g0 = -c0, -g0 2176 aHi = negL(&a, aHi) 2177 } 2178 // right-shift a by k-1 bits 2179 a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) 2180 a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) 2181 a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) 2182 a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) 2183 a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN) 2184 a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN) 2185 a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN) 2186 a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN) 2187 a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN) 2188 a[9] = (a[9] >> approxLowBitsN) | ((a[10]) << approxHighBitsN) 2189 a[10] = (a[10] >> approxLowBitsN) | ((a[11]) << approxHighBitsN) 2190 a[11] = (a[11] >> approxLowBitsN) | (aHi << approxHighBitsN) 2191 2192 var f1 int64 2193 // from this point on c1 aliases for g0 2194 f1, c1 = updateFactorsDecompose(c1) 2195 bHi := b.linearCombNonModular(&s, f1, &b, c1) 2196 if bHi&signBitSelector != 0 { 2197 // if bHi < 0 2198 f1, c1 = -f1, -c1 2199 bHi = negL(&b, bHi) 2200 } 2201 // right-shift b by k-1 bits 2202 b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) 2203 b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) 2204 b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) 2205 b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) 2206 b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) 2207 b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN) 2208 b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN) 2209 b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN) 2210 b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN) 2211 b[9] = (b[9] >> approxLowBitsN) | ((b[10]) << approxHighBitsN) 2212 b[10] = (b[10] >> approxLowBitsN) | ((b[11]) << approxHighBitsN) 2213 b[11] = (b[11] >> approxLowBitsN) | (bHi << approxHighBitsN) 2214 2215 if i&1 == 1 { 2216 // Combine current update factors with previously stored ones 2217 // [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values 2218 // We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁| 2219 // = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹ 2220 // So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register 2221 2222 // c₀ aliases f₀, c₁ aliases g₁ 2223 c0, g0, f1, c1 = c0*pf0+g0*pf1, 2224 c0*pg0+g0*pg1, 2225 f1*pf0+c1*pf1, 2226 f1*pg0+c1*pg1 2227 2228 s = u 2229 2230 // 0 ≤ u, v < 2²⁵⁵ 2231 // |F₀|, |G₀| < 2⁶³ 2232 u.linearComb(&u, c0, &v, g0) 2233 // |F₁|, |G₁| < 2⁶³ 2234 v.linearComb(&s, f1, &v, c1) 2235 2236 } else { 2237 // Save update factors 2238 pf0, pg0, pf1, pg1 = c0, g0, f1, c1 2239 } 2240 } 2241 2242 // For every iteration that we miss, v is not being multiplied by 2ᵏ⁻² 2243 const pSq uint64 = 1 << (2 * (k - 1)) 2244 a = Element{pSq} 2245 // If the function is constant-time ish, this loop will not run (no need to take it out explicitly) 2246 for ; i < invIterationsN; i += 2 { 2247 // could optimize further with mul by word routine or by pre-computing a table since with k=26, 2248 // we would multiply by pSq up to 13times; 2249 // on x86, the assembly routine outperforms generic code for mul by word 2250 // on arm64, we may loose up to ~5% for 6 limbs 2251 v.Mul(&v, &a) 2252 } 2253 2254 u.Set(x) // for correctness check 2255 2256 z.Mul(&v, &Element{ 2257 inversionCorrectionFactorWord0, 2258 inversionCorrectionFactorWord1, 2259 inversionCorrectionFactorWord2, 2260 inversionCorrectionFactorWord3, 2261 inversionCorrectionFactorWord4, 2262 inversionCorrectionFactorWord5, 2263 inversionCorrectionFactorWord6, 2264 inversionCorrectionFactorWord7, 2265 inversionCorrectionFactorWord8, 2266 inversionCorrectionFactorWord9, 2267 inversionCorrectionFactorWord10, 2268 inversionCorrectionFactorWord11, 2269 }) 2270 2271 // correctness check 2272 v.Mul(&u, z) 2273 if !v.IsOne() && !u.IsZero() { 2274 return z.inverseExp(u) 2275 } 2276 2277 return z 2278 } 2279 2280 // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q) 2281 func (z *Element) inverseExp(x Element) *Element { 2282 // e == q-2 2283 e := Modulus() 2284 e.Sub(e, big.NewInt(2)) 2285 2286 z.Set(&x) 2287 2288 for i := e.BitLen() - 2; i >= 0; i-- { 2289 z.Square(z) 2290 if e.Bit(i) == 1 { 2291 z.Mul(z, &x) 2292 } 2293 } 2294 2295 return z 2296 } 2297 2298 // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits 2299 // if x fits in a word as is, no approximation necessary 2300 func approximate(x *Element, nBits int) uint64 { 2301 2302 if nBits <= 64 { 2303 return x[0] 2304 } 2305 2306 const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones 2307 lo := mask & x[0] 2308 2309 hiWordIndex := (nBits - 1) / 64 2310 2311 hiWordBitsAvailable := nBits - hiWordIndex*64 2312 hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) 2313 2314 mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) 2315 hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) 2316 2317 mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) 2318 mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed 2319 2320 return lo | mid | hi 2321 } 2322 2323 // linearComb z = xC * x + yC * y; 2324 // 0 ≤ x, y < 2⁷⁵⁶ 2325 // |xC|, |yC| < 2⁶³ 2326 func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) { 2327 // | (hi, z) | < 2 * 2⁶³ * 2⁷⁵⁶ = 2⁸²⁰ 2328 // therefore | hi | < 2⁵² ≤ 2⁶³ 2329 hi := z.linearCombNonModular(x, xC, y, yC) 2330 z.montReduceSigned(z, hi) 2331 } 2332 2333 // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm 2334 // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit. 2335 func (z *Element) montReduceSigned(x *Element, xHi uint64) { 2336 const signBitRemover = ^signBitSelector 2337 mustNeg := xHi&signBitSelector != 0 2338 // the SOS implementation requires that most significant bit is 0 2339 // Let X be xHi*r + x 2340 // If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement) 2341 xHi &= signBitRemover 2342 // with this a negative X is now represented as 2⁶³ r + X 2343 2344 var t [2*Limbs - 1]uint64 2345 var C uint64 2346 2347 m := x[0] * qInvNeg 2348 2349 C = madd0(m, q0, x[0]) 2350 C, t[1] = madd2(m, q1, x[1], C) 2351 C, t[2] = madd2(m, q2, x[2], C) 2352 C, t[3] = madd2(m, q3, x[3], C) 2353 C, t[4] = madd2(m, q4, x[4], C) 2354 C, t[5] = madd2(m, q5, x[5], C) 2355 C, t[6] = madd2(m, q6, x[6], C) 2356 C, t[7] = madd2(m, q7, x[7], C) 2357 C, t[8] = madd2(m, q8, x[8], C) 2358 C, t[9] = madd2(m, q9, x[9], C) 2359 C, t[10] = madd2(m, q10, x[10], C) 2360 C, t[11] = madd2(m, q11, x[11], C) 2361 2362 // m * qElement[11] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 2363 // x[11] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2 2364 // On LHS, (C, t[11]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1 2365 // So on LHS, C ≤ 2⁶³ 2366 t[12] = xHi + C 2367 // xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴ 2368 2369 // <standard SOS> 2370 { 2371 const i = 1 2372 m = t[i] * qInvNeg 2373 2374 C = madd0(m, q0, t[i+0]) 2375 C, t[i+1] = madd2(m, q1, t[i+1], C) 2376 C, t[i+2] = madd2(m, q2, t[i+2], C) 2377 C, t[i+3] = madd2(m, q3, t[i+3], C) 2378 C, t[i+4] = madd2(m, q4, t[i+4], C) 2379 C, t[i+5] = madd2(m, q5, t[i+5], C) 2380 C, t[i+6] = madd2(m, q6, t[i+6], C) 2381 C, t[i+7] = madd2(m, q7, t[i+7], C) 2382 C, t[i+8] = madd2(m, q8, t[i+8], C) 2383 C, t[i+9] = madd2(m, q9, t[i+9], C) 2384 C, t[i+10] = madd2(m, q10, t[i+10], C) 2385 C, t[i+11] = madd2(m, q11, t[i+11], C) 2386 2387 t[i+Limbs] += C 2388 } 2389 { 2390 const i = 2 2391 m = t[i] * qInvNeg 2392 2393 C = madd0(m, q0, t[i+0]) 2394 C, t[i+1] = madd2(m, q1, t[i+1], C) 2395 C, t[i+2] = madd2(m, q2, t[i+2], C) 2396 C, t[i+3] = madd2(m, q3, t[i+3], C) 2397 C, t[i+4] = madd2(m, q4, t[i+4], C) 2398 C, t[i+5] = madd2(m, q5, t[i+5], C) 2399 C, t[i+6] = madd2(m, q6, t[i+6], C) 2400 C, t[i+7] = madd2(m, q7, t[i+7], C) 2401 C, t[i+8] = madd2(m, q8, t[i+8], C) 2402 C, t[i+9] = madd2(m, q9, t[i+9], C) 2403 C, t[i+10] = madd2(m, q10, t[i+10], C) 2404 C, t[i+11] = madd2(m, q11, t[i+11], C) 2405 2406 t[i+Limbs] += C 2407 } 2408 { 2409 const i = 3 2410 m = t[i] * qInvNeg 2411 2412 C = madd0(m, q0, t[i+0]) 2413 C, t[i+1] = madd2(m, q1, t[i+1], C) 2414 C, t[i+2] = madd2(m, q2, t[i+2], C) 2415 C, t[i+3] = madd2(m, q3, t[i+3], C) 2416 C, t[i+4] = madd2(m, q4, t[i+4], C) 2417 C, t[i+5] = madd2(m, q5, t[i+5], C) 2418 C, t[i+6] = madd2(m, q6, t[i+6], C) 2419 C, t[i+7] = madd2(m, q7, t[i+7], C) 2420 C, t[i+8] = madd2(m, q8, t[i+8], C) 2421 C, t[i+9] = madd2(m, q9, t[i+9], C) 2422 C, t[i+10] = madd2(m, q10, t[i+10], C) 2423 C, t[i+11] = madd2(m, q11, t[i+11], C) 2424 2425 t[i+Limbs] += C 2426 } 2427 { 2428 const i = 4 2429 m = t[i] * qInvNeg 2430 2431 C = madd0(m, q0, t[i+0]) 2432 C, t[i+1] = madd2(m, q1, t[i+1], C) 2433 C, t[i+2] = madd2(m, q2, t[i+2], C) 2434 C, t[i+3] = madd2(m, q3, t[i+3], C) 2435 C, t[i+4] = madd2(m, q4, t[i+4], C) 2436 C, t[i+5] = madd2(m, q5, t[i+5], C) 2437 C, t[i+6] = madd2(m, q6, t[i+6], C) 2438 C, t[i+7] = madd2(m, q7, t[i+7], C) 2439 C, t[i+8] = madd2(m, q8, t[i+8], C) 2440 C, t[i+9] = madd2(m, q9, t[i+9], C) 2441 C, t[i+10] = madd2(m, q10, t[i+10], C) 2442 C, t[i+11] = madd2(m, q11, t[i+11], C) 2443 2444 t[i+Limbs] += C 2445 } 2446 { 2447 const i = 5 2448 m = t[i] * qInvNeg 2449 2450 C = madd0(m, q0, t[i+0]) 2451 C, t[i+1] = madd2(m, q1, t[i+1], C) 2452 C, t[i+2] = madd2(m, q2, t[i+2], C) 2453 C, t[i+3] = madd2(m, q3, t[i+3], C) 2454 C, t[i+4] = madd2(m, q4, t[i+4], C) 2455 C, t[i+5] = madd2(m, q5, t[i+5], C) 2456 C, t[i+6] = madd2(m, q6, t[i+6], C) 2457 C, t[i+7] = madd2(m, q7, t[i+7], C) 2458 C, t[i+8] = madd2(m, q8, t[i+8], C) 2459 C, t[i+9] = madd2(m, q9, t[i+9], C) 2460 C, t[i+10] = madd2(m, q10, t[i+10], C) 2461 C, t[i+11] = madd2(m, q11, t[i+11], C) 2462 2463 t[i+Limbs] += C 2464 } 2465 { 2466 const i = 6 2467 m = t[i] * qInvNeg 2468 2469 C = madd0(m, q0, t[i+0]) 2470 C, t[i+1] = madd2(m, q1, t[i+1], C) 2471 C, t[i+2] = madd2(m, q2, t[i+2], C) 2472 C, t[i+3] = madd2(m, q3, t[i+3], C) 2473 C, t[i+4] = madd2(m, q4, t[i+4], C) 2474 C, t[i+5] = madd2(m, q5, t[i+5], C) 2475 C, t[i+6] = madd2(m, q6, t[i+6], C) 2476 C, t[i+7] = madd2(m, q7, t[i+7], C) 2477 C, t[i+8] = madd2(m, q8, t[i+8], C) 2478 C, t[i+9] = madd2(m, q9, t[i+9], C) 2479 C, t[i+10] = madd2(m, q10, t[i+10], C) 2480 C, t[i+11] = madd2(m, q11, t[i+11], C) 2481 2482 t[i+Limbs] += C 2483 } 2484 { 2485 const i = 7 2486 m = t[i] * qInvNeg 2487 2488 C = madd0(m, q0, t[i+0]) 2489 C, t[i+1] = madd2(m, q1, t[i+1], C) 2490 C, t[i+2] = madd2(m, q2, t[i+2], C) 2491 C, t[i+3] = madd2(m, q3, t[i+3], C) 2492 C, t[i+4] = madd2(m, q4, t[i+4], C) 2493 C, t[i+5] = madd2(m, q5, t[i+5], C) 2494 C, t[i+6] = madd2(m, q6, t[i+6], C) 2495 C, t[i+7] = madd2(m, q7, t[i+7], C) 2496 C, t[i+8] = madd2(m, q8, t[i+8], C) 2497 C, t[i+9] = madd2(m, q9, t[i+9], C) 2498 C, t[i+10] = madd2(m, q10, t[i+10], C) 2499 C, t[i+11] = madd2(m, q11, t[i+11], C) 2500 2501 t[i+Limbs] += C 2502 } 2503 { 2504 const i = 8 2505 m = t[i] * qInvNeg 2506 2507 C = madd0(m, q0, t[i+0]) 2508 C, t[i+1] = madd2(m, q1, t[i+1], C) 2509 C, t[i+2] = madd2(m, q2, t[i+2], C) 2510 C, t[i+3] = madd2(m, q3, t[i+3], C) 2511 C, t[i+4] = madd2(m, q4, t[i+4], C) 2512 C, t[i+5] = madd2(m, q5, t[i+5], C) 2513 C, t[i+6] = madd2(m, q6, t[i+6], C) 2514 C, t[i+7] = madd2(m, q7, t[i+7], C) 2515 C, t[i+8] = madd2(m, q8, t[i+8], C) 2516 C, t[i+9] = madd2(m, q9, t[i+9], C) 2517 C, t[i+10] = madd2(m, q10, t[i+10], C) 2518 C, t[i+11] = madd2(m, q11, t[i+11], C) 2519 2520 t[i+Limbs] += C 2521 } 2522 { 2523 const i = 9 2524 m = t[i] * qInvNeg 2525 2526 C = madd0(m, q0, t[i+0]) 2527 C, t[i+1] = madd2(m, q1, t[i+1], C) 2528 C, t[i+2] = madd2(m, q2, t[i+2], C) 2529 C, t[i+3] = madd2(m, q3, t[i+3], C) 2530 C, t[i+4] = madd2(m, q4, t[i+4], C) 2531 C, t[i+5] = madd2(m, q5, t[i+5], C) 2532 C, t[i+6] = madd2(m, q6, t[i+6], C) 2533 C, t[i+7] = madd2(m, q7, t[i+7], C) 2534 C, t[i+8] = madd2(m, q8, t[i+8], C) 2535 C, t[i+9] = madd2(m, q9, t[i+9], C) 2536 C, t[i+10] = madd2(m, q10, t[i+10], C) 2537 C, t[i+11] = madd2(m, q11, t[i+11], C) 2538 2539 t[i+Limbs] += C 2540 } 2541 { 2542 const i = 10 2543 m = t[i] * qInvNeg 2544 2545 C = madd0(m, q0, t[i+0]) 2546 C, t[i+1] = madd2(m, q1, t[i+1], C) 2547 C, t[i+2] = madd2(m, q2, t[i+2], C) 2548 C, t[i+3] = madd2(m, q3, t[i+3], C) 2549 C, t[i+4] = madd2(m, q4, t[i+4], C) 2550 C, t[i+5] = madd2(m, q5, t[i+5], C) 2551 C, t[i+6] = madd2(m, q6, t[i+6], C) 2552 C, t[i+7] = madd2(m, q7, t[i+7], C) 2553 C, t[i+8] = madd2(m, q8, t[i+8], C) 2554 C, t[i+9] = madd2(m, q9, t[i+9], C) 2555 C, t[i+10] = madd2(m, q10, t[i+10], C) 2556 C, t[i+11] = madd2(m, q11, t[i+11], C) 2557 2558 t[i+Limbs] += C 2559 } 2560 { 2561 const i = 11 2562 m := t[i] * qInvNeg 2563 2564 C = madd0(m, q0, t[i+0]) 2565 C, z[0] = madd2(m, q1, t[i+1], C) 2566 C, z[1] = madd2(m, q2, t[i+2], C) 2567 C, z[2] = madd2(m, q3, t[i+3], C) 2568 C, z[3] = madd2(m, q4, t[i+4], C) 2569 C, z[4] = madd2(m, q5, t[i+5], C) 2570 C, z[5] = madd2(m, q6, t[i+6], C) 2571 C, z[6] = madd2(m, q7, t[i+7], C) 2572 C, z[7] = madd2(m, q8, t[i+8], C) 2573 C, z[8] = madd2(m, q9, t[i+9], C) 2574 C, z[9] = madd2(m, q10, t[i+10], C) 2575 z[11], z[10] = madd2(m, q11, t[i+11], C) 2576 } 2577 2578 // if z ⩾ q → z -= q 2579 if !z.smallerThanModulus() { 2580 var b uint64 2581 z[0], b = bits.Sub64(z[0], q0, 0) 2582 z[1], b = bits.Sub64(z[1], q1, b) 2583 z[2], b = bits.Sub64(z[2], q2, b) 2584 z[3], b = bits.Sub64(z[3], q3, b) 2585 z[4], b = bits.Sub64(z[4], q4, b) 2586 z[5], b = bits.Sub64(z[5], q5, b) 2587 z[6], b = bits.Sub64(z[6], q6, b) 2588 z[7], b = bits.Sub64(z[7], q7, b) 2589 z[8], b = bits.Sub64(z[8], q8, b) 2590 z[9], b = bits.Sub64(z[9], q9, b) 2591 z[10], b = bits.Sub64(z[10], q10, b) 2592 z[11], _ = bits.Sub64(z[11], q11, b) 2593 } 2594 // </standard SOS> 2595 2596 if mustNeg { 2597 // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead 2598 var b uint64 2599 z[0], b = bits.Sub64(z[0], signBitSelector, 0) 2600 z[1], b = bits.Sub64(z[1], 0, b) 2601 z[2], b = bits.Sub64(z[2], 0, b) 2602 z[3], b = bits.Sub64(z[3], 0, b) 2603 z[4], b = bits.Sub64(z[4], 0, b) 2604 z[5], b = bits.Sub64(z[5], 0, b) 2605 z[6], b = bits.Sub64(z[6], 0, b) 2606 z[7], b = bits.Sub64(z[7], 0, b) 2607 z[8], b = bits.Sub64(z[8], 0, b) 2608 z[9], b = bits.Sub64(z[9], 0, b) 2609 z[10], b = bits.Sub64(z[10], 0, b) 2610 z[11], b = bits.Sub64(z[11], 0, b) 2611 2612 // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 2613 2614 if b != 0 { 2615 // z[11] = -1 2616 // negative: add q 2617 const neg1 = 0xFFFFFFFFFFFFFFFF 2618 2619 var carry uint64 2620 2621 z[0], carry = bits.Add64(z[0], q0, 0) 2622 z[1], carry = bits.Add64(z[1], q1, carry) 2623 z[2], carry = bits.Add64(z[2], q2, carry) 2624 z[3], carry = bits.Add64(z[3], q3, carry) 2625 z[4], carry = bits.Add64(z[4], q4, carry) 2626 z[5], carry = bits.Add64(z[5], q5, carry) 2627 z[6], carry = bits.Add64(z[6], q6, carry) 2628 z[7], carry = bits.Add64(z[7], q7, carry) 2629 z[8], carry = bits.Add64(z[8], q8, carry) 2630 z[9], carry = bits.Add64(z[9], q9, carry) 2631 z[10], carry = bits.Add64(z[10], q10, carry) 2632 z[11], _ = bits.Add64(neg1, q11, carry) 2633 } 2634 } 2635 } 2636 2637 const ( 2638 updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) 2639 updateFactorIdentityMatrixRow0 = 1 2640 updateFactorIdentityMatrixRow1 = 1 << 32 2641 ) 2642 2643 func updateFactorsDecompose(c int64) (int64, int64) { 2644 c += updateFactorsConversionBias 2645 const low32BitsFilter int64 = 0xFFFFFFFF 2646 f := c&low32BitsFilter - 0x7FFFFFFF 2647 g := c>>32&low32BitsFilter - 0x7FFFFFFF 2648 return f, g 2649 } 2650 2651 // negL negates in place [x | xHi] and return the new most significant word xHi 2652 func negL(x *Element, xHi uint64) uint64 { 2653 var b uint64 2654 2655 x[0], b = bits.Sub64(0, x[0], 0) 2656 x[1], b = bits.Sub64(0, x[1], b) 2657 x[2], b = bits.Sub64(0, x[2], b) 2658 x[3], b = bits.Sub64(0, x[3], b) 2659 x[4], b = bits.Sub64(0, x[4], b) 2660 x[5], b = bits.Sub64(0, x[5], b) 2661 x[6], b = bits.Sub64(0, x[6], b) 2662 x[7], b = bits.Sub64(0, x[7], b) 2663 x[8], b = bits.Sub64(0, x[8], b) 2664 x[9], b = bits.Sub64(0, x[9], b) 2665 x[10], b = bits.Sub64(0, x[10], b) 2666 x[11], b = bits.Sub64(0, x[11], b) 2667 xHi, _ = bits.Sub64(0, xHi, b) 2668 2669 return xHi 2670 } 2671 2672 // mulWNonModular multiplies by one word in non-montgomery, without reducing 2673 func (z *Element) mulWNonModular(x *Element, y int64) uint64 { 2674 2675 // w := abs(y) 2676 m := y >> 63 2677 w := uint64((y ^ m) - m) 2678 2679 var c uint64 2680 c, z[0] = bits.Mul64(x[0], w) 2681 c, z[1] = madd1(x[1], w, c) 2682 c, z[2] = madd1(x[2], w, c) 2683 c, z[3] = madd1(x[3], w, c) 2684 c, z[4] = madd1(x[4], w, c) 2685 c, z[5] = madd1(x[5], w, c) 2686 c, z[6] = madd1(x[6], w, c) 2687 c, z[7] = madd1(x[7], w, c) 2688 c, z[8] = madd1(x[8], w, c) 2689 c, z[9] = madd1(x[9], w, c) 2690 c, z[10] = madd1(x[10], w, c) 2691 c, z[11] = madd1(x[11], w, c) 2692 2693 if y < 0 { 2694 c = negL(z, c) 2695 } 2696 2697 return c 2698 } 2699 2700 // linearCombNonModular computes a linear combination without modular reduction 2701 func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { 2702 var yTimes Element 2703 2704 yHi := yTimes.mulWNonModular(y, yC) 2705 xHi := z.mulWNonModular(x, xC) 2706 2707 var carry uint64 2708 z[0], carry = bits.Add64(z[0], yTimes[0], 0) 2709 z[1], carry = bits.Add64(z[1], yTimes[1], carry) 2710 z[2], carry = bits.Add64(z[2], yTimes[2], carry) 2711 z[3], carry = bits.Add64(z[3], yTimes[3], carry) 2712 z[4], carry = bits.Add64(z[4], yTimes[4], carry) 2713 z[5], carry = bits.Add64(z[5], yTimes[5], carry) 2714 z[6], carry = bits.Add64(z[6], yTimes[6], carry) 2715 z[7], carry = bits.Add64(z[7], yTimes[7], carry) 2716 z[8], carry = bits.Add64(z[8], yTimes[8], carry) 2717 z[9], carry = bits.Add64(z[9], yTimes[9], carry) 2718 z[10], carry = bits.Add64(z[10], yTimes[10], carry) 2719 z[11], carry = bits.Add64(z[11], yTimes[11], carry) 2720 2721 yHi, _ = bits.Add64(xHi, yHi, carry) 2722 2723 return yHi 2724 }