github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fp/element.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Code generated by consensys/gnark-crypto DO NOT EDIT 16 17 package fp 18 19 import ( 20 "crypto/rand" 21 "encoding/binary" 22 "errors" 23 "io" 24 "math/big" 25 "math/bits" 26 "reflect" 27 "strconv" 28 "strings" 29 30 "github.com/bits-and-blooms/bitset" 31 "github.com/consensys/gnark-crypto/field/hash" 32 "github.com/consensys/gnark-crypto/field/pool" 33 ) 34 35 // Element represents a field element stored on 10 words (uint64) 36 // 37 // Element are assumed to be in Montgomery form in all methods. 38 // 39 // Modulus q = 40 // 41 // q[base10] = 20494478644167774678813387386538961497669590920908778075528754551012016751717791778743535050360001387419576570244406805463255765034468441182772056330021723098661967429339971741066259394985997 42 // q[base16] = 0x126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d 43 // 44 // # Warning 45 // 46 // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance. 47 type Element [10]uint64 48 49 const ( 50 Limbs = 10 // number of 64 bits words needed to represent a Element 51 Bits = 633 // number of bits needed to represent a Element 52 Bytes = 80 // number of bytes needed to represent a Element 53 ) 54 55 // Field modulus q 56 const ( 57 q0 uint64 = 15512955586897510413 58 q1 uint64 = 4410884215886313276 59 q2 uint64 = 15543556715411259941 60 q3 uint64 = 9083347379620258823 61 q4 uint64 = 13320134076191308873 62 q5 uint64 = 9318693926755804304 63 q6 uint64 = 5645674015335635503 64 q7 uint64 = 12176845843281334983 65 q8 uint64 = 18165857675053050549 66 q9 uint64 = 82862755739295587 67 ) 68 69 var qElement = Element{ 70 q0, 71 q1, 72 q2, 73 q3, 74 q4, 75 q5, 76 q6, 77 q7, 78 q8, 79 q9, 80 } 81 82 var _modulus big.Int // q stored as big.Int 83 84 // Modulus returns q as a big.Int 85 // 86 // q[base10] = 20494478644167774678813387386538961497669590920908778075528754551012016751717791778743535050360001387419576570244406805463255765034468441182772056330021723098661967429339971741066259394985997 87 // q[base16] = 0x126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d 88 func Modulus() *big.Int { 89 return new(big.Int).Set(&_modulus) 90 } 91 92 // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r 93 // used for Montgomery reduction 94 const qInvNeg uint64 = 13046692460116554043 95 96 func init() { 97 _modulus.SetString("126633cc0f35f63fc1a174f01d72ab5a8fcd8c75d79d2c74e59769ad9bbda2f8152a6c0fadea490b8da9f5e83f57c497e0e8850edbda407d7b5ce7ab839c2253d369bd31147f73cd74916ea4570000d", 16) 98 } 99 100 // NewElement returns a new Element from a uint64 value 101 // 102 // it is equivalent to 103 // 104 // var v Element 105 // v.SetUint64(...) 106 func NewElement(v uint64) Element { 107 z := Element{v} 108 z.Mul(&z, &rSquare) 109 return z 110 } 111 112 // SetUint64 sets z to v and returns z 113 func (z *Element) SetUint64(v uint64) *Element { 114 // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form 115 *z = Element{v} 116 return z.Mul(z, &rSquare) // z.toMont() 117 } 118 119 // SetInt64 sets z to v and returns z 120 func (z *Element) SetInt64(v int64) *Element { 121 122 // absolute value of v 123 m := v >> 63 124 z.SetUint64(uint64((v ^ m) - m)) 125 126 if m != 0 { 127 // v is negative 128 z.Neg(z) 129 } 130 131 return z 132 } 133 134 // Set z = x and returns z 135 func (z *Element) Set(x *Element) *Element { 136 z[0] = x[0] 137 z[1] = x[1] 138 z[2] = x[2] 139 z[3] = x[3] 140 z[4] = x[4] 141 z[5] = x[5] 142 z[6] = x[6] 143 z[7] = x[7] 144 z[8] = x[8] 145 z[9] = x[9] 146 return z 147 } 148 149 // SetInterface converts provided interface into Element 150 // returns an error if provided type is not supported 151 // supported types: 152 // 153 // Element 154 // *Element 155 // uint64 156 // int 157 // string (see SetString for valid formats) 158 // *big.Int 159 // big.Int 160 // []byte 161 func (z *Element) SetInterface(i1 interface{}) (*Element, error) { 162 if i1 == nil { 163 return nil, errors.New("can't set fp.Element with <nil>") 164 } 165 166 switch c1 := i1.(type) { 167 case Element: 168 return z.Set(&c1), nil 169 case *Element: 170 if c1 == nil { 171 return nil, errors.New("can't set fp.Element with <nil>") 172 } 173 return z.Set(c1), nil 174 case uint8: 175 return z.SetUint64(uint64(c1)), nil 176 case uint16: 177 return z.SetUint64(uint64(c1)), nil 178 case uint32: 179 return z.SetUint64(uint64(c1)), nil 180 case uint: 181 return z.SetUint64(uint64(c1)), nil 182 case uint64: 183 return z.SetUint64(c1), nil 184 case int8: 185 return z.SetInt64(int64(c1)), nil 186 case int16: 187 return z.SetInt64(int64(c1)), nil 188 case int32: 189 return z.SetInt64(int64(c1)), nil 190 case int64: 191 return z.SetInt64(c1), nil 192 case int: 193 return z.SetInt64(int64(c1)), nil 194 case string: 195 return z.SetString(c1) 196 case *big.Int: 197 if c1 == nil { 198 return nil, errors.New("can't set fp.Element with <nil>") 199 } 200 return z.SetBigInt(c1), nil 201 case big.Int: 202 return z.SetBigInt(&c1), nil 203 case []byte: 204 return z.SetBytes(c1), nil 205 default: 206 return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) 207 } 208 } 209 210 // SetZero z = 0 211 func (z *Element) SetZero() *Element { 212 z[0] = 0 213 z[1] = 0 214 z[2] = 0 215 z[3] = 0 216 z[4] = 0 217 z[5] = 0 218 z[6] = 0 219 z[7] = 0 220 z[8] = 0 221 z[9] = 0 222 return z 223 } 224 225 // SetOne z = 1 (in Montgomery form) 226 func (z *Element) SetOne() *Element { 227 z[0] = 5665001492438840506 228 z[1] = 16907884053554239805 229 z[2] = 17318295036095996852 230 z[3] = 12638729832353218866 231 z[4] = 12856030952767240260 232 z[5] = 15732028589390776959 233 z[6] = 1038965607738428109 234 z[7] = 8411601626847721258 235 z[8] = 7016548280614581879 236 z[9] = 51212299585931083 237 return z 238 } 239 240 // Div z = x*y⁻¹ (mod q) 241 func (z *Element) Div(x, y *Element) *Element { 242 var yInv Element 243 yInv.Inverse(y) 244 z.Mul(x, &yInv) 245 return z 246 } 247 248 // Equal returns z == x; constant-time 249 func (z *Element) Equal(x *Element) bool { 250 return z.NotEqual(x) == 0 251 } 252 253 // NotEqual returns 0 if and only if z == x; constant-time 254 func (z *Element) NotEqual(x *Element) uint64 { 255 return (z[9] ^ x[9]) | (z[8] ^ x[8]) | (z[7] ^ x[7]) | (z[6] ^ x[6]) | (z[5] ^ x[5]) | (z[4] ^ x[4]) | (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0]) 256 } 257 258 // IsZero returns z == 0 259 func (z *Element) IsZero() bool { 260 return (z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 261 } 262 263 // IsOne returns z == 1 264 func (z *Element) IsOne() bool { 265 return ((z[9] ^ 51212299585931083) | (z[8] ^ 7016548280614581879) | (z[7] ^ 8411601626847721258) | (z[6] ^ 1038965607738428109) | (z[5] ^ 15732028589390776959) | (z[4] ^ 12856030952767240260) | (z[3] ^ 12638729832353218866) | (z[2] ^ 17318295036095996852) | (z[1] ^ 16907884053554239805) | (z[0] ^ 5665001492438840506)) == 0 266 } 267 268 // IsUint64 reports whether z can be represented as an uint64. 269 func (z *Element) IsUint64() bool { 270 zz := *z 271 zz.fromMont() 272 return zz.FitsOnOneWord() 273 } 274 275 // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. 276 func (z *Element) Uint64() uint64 { 277 return z.Bits()[0] 278 } 279 280 // FitsOnOneWord reports whether z words (except the least significant word) are 0 281 // 282 // It is the responsibility of the caller to convert from Montgomery to Regular form if needed. 283 func (z *Element) FitsOnOneWord() bool { 284 return (z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 285 } 286 287 // Cmp compares (lexicographic order) z and x and returns: 288 // 289 // -1 if z < x 290 // 0 if z == x 291 // +1 if z > x 292 func (z *Element) Cmp(x *Element) int { 293 _z := z.Bits() 294 _x := x.Bits() 295 if _z[9] > _x[9] { 296 return 1 297 } else if _z[9] < _x[9] { 298 return -1 299 } 300 if _z[8] > _x[8] { 301 return 1 302 } else if _z[8] < _x[8] { 303 return -1 304 } 305 if _z[7] > _x[7] { 306 return 1 307 } else if _z[7] < _x[7] { 308 return -1 309 } 310 if _z[6] > _x[6] { 311 return 1 312 } else if _z[6] < _x[6] { 313 return -1 314 } 315 if _z[5] > _x[5] { 316 return 1 317 } else if _z[5] < _x[5] { 318 return -1 319 } 320 if _z[4] > _x[4] { 321 return 1 322 } else if _z[4] < _x[4] { 323 return -1 324 } 325 if _z[3] > _x[3] { 326 return 1 327 } else if _z[3] < _x[3] { 328 return -1 329 } 330 if _z[2] > _x[2] { 331 return 1 332 } else if _z[2] < _x[2] { 333 return -1 334 } 335 if _z[1] > _x[1] { 336 return 1 337 } else if _z[1] < _x[1] { 338 return -1 339 } 340 if _z[0] > _x[0] { 341 return 1 342 } else if _z[0] < _x[0] { 343 return -1 344 } 345 return 0 346 } 347 348 // LexicographicallyLargest returns true if this element is strictly lexicographically 349 // larger than its negation, false otherwise 350 func (z *Element) LexicographicallyLargest() bool { 351 // adapted from github.com/zkcrypto/bls12_381 352 // we check if the element is larger than (q-1) / 2 353 // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 354 355 _z := z.Bits() 356 357 var b uint64 358 _, b = bits.Sub64(_z[0], 7756477793448755207, 0) 359 _, b = bits.Sub64(_z[1], 11428814144797932446, b) 360 _, b = bits.Sub64(_z[2], 16995150394560405778, b) 361 _, b = bits.Sub64(_z[3], 13765045726664905219, b) 362 _, b = bits.Sub64(_z[4], 6660067038095654436, b) 363 _, b = bits.Sub64(_z[5], 13882719000232677960, b) 364 _, b = bits.Sub64(_z[6], 12046209044522593559, b) 365 _, b = bits.Sub64(_z[7], 15311794958495443299, b) 366 _, b = bits.Sub64(_z[8], 18306300874381301082, b) 367 _, b = bits.Sub64(_z[9], 41431377869647793, b) 368 369 return b == 0 370 } 371 372 // SetRandom sets z to a uniform random value in [0, q). 373 // 374 // This might error only if reading from crypto/rand.Reader errors, 375 // in which case, value of z is undefined. 376 func (z *Element) SetRandom() (*Element, error) { 377 // this code is generated for all modulus 378 // and derived from go/src/crypto/rand/util.go 379 380 // l is number of limbs * 8; the number of bytes needed to reconstruct 10 uint64 381 const l = 80 382 383 // bitLen is the maximum bit length needed to encode a value < q. 384 const bitLen = 633 385 386 // k is the maximum byte length needed to encode a value < q. 387 const k = (bitLen + 7) / 8 388 389 // b is the number of bits in the most significant byte of q-1. 390 b := uint(bitLen % 8) 391 if b == 0 { 392 b = 8 393 } 394 395 var bytes [l]byte 396 397 for { 398 // note that bytes[k:l] is always 0 399 if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil { 400 return nil, err 401 } 402 403 // Clear unused bits in in the most significant byte to increase probability 404 // that the candidate is < q. 405 bytes[k-1] &= uint8(int(1<<b) - 1) 406 z[0] = binary.LittleEndian.Uint64(bytes[0:8]) 407 z[1] = binary.LittleEndian.Uint64(bytes[8:16]) 408 z[2] = binary.LittleEndian.Uint64(bytes[16:24]) 409 z[3] = binary.LittleEndian.Uint64(bytes[24:32]) 410 z[4] = binary.LittleEndian.Uint64(bytes[32:40]) 411 z[5] = binary.LittleEndian.Uint64(bytes[40:48]) 412 z[6] = binary.LittleEndian.Uint64(bytes[48:56]) 413 z[7] = binary.LittleEndian.Uint64(bytes[56:64]) 414 z[8] = binary.LittleEndian.Uint64(bytes[64:72]) 415 z[9] = binary.LittleEndian.Uint64(bytes[72:80]) 416 417 if !z.smallerThanModulus() { 418 continue // ignore the candidate and re-sample 419 } 420 421 return z, nil 422 } 423 } 424 425 // smallerThanModulus returns true if z < q 426 // This is not constant time 427 func (z *Element) smallerThanModulus() bool { 428 return (z[9] < q9 || (z[9] == q9 && (z[8] < q8 || (z[8] == q8 && (z[7] < q7 || (z[7] == q7 && (z[6] < q6 || (z[6] == q6 && (z[5] < q5 || (z[5] == q5 && (z[4] < q4 || (z[4] == q4 && (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0))))))))))))))))))) 429 } 430 431 // One returns 1 432 func One() Element { 433 var one Element 434 one.SetOne() 435 return one 436 } 437 438 // Halve sets z to z / 2 (mod q) 439 func (z *Element) Halve() { 440 var carry uint64 441 442 if z[0]&1 == 1 { 443 // z = z + q 444 z[0], carry = bits.Add64(z[0], q0, 0) 445 z[1], carry = bits.Add64(z[1], q1, carry) 446 z[2], carry = bits.Add64(z[2], q2, carry) 447 z[3], carry = bits.Add64(z[3], q3, carry) 448 z[4], carry = bits.Add64(z[4], q4, carry) 449 z[5], carry = bits.Add64(z[5], q5, carry) 450 z[6], carry = bits.Add64(z[6], q6, carry) 451 z[7], carry = bits.Add64(z[7], q7, carry) 452 z[8], carry = bits.Add64(z[8], q8, carry) 453 z[9], _ = bits.Add64(z[9], q9, carry) 454 455 } 456 // z = z >> 1 457 z[0] = z[0]>>1 | z[1]<<63 458 z[1] = z[1]>>1 | z[2]<<63 459 z[2] = z[2]>>1 | z[3]<<63 460 z[3] = z[3]>>1 | z[4]<<63 461 z[4] = z[4]>>1 | z[5]<<63 462 z[5] = z[5]>>1 | z[6]<<63 463 z[6] = z[6]>>1 | z[7]<<63 464 z[7] = z[7]>>1 | z[8]<<63 465 z[8] = z[8]>>1 | z[9]<<63 466 z[9] >>= 1 467 468 } 469 470 // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation 471 // sets and returns z = z * 1 472 func (z *Element) fromMont() *Element { 473 fromMont(z) 474 return z 475 } 476 477 // Add z = x + y (mod q) 478 func (z *Element) Add(x, y *Element) *Element { 479 480 var carry uint64 481 z[0], carry = bits.Add64(x[0], y[0], 0) 482 z[1], carry = bits.Add64(x[1], y[1], carry) 483 z[2], carry = bits.Add64(x[2], y[2], carry) 484 z[3], carry = bits.Add64(x[3], y[3], carry) 485 z[4], carry = bits.Add64(x[4], y[4], carry) 486 z[5], carry = bits.Add64(x[5], y[5], carry) 487 z[6], carry = bits.Add64(x[6], y[6], carry) 488 z[7], carry = bits.Add64(x[7], y[7], carry) 489 z[8], carry = bits.Add64(x[8], y[8], carry) 490 z[9], _ = bits.Add64(x[9], y[9], carry) 491 492 // if z ⩾ q → z -= q 493 if !z.smallerThanModulus() { 494 var b uint64 495 z[0], b = bits.Sub64(z[0], q0, 0) 496 z[1], b = bits.Sub64(z[1], q1, b) 497 z[2], b = bits.Sub64(z[2], q2, b) 498 z[3], b = bits.Sub64(z[3], q3, b) 499 z[4], b = bits.Sub64(z[4], q4, b) 500 z[5], b = bits.Sub64(z[5], q5, b) 501 z[6], b = bits.Sub64(z[6], q6, b) 502 z[7], b = bits.Sub64(z[7], q7, b) 503 z[8], b = bits.Sub64(z[8], q8, b) 504 z[9], _ = bits.Sub64(z[9], q9, b) 505 } 506 return z 507 } 508 509 // Double z = x + x (mod q), aka Lsh 1 510 func (z *Element) Double(x *Element) *Element { 511 512 var carry uint64 513 z[0], carry = bits.Add64(x[0], x[0], 0) 514 z[1], carry = bits.Add64(x[1], x[1], carry) 515 z[2], carry = bits.Add64(x[2], x[2], carry) 516 z[3], carry = bits.Add64(x[3], x[3], carry) 517 z[4], carry = bits.Add64(x[4], x[4], carry) 518 z[5], carry = bits.Add64(x[5], x[5], carry) 519 z[6], carry = bits.Add64(x[6], x[6], carry) 520 z[7], carry = bits.Add64(x[7], x[7], carry) 521 z[8], carry = bits.Add64(x[8], x[8], carry) 522 z[9], _ = bits.Add64(x[9], x[9], carry) 523 524 // if z ⩾ q → z -= q 525 if !z.smallerThanModulus() { 526 var b uint64 527 z[0], b = bits.Sub64(z[0], q0, 0) 528 z[1], b = bits.Sub64(z[1], q1, b) 529 z[2], b = bits.Sub64(z[2], q2, b) 530 z[3], b = bits.Sub64(z[3], q3, b) 531 z[4], b = bits.Sub64(z[4], q4, b) 532 z[5], b = bits.Sub64(z[5], q5, b) 533 z[6], b = bits.Sub64(z[6], q6, b) 534 z[7], b = bits.Sub64(z[7], q7, b) 535 z[8], b = bits.Sub64(z[8], q8, b) 536 z[9], _ = bits.Sub64(z[9], q9, b) 537 } 538 return z 539 } 540 541 // Sub z = x - y (mod q) 542 func (z *Element) Sub(x, y *Element) *Element { 543 var b uint64 544 z[0], b = bits.Sub64(x[0], y[0], 0) 545 z[1], b = bits.Sub64(x[1], y[1], b) 546 z[2], b = bits.Sub64(x[2], y[2], b) 547 z[3], b = bits.Sub64(x[3], y[3], b) 548 z[4], b = bits.Sub64(x[4], y[4], b) 549 z[5], b = bits.Sub64(x[5], y[5], b) 550 z[6], b = bits.Sub64(x[6], y[6], b) 551 z[7], b = bits.Sub64(x[7], y[7], b) 552 z[8], b = bits.Sub64(x[8], y[8], b) 553 z[9], b = bits.Sub64(x[9], y[9], b) 554 if b != 0 { 555 var c uint64 556 z[0], c = bits.Add64(z[0], q0, 0) 557 z[1], c = bits.Add64(z[1], q1, c) 558 z[2], c = bits.Add64(z[2], q2, c) 559 z[3], c = bits.Add64(z[3], q3, c) 560 z[4], c = bits.Add64(z[4], q4, c) 561 z[5], c = bits.Add64(z[5], q5, c) 562 z[6], c = bits.Add64(z[6], q6, c) 563 z[7], c = bits.Add64(z[7], q7, c) 564 z[8], c = bits.Add64(z[8], q8, c) 565 z[9], _ = bits.Add64(z[9], q9, c) 566 } 567 return z 568 } 569 570 // Neg z = q - x 571 func (z *Element) Neg(x *Element) *Element { 572 if x.IsZero() { 573 z.SetZero() 574 return z 575 } 576 var borrow uint64 577 z[0], borrow = bits.Sub64(q0, x[0], 0) 578 z[1], borrow = bits.Sub64(q1, x[1], borrow) 579 z[2], borrow = bits.Sub64(q2, x[2], borrow) 580 z[3], borrow = bits.Sub64(q3, x[3], borrow) 581 z[4], borrow = bits.Sub64(q4, x[4], borrow) 582 z[5], borrow = bits.Sub64(q5, x[5], borrow) 583 z[6], borrow = bits.Sub64(q6, x[6], borrow) 584 z[7], borrow = bits.Sub64(q7, x[7], borrow) 585 z[8], borrow = bits.Sub64(q8, x[8], borrow) 586 z[9], _ = bits.Sub64(q9, x[9], borrow) 587 return z 588 } 589 590 // Select is a constant-time conditional move. 591 // If c=0, z = x0. Else z = x1 592 func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element { 593 cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise 594 z[0] = x0[0] ^ cC&(x0[0]^x1[0]) 595 z[1] = x0[1] ^ cC&(x0[1]^x1[1]) 596 z[2] = x0[2] ^ cC&(x0[2]^x1[2]) 597 z[3] = x0[3] ^ cC&(x0[3]^x1[3]) 598 z[4] = x0[4] ^ cC&(x0[4]^x1[4]) 599 z[5] = x0[5] ^ cC&(x0[5]^x1[5]) 600 z[6] = x0[6] ^ cC&(x0[6]^x1[6]) 601 z[7] = x0[7] ^ cC&(x0[7]^x1[7]) 602 z[8] = x0[8] ^ cC&(x0[8]^x1[8]) 603 z[9] = x0[9] ^ cC&(x0[9]^x1[9]) 604 return z 605 } 606 607 // _mulGeneric is unoptimized textbook CIOS 608 // it is a fallback solution on x86 when ADX instruction set is not available 609 // and is used for testing purposes. 610 func _mulGeneric(z, x, y *Element) { 611 612 // Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis 613 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 614 // 615 // The algorithm: 616 // 617 // for i=0 to N-1 618 // C := 0 619 // for j=0 to N-1 620 // (C,t[j]) := t[j] + x[j]*y[i] + C 621 // (t[N+1],t[N]) := t[N] + C 622 // 623 // C := 0 624 // m := t[0]*q'[0] mod D 625 // (C,_) := t[0] + m*q[0] 626 // for j=1 to N-1 627 // (C,t[j-1]) := t[j] + m*q[j] + C 628 // 629 // (C,t[N-1]) := t[N] + C 630 // t[N] := t[N+1] + C 631 // 632 // → N is the number of machine words needed to store the modulus q 633 // → D is the word size. For example, on a 64-bit architecture D is 2 64 634 // → x[i], y[i], q[i] is the ith word of the numbers x,y,q 635 // → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs. 636 // → t is a temporary array of size N+2 637 // → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number 638 639 var t [11]uint64 640 var D uint64 641 var m, C uint64 642 // ----------------------------------- 643 // First loop 644 645 C, t[0] = bits.Mul64(y[0], x[0]) 646 C, t[1] = madd1(y[0], x[1], C) 647 C, t[2] = madd1(y[0], x[2], C) 648 C, t[3] = madd1(y[0], x[3], C) 649 C, t[4] = madd1(y[0], x[4], C) 650 C, t[5] = madd1(y[0], x[5], C) 651 C, t[6] = madd1(y[0], x[6], C) 652 C, t[7] = madd1(y[0], x[7], C) 653 C, t[8] = madd1(y[0], x[8], C) 654 C, t[9] = madd1(y[0], x[9], C) 655 656 t[10], D = bits.Add64(t[10], C, 0) 657 658 // m = t[0]n'[0] mod W 659 m = t[0] * qInvNeg 660 661 // ----------------------------------- 662 // Second loop 663 C = madd0(m, q0, t[0]) 664 C, t[0] = madd2(m, q1, t[1], C) 665 C, t[1] = madd2(m, q2, t[2], C) 666 C, t[2] = madd2(m, q3, t[3], C) 667 C, t[3] = madd2(m, q4, t[4], C) 668 C, t[4] = madd2(m, q5, t[5], C) 669 C, t[5] = madd2(m, q6, t[6], C) 670 C, t[6] = madd2(m, q7, t[7], C) 671 C, t[7] = madd2(m, q8, t[8], C) 672 C, t[8] = madd2(m, q9, t[9], C) 673 674 t[9], C = bits.Add64(t[10], C, 0) 675 t[10], _ = bits.Add64(0, D, C) 676 // ----------------------------------- 677 // First loop 678 679 C, t[0] = madd1(y[1], x[0], t[0]) 680 C, t[1] = madd2(y[1], x[1], t[1], C) 681 C, t[2] = madd2(y[1], x[2], t[2], C) 682 C, t[3] = madd2(y[1], x[3], t[3], C) 683 C, t[4] = madd2(y[1], x[4], t[4], C) 684 C, t[5] = madd2(y[1], x[5], t[5], C) 685 C, t[6] = madd2(y[1], x[6], t[6], C) 686 C, t[7] = madd2(y[1], x[7], t[7], C) 687 C, t[8] = madd2(y[1], x[8], t[8], C) 688 C, t[9] = madd2(y[1], x[9], t[9], C) 689 690 t[10], D = bits.Add64(t[10], C, 0) 691 692 // m = t[0]n'[0] mod W 693 m = t[0] * qInvNeg 694 695 // ----------------------------------- 696 // Second loop 697 C = madd0(m, q0, t[0]) 698 C, t[0] = madd2(m, q1, t[1], C) 699 C, t[1] = madd2(m, q2, t[2], C) 700 C, t[2] = madd2(m, q3, t[3], C) 701 C, t[3] = madd2(m, q4, t[4], C) 702 C, t[4] = madd2(m, q5, t[5], C) 703 C, t[5] = madd2(m, q6, t[6], C) 704 C, t[6] = madd2(m, q7, t[7], C) 705 C, t[7] = madd2(m, q8, t[8], C) 706 C, t[8] = madd2(m, q9, t[9], C) 707 708 t[9], C = bits.Add64(t[10], C, 0) 709 t[10], _ = bits.Add64(0, D, C) 710 // ----------------------------------- 711 // First loop 712 713 C, t[0] = madd1(y[2], x[0], t[0]) 714 C, t[1] = madd2(y[2], x[1], t[1], C) 715 C, t[2] = madd2(y[2], x[2], t[2], C) 716 C, t[3] = madd2(y[2], x[3], t[3], C) 717 C, t[4] = madd2(y[2], x[4], t[4], C) 718 C, t[5] = madd2(y[2], x[5], t[5], C) 719 C, t[6] = madd2(y[2], x[6], t[6], C) 720 C, t[7] = madd2(y[2], x[7], t[7], C) 721 C, t[8] = madd2(y[2], x[8], t[8], C) 722 C, t[9] = madd2(y[2], x[9], t[9], C) 723 724 t[10], D = bits.Add64(t[10], C, 0) 725 726 // m = t[0]n'[0] mod W 727 m = t[0] * qInvNeg 728 729 // ----------------------------------- 730 // Second loop 731 C = madd0(m, q0, t[0]) 732 C, t[0] = madd2(m, q1, t[1], C) 733 C, t[1] = madd2(m, q2, t[2], C) 734 C, t[2] = madd2(m, q3, t[3], C) 735 C, t[3] = madd2(m, q4, t[4], C) 736 C, t[4] = madd2(m, q5, t[5], C) 737 C, t[5] = madd2(m, q6, t[6], C) 738 C, t[6] = madd2(m, q7, t[7], C) 739 C, t[7] = madd2(m, q8, t[8], C) 740 C, t[8] = madd2(m, q9, t[9], C) 741 742 t[9], C = bits.Add64(t[10], C, 0) 743 t[10], _ = bits.Add64(0, D, C) 744 // ----------------------------------- 745 // First loop 746 747 C, t[0] = madd1(y[3], x[0], t[0]) 748 C, t[1] = madd2(y[3], x[1], t[1], C) 749 C, t[2] = madd2(y[3], x[2], t[2], C) 750 C, t[3] = madd2(y[3], x[3], t[3], C) 751 C, t[4] = madd2(y[3], x[4], t[4], C) 752 C, t[5] = madd2(y[3], x[5], t[5], C) 753 C, t[6] = madd2(y[3], x[6], t[6], C) 754 C, t[7] = madd2(y[3], x[7], t[7], C) 755 C, t[8] = madd2(y[3], x[8], t[8], C) 756 C, t[9] = madd2(y[3], x[9], t[9], C) 757 758 t[10], D = bits.Add64(t[10], C, 0) 759 760 // m = t[0]n'[0] mod W 761 m = t[0] * qInvNeg 762 763 // ----------------------------------- 764 // Second loop 765 C = madd0(m, q0, t[0]) 766 C, t[0] = madd2(m, q1, t[1], C) 767 C, t[1] = madd2(m, q2, t[2], C) 768 C, t[2] = madd2(m, q3, t[3], C) 769 C, t[3] = madd2(m, q4, t[4], C) 770 C, t[4] = madd2(m, q5, t[5], C) 771 C, t[5] = madd2(m, q6, t[6], C) 772 C, t[6] = madd2(m, q7, t[7], C) 773 C, t[7] = madd2(m, q8, t[8], C) 774 C, t[8] = madd2(m, q9, t[9], C) 775 776 t[9], C = bits.Add64(t[10], C, 0) 777 t[10], _ = bits.Add64(0, D, C) 778 // ----------------------------------- 779 // First loop 780 781 C, t[0] = madd1(y[4], x[0], t[0]) 782 C, t[1] = madd2(y[4], x[1], t[1], C) 783 C, t[2] = madd2(y[4], x[2], t[2], C) 784 C, t[3] = madd2(y[4], x[3], t[3], C) 785 C, t[4] = madd2(y[4], x[4], t[4], C) 786 C, t[5] = madd2(y[4], x[5], t[5], C) 787 C, t[6] = madd2(y[4], x[6], t[6], C) 788 C, t[7] = madd2(y[4], x[7], t[7], C) 789 C, t[8] = madd2(y[4], x[8], t[8], C) 790 C, t[9] = madd2(y[4], x[9], t[9], C) 791 792 t[10], D = bits.Add64(t[10], C, 0) 793 794 // m = t[0]n'[0] mod W 795 m = t[0] * qInvNeg 796 797 // ----------------------------------- 798 // Second loop 799 C = madd0(m, q0, t[0]) 800 C, t[0] = madd2(m, q1, t[1], C) 801 C, t[1] = madd2(m, q2, t[2], C) 802 C, t[2] = madd2(m, q3, t[3], C) 803 C, t[3] = madd2(m, q4, t[4], C) 804 C, t[4] = madd2(m, q5, t[5], C) 805 C, t[5] = madd2(m, q6, t[6], C) 806 C, t[6] = madd2(m, q7, t[7], C) 807 C, t[7] = madd2(m, q8, t[8], C) 808 C, t[8] = madd2(m, q9, t[9], C) 809 810 t[9], C = bits.Add64(t[10], C, 0) 811 t[10], _ = bits.Add64(0, D, C) 812 // ----------------------------------- 813 // First loop 814 815 C, t[0] = madd1(y[5], x[0], t[0]) 816 C, t[1] = madd2(y[5], x[1], t[1], C) 817 C, t[2] = madd2(y[5], x[2], t[2], C) 818 C, t[3] = madd2(y[5], x[3], t[3], C) 819 C, t[4] = madd2(y[5], x[4], t[4], C) 820 C, t[5] = madd2(y[5], x[5], t[5], C) 821 C, t[6] = madd2(y[5], x[6], t[6], C) 822 C, t[7] = madd2(y[5], x[7], t[7], C) 823 C, t[8] = madd2(y[5], x[8], t[8], C) 824 C, t[9] = madd2(y[5], x[9], t[9], C) 825 826 t[10], D = bits.Add64(t[10], C, 0) 827 828 // m = t[0]n'[0] mod W 829 m = t[0] * qInvNeg 830 831 // ----------------------------------- 832 // Second loop 833 C = madd0(m, q0, t[0]) 834 C, t[0] = madd2(m, q1, t[1], C) 835 C, t[1] = madd2(m, q2, t[2], C) 836 C, t[2] = madd2(m, q3, t[3], C) 837 C, t[3] = madd2(m, q4, t[4], C) 838 C, t[4] = madd2(m, q5, t[5], C) 839 C, t[5] = madd2(m, q6, t[6], C) 840 C, t[6] = madd2(m, q7, t[7], C) 841 C, t[7] = madd2(m, q8, t[8], C) 842 C, t[8] = madd2(m, q9, t[9], C) 843 844 t[9], C = bits.Add64(t[10], C, 0) 845 t[10], _ = bits.Add64(0, D, C) 846 // ----------------------------------- 847 // First loop 848 849 C, t[0] = madd1(y[6], x[0], t[0]) 850 C, t[1] = madd2(y[6], x[1], t[1], C) 851 C, t[2] = madd2(y[6], x[2], t[2], C) 852 C, t[3] = madd2(y[6], x[3], t[3], C) 853 C, t[4] = madd2(y[6], x[4], t[4], C) 854 C, t[5] = madd2(y[6], x[5], t[5], C) 855 C, t[6] = madd2(y[6], x[6], t[6], C) 856 C, t[7] = madd2(y[6], x[7], t[7], C) 857 C, t[8] = madd2(y[6], x[8], t[8], C) 858 C, t[9] = madd2(y[6], x[9], t[9], C) 859 860 t[10], D = bits.Add64(t[10], C, 0) 861 862 // m = t[0]n'[0] mod W 863 m = t[0] * qInvNeg 864 865 // ----------------------------------- 866 // Second loop 867 C = madd0(m, q0, t[0]) 868 C, t[0] = madd2(m, q1, t[1], C) 869 C, t[1] = madd2(m, q2, t[2], C) 870 C, t[2] = madd2(m, q3, t[3], C) 871 C, t[3] = madd2(m, q4, t[4], C) 872 C, t[4] = madd2(m, q5, t[5], C) 873 C, t[5] = madd2(m, q6, t[6], C) 874 C, t[6] = madd2(m, q7, t[7], C) 875 C, t[7] = madd2(m, q8, t[8], C) 876 C, t[8] = madd2(m, q9, t[9], C) 877 878 t[9], C = bits.Add64(t[10], C, 0) 879 t[10], _ = bits.Add64(0, D, C) 880 // ----------------------------------- 881 // First loop 882 883 C, t[0] = madd1(y[7], x[0], t[0]) 884 C, t[1] = madd2(y[7], x[1], t[1], C) 885 C, t[2] = madd2(y[7], x[2], t[2], C) 886 C, t[3] = madd2(y[7], x[3], t[3], C) 887 C, t[4] = madd2(y[7], x[4], t[4], C) 888 C, t[5] = madd2(y[7], x[5], t[5], C) 889 C, t[6] = madd2(y[7], x[6], t[6], C) 890 C, t[7] = madd2(y[7], x[7], t[7], C) 891 C, t[8] = madd2(y[7], x[8], t[8], C) 892 C, t[9] = madd2(y[7], x[9], t[9], C) 893 894 t[10], D = bits.Add64(t[10], C, 0) 895 896 // m = t[0]n'[0] mod W 897 m = t[0] * qInvNeg 898 899 // ----------------------------------- 900 // Second loop 901 C = madd0(m, q0, t[0]) 902 C, t[0] = madd2(m, q1, t[1], C) 903 C, t[1] = madd2(m, q2, t[2], C) 904 C, t[2] = madd2(m, q3, t[3], C) 905 C, t[3] = madd2(m, q4, t[4], C) 906 C, t[4] = madd2(m, q5, t[5], C) 907 C, t[5] = madd2(m, q6, t[6], C) 908 C, t[6] = madd2(m, q7, t[7], C) 909 C, t[7] = madd2(m, q8, t[8], C) 910 C, t[8] = madd2(m, q9, t[9], C) 911 912 t[9], C = bits.Add64(t[10], C, 0) 913 t[10], _ = bits.Add64(0, D, C) 914 // ----------------------------------- 915 // First loop 916 917 C, t[0] = madd1(y[8], x[0], t[0]) 918 C, t[1] = madd2(y[8], x[1], t[1], C) 919 C, t[2] = madd2(y[8], x[2], t[2], C) 920 C, t[3] = madd2(y[8], x[3], t[3], C) 921 C, t[4] = madd2(y[8], x[4], t[4], C) 922 C, t[5] = madd2(y[8], x[5], t[5], C) 923 C, t[6] = madd2(y[8], x[6], t[6], C) 924 C, t[7] = madd2(y[8], x[7], t[7], C) 925 C, t[8] = madd2(y[8], x[8], t[8], C) 926 C, t[9] = madd2(y[8], x[9], t[9], C) 927 928 t[10], D = bits.Add64(t[10], C, 0) 929 930 // m = t[0]n'[0] mod W 931 m = t[0] * qInvNeg 932 933 // ----------------------------------- 934 // Second loop 935 C = madd0(m, q0, t[0]) 936 C, t[0] = madd2(m, q1, t[1], C) 937 C, t[1] = madd2(m, q2, t[2], C) 938 C, t[2] = madd2(m, q3, t[3], C) 939 C, t[3] = madd2(m, q4, t[4], C) 940 C, t[4] = madd2(m, q5, t[5], C) 941 C, t[5] = madd2(m, q6, t[6], C) 942 C, t[6] = madd2(m, q7, t[7], C) 943 C, t[7] = madd2(m, q8, t[8], C) 944 C, t[8] = madd2(m, q9, t[9], C) 945 946 t[9], C = bits.Add64(t[10], C, 0) 947 t[10], _ = bits.Add64(0, D, C) 948 // ----------------------------------- 949 // First loop 950 951 C, t[0] = madd1(y[9], x[0], t[0]) 952 C, t[1] = madd2(y[9], x[1], t[1], C) 953 C, t[2] = madd2(y[9], x[2], t[2], C) 954 C, t[3] = madd2(y[9], x[3], t[3], C) 955 C, t[4] = madd2(y[9], x[4], t[4], C) 956 C, t[5] = madd2(y[9], x[5], t[5], C) 957 C, t[6] = madd2(y[9], x[6], t[6], C) 958 C, t[7] = madd2(y[9], x[7], t[7], C) 959 C, t[8] = madd2(y[9], x[8], t[8], C) 960 C, t[9] = madd2(y[9], x[9], t[9], C) 961 962 t[10], D = bits.Add64(t[10], C, 0) 963 964 // m = t[0]n'[0] mod W 965 m = t[0] * qInvNeg 966 967 // ----------------------------------- 968 // Second loop 969 C = madd0(m, q0, t[0]) 970 C, t[0] = madd2(m, q1, t[1], C) 971 C, t[1] = madd2(m, q2, t[2], C) 972 C, t[2] = madd2(m, q3, t[3], C) 973 C, t[3] = madd2(m, q4, t[4], C) 974 C, t[4] = madd2(m, q5, t[5], C) 975 C, t[5] = madd2(m, q6, t[6], C) 976 C, t[6] = madd2(m, q7, t[7], C) 977 C, t[7] = madd2(m, q8, t[8], C) 978 C, t[8] = madd2(m, q9, t[9], C) 979 980 t[9], C = bits.Add64(t[10], C, 0) 981 t[10], _ = bits.Add64(0, D, C) 982 983 if t[10] != 0 { 984 // we need to reduce, we have a result on 11 words 985 var b uint64 986 z[0], b = bits.Sub64(t[0], q0, 0) 987 z[1], b = bits.Sub64(t[1], q1, b) 988 z[2], b = bits.Sub64(t[2], q2, b) 989 z[3], b = bits.Sub64(t[3], q3, b) 990 z[4], b = bits.Sub64(t[4], q4, b) 991 z[5], b = bits.Sub64(t[5], q5, b) 992 z[6], b = bits.Sub64(t[6], q6, b) 993 z[7], b = bits.Sub64(t[7], q7, b) 994 z[8], b = bits.Sub64(t[8], q8, b) 995 z[9], _ = bits.Sub64(t[9], q9, b) 996 return 997 } 998 999 // copy t into z 1000 z[0] = t[0] 1001 z[1] = t[1] 1002 z[2] = t[2] 1003 z[3] = t[3] 1004 z[4] = t[4] 1005 z[5] = t[5] 1006 z[6] = t[6] 1007 z[7] = t[7] 1008 z[8] = t[8] 1009 z[9] = t[9] 1010 1011 // if z ⩾ q → z -= q 1012 if !z.smallerThanModulus() { 1013 var b uint64 1014 z[0], b = bits.Sub64(z[0], q0, 0) 1015 z[1], b = bits.Sub64(z[1], q1, b) 1016 z[2], b = bits.Sub64(z[2], q2, b) 1017 z[3], b = bits.Sub64(z[3], q3, b) 1018 z[4], b = bits.Sub64(z[4], q4, b) 1019 z[5], b = bits.Sub64(z[5], q5, b) 1020 z[6], b = bits.Sub64(z[6], q6, b) 1021 z[7], b = bits.Sub64(z[7], q7, b) 1022 z[8], b = bits.Sub64(z[8], q8, b) 1023 z[9], _ = bits.Sub64(z[9], q9, b) 1024 } 1025 } 1026 1027 func _fromMontGeneric(z *Element) { 1028 // the following lines implement z = z * 1 1029 // with a modified CIOS montgomery multiplication 1030 // see Mul for algorithm documentation 1031 { 1032 // m = z[0]n'[0] mod W 1033 m := z[0] * qInvNeg 1034 C := madd0(m, q0, z[0]) 1035 C, z[0] = madd2(m, q1, z[1], C) 1036 C, z[1] = madd2(m, q2, z[2], C) 1037 C, z[2] = madd2(m, q3, z[3], C) 1038 C, z[3] = madd2(m, q4, z[4], C) 1039 C, z[4] = madd2(m, q5, z[5], C) 1040 C, z[5] = madd2(m, q6, z[6], C) 1041 C, z[6] = madd2(m, q7, z[7], C) 1042 C, z[7] = madd2(m, q8, z[8], C) 1043 C, z[8] = madd2(m, q9, z[9], C) 1044 z[9] = C 1045 } 1046 { 1047 // m = z[0]n'[0] mod W 1048 m := z[0] * qInvNeg 1049 C := madd0(m, q0, z[0]) 1050 C, z[0] = madd2(m, q1, z[1], C) 1051 C, z[1] = madd2(m, q2, z[2], C) 1052 C, z[2] = madd2(m, q3, z[3], C) 1053 C, z[3] = madd2(m, q4, z[4], C) 1054 C, z[4] = madd2(m, q5, z[5], C) 1055 C, z[5] = madd2(m, q6, z[6], C) 1056 C, z[6] = madd2(m, q7, z[7], C) 1057 C, z[7] = madd2(m, q8, z[8], C) 1058 C, z[8] = madd2(m, q9, z[9], C) 1059 z[9] = C 1060 } 1061 { 1062 // m = z[0]n'[0] mod W 1063 m := z[0] * qInvNeg 1064 C := madd0(m, q0, z[0]) 1065 C, z[0] = madd2(m, q1, z[1], C) 1066 C, z[1] = madd2(m, q2, z[2], C) 1067 C, z[2] = madd2(m, q3, z[3], C) 1068 C, z[3] = madd2(m, q4, z[4], C) 1069 C, z[4] = madd2(m, q5, z[5], C) 1070 C, z[5] = madd2(m, q6, z[6], C) 1071 C, z[6] = madd2(m, q7, z[7], C) 1072 C, z[7] = madd2(m, q8, z[8], C) 1073 C, z[8] = madd2(m, q9, z[9], C) 1074 z[9] = C 1075 } 1076 { 1077 // m = z[0]n'[0] mod W 1078 m := z[0] * qInvNeg 1079 C := madd0(m, q0, z[0]) 1080 C, z[0] = madd2(m, q1, z[1], C) 1081 C, z[1] = madd2(m, q2, z[2], C) 1082 C, z[2] = madd2(m, q3, z[3], C) 1083 C, z[3] = madd2(m, q4, z[4], C) 1084 C, z[4] = madd2(m, q5, z[5], C) 1085 C, z[5] = madd2(m, q6, z[6], C) 1086 C, z[6] = madd2(m, q7, z[7], C) 1087 C, z[7] = madd2(m, q8, z[8], C) 1088 C, z[8] = madd2(m, q9, z[9], C) 1089 z[9] = C 1090 } 1091 { 1092 // m = z[0]n'[0] mod W 1093 m := z[0] * qInvNeg 1094 C := madd0(m, q0, z[0]) 1095 C, z[0] = madd2(m, q1, z[1], C) 1096 C, z[1] = madd2(m, q2, z[2], C) 1097 C, z[2] = madd2(m, q3, z[3], C) 1098 C, z[3] = madd2(m, q4, z[4], C) 1099 C, z[4] = madd2(m, q5, z[5], C) 1100 C, z[5] = madd2(m, q6, z[6], C) 1101 C, z[6] = madd2(m, q7, z[7], C) 1102 C, z[7] = madd2(m, q8, z[8], C) 1103 C, z[8] = madd2(m, q9, z[9], C) 1104 z[9] = C 1105 } 1106 { 1107 // m = z[0]n'[0] mod W 1108 m := z[0] * qInvNeg 1109 C := madd0(m, q0, z[0]) 1110 C, z[0] = madd2(m, q1, z[1], C) 1111 C, z[1] = madd2(m, q2, z[2], C) 1112 C, z[2] = madd2(m, q3, z[3], C) 1113 C, z[3] = madd2(m, q4, z[4], C) 1114 C, z[4] = madd2(m, q5, z[5], C) 1115 C, z[5] = madd2(m, q6, z[6], C) 1116 C, z[6] = madd2(m, q7, z[7], C) 1117 C, z[7] = madd2(m, q8, z[8], C) 1118 C, z[8] = madd2(m, q9, z[9], C) 1119 z[9] = C 1120 } 1121 { 1122 // m = z[0]n'[0] mod W 1123 m := z[0] * qInvNeg 1124 C := madd0(m, q0, z[0]) 1125 C, z[0] = madd2(m, q1, z[1], C) 1126 C, z[1] = madd2(m, q2, z[2], C) 1127 C, z[2] = madd2(m, q3, z[3], C) 1128 C, z[3] = madd2(m, q4, z[4], C) 1129 C, z[4] = madd2(m, q5, z[5], C) 1130 C, z[5] = madd2(m, q6, z[6], C) 1131 C, z[6] = madd2(m, q7, z[7], C) 1132 C, z[7] = madd2(m, q8, z[8], C) 1133 C, z[8] = madd2(m, q9, z[9], C) 1134 z[9] = C 1135 } 1136 { 1137 // m = z[0]n'[0] mod W 1138 m := z[0] * qInvNeg 1139 C := madd0(m, q0, z[0]) 1140 C, z[0] = madd2(m, q1, z[1], C) 1141 C, z[1] = madd2(m, q2, z[2], C) 1142 C, z[2] = madd2(m, q3, z[3], C) 1143 C, z[3] = madd2(m, q4, z[4], C) 1144 C, z[4] = madd2(m, q5, z[5], C) 1145 C, z[5] = madd2(m, q6, z[6], C) 1146 C, z[6] = madd2(m, q7, z[7], C) 1147 C, z[7] = madd2(m, q8, z[8], C) 1148 C, z[8] = madd2(m, q9, z[9], C) 1149 z[9] = C 1150 } 1151 { 1152 // m = z[0]n'[0] mod W 1153 m := z[0] * qInvNeg 1154 C := madd0(m, q0, z[0]) 1155 C, z[0] = madd2(m, q1, z[1], C) 1156 C, z[1] = madd2(m, q2, z[2], C) 1157 C, z[2] = madd2(m, q3, z[3], C) 1158 C, z[3] = madd2(m, q4, z[4], C) 1159 C, z[4] = madd2(m, q5, z[5], C) 1160 C, z[5] = madd2(m, q6, z[6], C) 1161 C, z[6] = madd2(m, q7, z[7], C) 1162 C, z[7] = madd2(m, q8, z[8], C) 1163 C, z[8] = madd2(m, q9, z[9], C) 1164 z[9] = C 1165 } 1166 { 1167 // m = z[0]n'[0] mod W 1168 m := z[0] * qInvNeg 1169 C := madd0(m, q0, z[0]) 1170 C, z[0] = madd2(m, q1, z[1], C) 1171 C, z[1] = madd2(m, q2, z[2], C) 1172 C, z[2] = madd2(m, q3, z[3], C) 1173 C, z[3] = madd2(m, q4, z[4], C) 1174 C, z[4] = madd2(m, q5, z[5], C) 1175 C, z[5] = madd2(m, q6, z[6], C) 1176 C, z[6] = madd2(m, q7, z[7], C) 1177 C, z[7] = madd2(m, q8, z[8], C) 1178 C, z[8] = madd2(m, q9, z[9], C) 1179 z[9] = C 1180 } 1181 1182 // if z ⩾ q → z -= q 1183 if !z.smallerThanModulus() { 1184 var b uint64 1185 z[0], b = bits.Sub64(z[0], q0, 0) 1186 z[1], b = bits.Sub64(z[1], q1, b) 1187 z[2], b = bits.Sub64(z[2], q2, b) 1188 z[3], b = bits.Sub64(z[3], q3, b) 1189 z[4], b = bits.Sub64(z[4], q4, b) 1190 z[5], b = bits.Sub64(z[5], q5, b) 1191 z[6], b = bits.Sub64(z[6], q6, b) 1192 z[7], b = bits.Sub64(z[7], q7, b) 1193 z[8], b = bits.Sub64(z[8], q8, b) 1194 z[9], _ = bits.Sub64(z[9], q9, b) 1195 } 1196 } 1197 1198 func _reduceGeneric(z *Element) { 1199 1200 // if z ⩾ q → z -= q 1201 if !z.smallerThanModulus() { 1202 var b uint64 1203 z[0], b = bits.Sub64(z[0], q0, 0) 1204 z[1], b = bits.Sub64(z[1], q1, b) 1205 z[2], b = bits.Sub64(z[2], q2, b) 1206 z[3], b = bits.Sub64(z[3], q3, b) 1207 z[4], b = bits.Sub64(z[4], q4, b) 1208 z[5], b = bits.Sub64(z[5], q5, b) 1209 z[6], b = bits.Sub64(z[6], q6, b) 1210 z[7], b = bits.Sub64(z[7], q7, b) 1211 z[8], b = bits.Sub64(z[8], q8, b) 1212 z[9], _ = bits.Sub64(z[9], q9, b) 1213 } 1214 } 1215 1216 // BatchInvert returns a new slice with every element inverted. 1217 // Uses Montgomery batch inversion trick 1218 func BatchInvert(a []Element) []Element { 1219 res := make([]Element, len(a)) 1220 if len(a) == 0 { 1221 return res 1222 } 1223 1224 zeroes := bitset.New(uint(len(a))) 1225 accumulator := One() 1226 1227 for i := 0; i < len(a); i++ { 1228 if a[i].IsZero() { 1229 zeroes.Set(uint(i)) 1230 continue 1231 } 1232 res[i] = accumulator 1233 accumulator.Mul(&accumulator, &a[i]) 1234 } 1235 1236 accumulator.Inverse(&accumulator) 1237 1238 for i := len(a) - 1; i >= 0; i-- { 1239 if zeroes.Test(uint(i)) { 1240 continue 1241 } 1242 res[i].Mul(&res[i], &accumulator) 1243 accumulator.Mul(&accumulator, &a[i]) 1244 } 1245 1246 return res 1247 } 1248 1249 func _butterflyGeneric(a, b *Element) { 1250 t := *a 1251 a.Add(a, b) 1252 b.Sub(&t, b) 1253 } 1254 1255 // BitLen returns the minimum number of bits needed to represent z 1256 // returns 0 if z == 0 1257 func (z *Element) BitLen() int { 1258 if z[9] != 0 { 1259 return 576 + bits.Len64(z[9]) 1260 } 1261 if z[8] != 0 { 1262 return 512 + bits.Len64(z[8]) 1263 } 1264 if z[7] != 0 { 1265 return 448 + bits.Len64(z[7]) 1266 } 1267 if z[6] != 0 { 1268 return 384 + bits.Len64(z[6]) 1269 } 1270 if z[5] != 0 { 1271 return 320 + bits.Len64(z[5]) 1272 } 1273 if z[4] != 0 { 1274 return 256 + bits.Len64(z[4]) 1275 } 1276 if z[3] != 0 { 1277 return 192 + bits.Len64(z[3]) 1278 } 1279 if z[2] != 0 { 1280 return 128 + bits.Len64(z[2]) 1281 } 1282 if z[1] != 0 { 1283 return 64 + bits.Len64(z[1]) 1284 } 1285 return bits.Len64(z[0]) 1286 } 1287 1288 // Hash msg to count prime field elements. 1289 // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 1290 func Hash(msg, dst []byte, count int) ([]Element, error) { 1291 // 128 bits of security 1292 // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 1293 const Bytes = 1 + (Bits-1)/8 1294 const L = 16 + Bytes 1295 1296 lenInBytes := count * L 1297 pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes) 1298 if err != nil { 1299 return nil, err 1300 } 1301 1302 // get temporary big int from the pool 1303 vv := pool.BigInt.Get() 1304 1305 res := make([]Element, count) 1306 for i := 0; i < count; i++ { 1307 vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) 1308 res[i].SetBigInt(vv) 1309 } 1310 1311 // release object into pool 1312 pool.BigInt.Put(vv) 1313 1314 return res, nil 1315 } 1316 1317 // Exp z = xᵏ (mod q) 1318 func (z *Element) Exp(x Element, k *big.Int) *Element { 1319 if k.IsUint64() && k.Uint64() == 0 { 1320 return z.SetOne() 1321 } 1322 1323 e := k 1324 if k.Sign() == -1 { 1325 // negative k, we invert 1326 // if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q) 1327 x.Inverse(&x) 1328 1329 // we negate k in a temp big.Int since 1330 // Int.Bit(_) of k and -k is different 1331 e = pool.BigInt.Get() 1332 defer pool.BigInt.Put(e) 1333 e.Neg(k) 1334 } 1335 1336 z.Set(&x) 1337 1338 for i := e.BitLen() - 2; i >= 0; i-- { 1339 z.Square(z) 1340 if e.Bit(i) == 1 { 1341 z.Mul(z, &x) 1342 } 1343 } 1344 1345 return z 1346 } 1347 1348 // rSquare where r is the Montgommery constant 1349 // see section 2.3.2 of Tolga Acar's thesis 1350 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 1351 var rSquare = Element{ 1352 7358459907925294924, 1353 14414180951914241931, 1354 16619482658146888203, 1355 760736596725344926, 1356 12753071240931896792, 1357 13425190760400245818, 1358 12591714441439252728, 1359 15325516497554583360, 1360 5301152003049442834, 1361 35368377961363834, 1362 } 1363 1364 // toMont converts z to Montgomery form 1365 // sets and returns z = z * r² 1366 func (z *Element) toMont() *Element { 1367 return z.Mul(z, &rSquare) 1368 } 1369 1370 // String returns the decimal representation of z as generated by 1371 // z.Text(10). 1372 func (z *Element) String() string { 1373 return z.Text(10) 1374 } 1375 1376 // toBigInt returns z as a big.Int in Montgomery form 1377 func (z *Element) toBigInt(res *big.Int) *big.Int { 1378 var b [Bytes]byte 1379 binary.BigEndian.PutUint64(b[72:80], z[0]) 1380 binary.BigEndian.PutUint64(b[64:72], z[1]) 1381 binary.BigEndian.PutUint64(b[56:64], z[2]) 1382 binary.BigEndian.PutUint64(b[48:56], z[3]) 1383 binary.BigEndian.PutUint64(b[40:48], z[4]) 1384 binary.BigEndian.PutUint64(b[32:40], z[5]) 1385 binary.BigEndian.PutUint64(b[24:32], z[6]) 1386 binary.BigEndian.PutUint64(b[16:24], z[7]) 1387 binary.BigEndian.PutUint64(b[8:16], z[8]) 1388 binary.BigEndian.PutUint64(b[0:8], z[9]) 1389 1390 return res.SetBytes(b[:]) 1391 } 1392 1393 // Text returns the string representation of z in the given base. 1394 // Base must be between 2 and 36, inclusive. The result uses the 1395 // lower-case letters 'a' to 'z' for digit values 10 to 35. 1396 // No prefix (such as "0x") is added to the string. If z is a nil 1397 // pointer it returns "<nil>". 1398 // If base == 10 and -z fits in a uint16 prefix "-" is added to the string. 1399 func (z *Element) Text(base int) string { 1400 if base < 2 || base > 36 { 1401 panic("invalid base") 1402 } 1403 if z == nil { 1404 return "<nil>" 1405 } 1406 1407 const maxUint16 = 65535 1408 if base == 10 { 1409 var zzNeg Element 1410 zzNeg.Neg(z) 1411 zzNeg.fromMont() 1412 if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 { 1413 return "-" + strconv.FormatUint(zzNeg[0], base) 1414 } 1415 } 1416 zz := *z 1417 zz.fromMont() 1418 if zz.FitsOnOneWord() { 1419 return strconv.FormatUint(zz[0], base) 1420 } 1421 vv := pool.BigInt.Get() 1422 r := zz.toBigInt(vv).Text(base) 1423 pool.BigInt.Put(vv) 1424 return r 1425 } 1426 1427 // BigInt sets and return z as a *big.Int 1428 func (z *Element) BigInt(res *big.Int) *big.Int { 1429 _z := *z 1430 _z.fromMont() 1431 return _z.toBigInt(res) 1432 } 1433 1434 // ToBigIntRegular returns z as a big.Int in regular form 1435 // 1436 // Deprecated: use BigInt(*big.Int) instead 1437 func (z Element) ToBigIntRegular(res *big.Int) *big.Int { 1438 z.fromMont() 1439 return z.toBigInt(res) 1440 } 1441 1442 // Bits provides access to z by returning its value as a little-endian [10]uint64 array. 1443 // Bits is intended to support implementation of missing low-level Element 1444 // functionality outside this package; it should be avoided otherwise. 1445 func (z *Element) Bits() [10]uint64 { 1446 _z := *z 1447 fromMont(&_z) 1448 return _z 1449 } 1450 1451 // Bytes returns the value of z as a big-endian byte array 1452 func (z *Element) Bytes() (res [Bytes]byte) { 1453 BigEndian.PutElement(&res, *z) 1454 return 1455 } 1456 1457 // Marshal returns the value of z as a big-endian byte slice 1458 func (z *Element) Marshal() []byte { 1459 b := z.Bytes() 1460 return b[:] 1461 } 1462 1463 // Unmarshal is an alias for SetBytes, it sets z to the value of e. 1464 func (z *Element) Unmarshal(e []byte) { 1465 z.SetBytes(e) 1466 } 1467 1468 // SetBytes interprets e as the bytes of a big-endian unsigned integer, 1469 // sets z to that value, and returns z. 1470 func (z *Element) SetBytes(e []byte) *Element { 1471 if len(e) == Bytes { 1472 // fast path 1473 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1474 if err == nil { 1475 *z = v 1476 return z 1477 } 1478 } 1479 1480 // slow path. 1481 // get a big int from our pool 1482 vv := pool.BigInt.Get() 1483 vv.SetBytes(e) 1484 1485 // set big int 1486 z.SetBigInt(vv) 1487 1488 // put temporary object back in pool 1489 pool.BigInt.Put(vv) 1490 1491 return z 1492 } 1493 1494 // SetBytesCanonical interprets e as the bytes of a big-endian 80-byte integer. 1495 // If e is not a 80-byte slice or encodes a value higher than q, 1496 // SetBytesCanonical returns an error. 1497 func (z *Element) SetBytesCanonical(e []byte) error { 1498 if len(e) != Bytes { 1499 return errors.New("invalid fp.Element encoding") 1500 } 1501 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1502 if err != nil { 1503 return err 1504 } 1505 *z = v 1506 return nil 1507 } 1508 1509 // SetBigInt sets z to v and returns z 1510 func (z *Element) SetBigInt(v *big.Int) *Element { 1511 z.SetZero() 1512 1513 var zero big.Int 1514 1515 // fast path 1516 c := v.Cmp(&_modulus) 1517 if c == 0 { 1518 // v == 0 1519 return z 1520 } else if c != 1 && v.Cmp(&zero) != -1 { 1521 // 0 < v < q 1522 return z.setBigInt(v) 1523 } 1524 1525 // get temporary big int from the pool 1526 vv := pool.BigInt.Get() 1527 1528 // copy input + modular reduction 1529 vv.Mod(v, &_modulus) 1530 1531 // set big int byte value 1532 z.setBigInt(vv) 1533 1534 // release object into pool 1535 pool.BigInt.Put(vv) 1536 return z 1537 } 1538 1539 // setBigInt assumes 0 ⩽ v < q 1540 func (z *Element) setBigInt(v *big.Int) *Element { 1541 vBits := v.Bits() 1542 1543 if bits.UintSize == 64 { 1544 for i := 0; i < len(vBits); i++ { 1545 z[i] = uint64(vBits[i]) 1546 } 1547 } else { 1548 for i := 0; i < len(vBits); i++ { 1549 if i%2 == 0 { 1550 z[i/2] = uint64(vBits[i]) 1551 } else { 1552 z[i/2] |= uint64(vBits[i]) << 32 1553 } 1554 } 1555 } 1556 1557 return z.toMont() 1558 } 1559 1560 // SetString creates a big.Int with number and calls SetBigInt on z 1561 // 1562 // The number prefix determines the actual base: A prefix of 1563 // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8, 1564 // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10 1565 // and no prefix is accepted. 1566 // 1567 // For base 16, lower and upper case letters are considered the same: 1568 // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. 1569 // 1570 // An underscore character ”_” may appear between a base 1571 // prefix and an adjacent digit, and between successive digits; such 1572 // underscores do not change the value of the number. 1573 // Incorrect placement of underscores is reported as a panic if there 1574 // are no other errors. 1575 // 1576 // If the number is invalid this method leaves z unchanged and returns nil, error. 1577 func (z *Element) SetString(number string) (*Element, error) { 1578 // get temporary big int from the pool 1579 vv := pool.BigInt.Get() 1580 1581 if _, ok := vv.SetString(number, 0); !ok { 1582 return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number) 1583 } 1584 1585 z.SetBigInt(vv) 1586 1587 // release object into pool 1588 pool.BigInt.Put(vv) 1589 1590 return z, nil 1591 } 1592 1593 // MarshalJSON returns json encoding of z (z.Text(10)) 1594 // If z == nil, returns null 1595 func (z *Element) MarshalJSON() ([]byte, error) { 1596 if z == nil { 1597 return []byte("null"), nil 1598 } 1599 const maxSafeBound = 15 // we encode it as number if it's small 1600 s := z.Text(10) 1601 if len(s) <= maxSafeBound { 1602 return []byte(s), nil 1603 } 1604 var sbb strings.Builder 1605 sbb.WriteByte('"') 1606 sbb.WriteString(s) 1607 sbb.WriteByte('"') 1608 return []byte(sbb.String()), nil 1609 } 1610 1611 // UnmarshalJSON accepts numbers and strings as input 1612 // See Element.SetString for valid prefixes (0x, 0b, ...) 1613 func (z *Element) UnmarshalJSON(data []byte) error { 1614 s := string(data) 1615 if len(s) > Bits*3 { 1616 return errors.New("value too large (max = Element.Bits * 3)") 1617 } 1618 1619 // we accept numbers and strings, remove leading and trailing quotes if any 1620 if len(s) > 0 && s[0] == '"' { 1621 s = s[1:] 1622 } 1623 if len(s) > 0 && s[len(s)-1] == '"' { 1624 s = s[:len(s)-1] 1625 } 1626 1627 // get temporary big int from the pool 1628 vv := pool.BigInt.Get() 1629 1630 if _, ok := vv.SetString(s, 0); !ok { 1631 return errors.New("can't parse into a big.Int: " + s) 1632 } 1633 1634 z.SetBigInt(vv) 1635 1636 // release object into pool 1637 pool.BigInt.Put(vv) 1638 return nil 1639 } 1640 1641 // A ByteOrder specifies how to convert byte slices into a Element 1642 type ByteOrder interface { 1643 Element(*[Bytes]byte) (Element, error) 1644 PutElement(*[Bytes]byte, Element) 1645 String() string 1646 } 1647 1648 // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 1649 var BigEndian bigEndian 1650 1651 type bigEndian struct{} 1652 1653 // Element interpret b is a big-endian 80-byte slice. 1654 // If b encodes a value higher than q, Element returns error. 1655 func (bigEndian) Element(b *[Bytes]byte) (Element, error) { 1656 var z Element 1657 z[0] = binary.BigEndian.Uint64((*b)[72:80]) 1658 z[1] = binary.BigEndian.Uint64((*b)[64:72]) 1659 z[2] = binary.BigEndian.Uint64((*b)[56:64]) 1660 z[3] = binary.BigEndian.Uint64((*b)[48:56]) 1661 z[4] = binary.BigEndian.Uint64((*b)[40:48]) 1662 z[5] = binary.BigEndian.Uint64((*b)[32:40]) 1663 z[6] = binary.BigEndian.Uint64((*b)[24:32]) 1664 z[7] = binary.BigEndian.Uint64((*b)[16:24]) 1665 z[8] = binary.BigEndian.Uint64((*b)[8:16]) 1666 z[9] = binary.BigEndian.Uint64((*b)[0:8]) 1667 1668 if !z.smallerThanModulus() { 1669 return Element{}, errors.New("invalid fp.Element encoding") 1670 } 1671 1672 z.toMont() 1673 return z, nil 1674 } 1675 1676 func (bigEndian) PutElement(b *[Bytes]byte, e Element) { 1677 e.fromMont() 1678 binary.BigEndian.PutUint64((*b)[72:80], e[0]) 1679 binary.BigEndian.PutUint64((*b)[64:72], e[1]) 1680 binary.BigEndian.PutUint64((*b)[56:64], e[2]) 1681 binary.BigEndian.PutUint64((*b)[48:56], e[3]) 1682 binary.BigEndian.PutUint64((*b)[40:48], e[4]) 1683 binary.BigEndian.PutUint64((*b)[32:40], e[5]) 1684 binary.BigEndian.PutUint64((*b)[24:32], e[6]) 1685 binary.BigEndian.PutUint64((*b)[16:24], e[7]) 1686 binary.BigEndian.PutUint64((*b)[8:16], e[8]) 1687 binary.BigEndian.PutUint64((*b)[0:8], e[9]) 1688 } 1689 1690 func (bigEndian) String() string { return "BigEndian" } 1691 1692 // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder. 1693 var LittleEndian littleEndian 1694 1695 type littleEndian struct{} 1696 1697 func (littleEndian) Element(b *[Bytes]byte) (Element, error) { 1698 var z Element 1699 z[0] = binary.LittleEndian.Uint64((*b)[0:8]) 1700 z[1] = binary.LittleEndian.Uint64((*b)[8:16]) 1701 z[2] = binary.LittleEndian.Uint64((*b)[16:24]) 1702 z[3] = binary.LittleEndian.Uint64((*b)[24:32]) 1703 z[4] = binary.LittleEndian.Uint64((*b)[32:40]) 1704 z[5] = binary.LittleEndian.Uint64((*b)[40:48]) 1705 z[6] = binary.LittleEndian.Uint64((*b)[48:56]) 1706 z[7] = binary.LittleEndian.Uint64((*b)[56:64]) 1707 z[8] = binary.LittleEndian.Uint64((*b)[64:72]) 1708 z[9] = binary.LittleEndian.Uint64((*b)[72:80]) 1709 1710 if !z.smallerThanModulus() { 1711 return Element{}, errors.New("invalid fp.Element encoding") 1712 } 1713 1714 z.toMont() 1715 return z, nil 1716 } 1717 1718 func (littleEndian) PutElement(b *[Bytes]byte, e Element) { 1719 e.fromMont() 1720 binary.LittleEndian.PutUint64((*b)[0:8], e[0]) 1721 binary.LittleEndian.PutUint64((*b)[8:16], e[1]) 1722 binary.LittleEndian.PutUint64((*b)[16:24], e[2]) 1723 binary.LittleEndian.PutUint64((*b)[24:32], e[3]) 1724 binary.LittleEndian.PutUint64((*b)[32:40], e[4]) 1725 binary.LittleEndian.PutUint64((*b)[40:48], e[5]) 1726 binary.LittleEndian.PutUint64((*b)[48:56], e[6]) 1727 binary.LittleEndian.PutUint64((*b)[56:64], e[7]) 1728 binary.LittleEndian.PutUint64((*b)[64:72], e[8]) 1729 binary.LittleEndian.PutUint64((*b)[72:80], e[9]) 1730 } 1731 1732 func (littleEndian) String() string { return "LittleEndian" } 1733 1734 // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) 1735 func (z *Element) Legendre() int { 1736 var l Element 1737 // z^((q-1)/2) 1738 l.expByLegendreExp(*z) 1739 1740 if l.IsZero() { 1741 return 0 1742 } 1743 1744 // if l == 1 1745 if l.IsOne() { 1746 return 1 1747 } 1748 return -1 1749 } 1750 1751 // Sqrt z = √x (mod q) 1752 // if the square root doesn't exist (x is not a square mod q) 1753 // Sqrt leaves z unchanged and returns nil 1754 func (z *Element) Sqrt(x *Element) *Element { 1755 // q ≡ 5 (mod 8) 1756 // see modSqrt5Mod8Prime in math/big/int.go 1757 var one, alpha, beta, tx, square Element 1758 one.SetOne() 1759 tx.Double(x) 1760 alpha.expBySqrtExp(tx) 1761 1762 beta.Square(&alpha). 1763 Mul(&beta, &tx). 1764 Sub(&beta, &one). 1765 Mul(&beta, x). 1766 Mul(&beta, &alpha) 1767 1768 // as we didn't compute the legendre symbol, ensure we found beta such that beta * beta = x 1769 square.Square(&beta) 1770 if square.Equal(x) { 1771 return z.Set(&beta) 1772 } 1773 return nil 1774 } 1775 1776 const ( 1777 k = 32 // word size / 2 1778 signBitSelector = uint64(1) << 63 1779 approxLowBitsN = k - 1 1780 approxHighBitsN = k + 1 1781 ) 1782 1783 const ( 1784 inversionCorrectionFactorWord0 = 17335095338408674528 1785 inversionCorrectionFactorWord1 = 1935156146725576072 1786 inversionCorrectionFactorWord2 = 12310223143035529855 1787 inversionCorrectionFactorWord3 = 14776388015283991997 1788 inversionCorrectionFactorWord4 = 13807356859349388480 1789 inversionCorrectionFactorWord5 = 10412247811534140886 1790 inversionCorrectionFactorWord6 = 1537112855455741892 1791 inversionCorrectionFactorWord7 = 5281081904757642912 1792 inversionCorrectionFactorWord8 = 14734303888675989218 1793 inversionCorrectionFactorWord9 = 64202171737444348 1794 invIterationsN = 42 1795 ) 1796 1797 // Inverse z = x⁻¹ (mod q) 1798 // 1799 // if x == 0, sets and returns z = x 1800 func (z *Element) Inverse(x *Element) *Element { 1801 // Implements "Optimized Binary GCD for Modular Inversion" 1802 // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf 1803 1804 a := *x 1805 b := Element{ 1806 q0, 1807 q1, 1808 q2, 1809 q3, 1810 q4, 1811 q5, 1812 q6, 1813 q7, 1814 q8, 1815 q9, 1816 } // b := q 1817 1818 u := Element{1} 1819 1820 // Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v] 1821 // cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1) 1822 var c0, c1 int64 1823 1824 // Saved update factors to reduce the number of field multiplications 1825 var pf0, pf1, pg0, pg1 int64 1826 1827 var i uint 1828 1829 var v, s Element 1830 1831 // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations 1832 // This also lets us get away with half as many updates to u,v 1833 // To make this constant-time-ish, replace the condition with i < invIterationsN 1834 for i = 0; i&1 == 1 || !a.IsZero(); i++ { 1835 n := max(a.BitLen(), b.BitLen()) 1836 aApprox, bApprox := approximate(&a, n), approximate(&b, n) 1837 1838 // f₀, g₀, f₁, g₁ = 1, 0, 0, 1 1839 c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 1840 1841 for j := 0; j < approxLowBitsN; j++ { 1842 1843 // -2ʲ < f₀, f₁ ≤ 2ʲ 1844 // |f₀| + |f₁| < 2ʲ⁺¹ 1845 1846 if aApprox&1 == 0 { 1847 aApprox /= 2 1848 } else { 1849 s, borrow := bits.Sub64(aApprox, bApprox, 0) 1850 if borrow == 1 { 1851 s = bApprox - aApprox 1852 bApprox = aApprox 1853 c0, c1 = c1, c0 1854 // invariants unchanged 1855 } 1856 1857 aApprox = s / 2 1858 c0 = c0 - c1 1859 1860 // Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking) 1861 // Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹ 1862 // Invariants unchanged for f₁ 1863 } 1864 1865 c1 *= 2 1866 // -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹ 1867 // So now |f₀| + |f₁| < 2ʲ⁺² 1868 } 1869 1870 s = a 1871 1872 var g0 int64 1873 // from this point on c0 aliases for f0 1874 c0, g0 = updateFactorsDecompose(c0) 1875 aHi := a.linearCombNonModular(&s, c0, &b, g0) 1876 if aHi&signBitSelector != 0 { 1877 // if aHi < 0 1878 c0, g0 = -c0, -g0 1879 aHi = negL(&a, aHi) 1880 } 1881 // right-shift a by k-1 bits 1882 a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) 1883 a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) 1884 a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) 1885 a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) 1886 a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN) 1887 a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN) 1888 a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN) 1889 a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN) 1890 a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN) 1891 a[9] = (a[9] >> approxLowBitsN) | (aHi << approxHighBitsN) 1892 1893 var f1 int64 1894 // from this point on c1 aliases for g0 1895 f1, c1 = updateFactorsDecompose(c1) 1896 bHi := b.linearCombNonModular(&s, f1, &b, c1) 1897 if bHi&signBitSelector != 0 { 1898 // if bHi < 0 1899 f1, c1 = -f1, -c1 1900 bHi = negL(&b, bHi) 1901 } 1902 // right-shift b by k-1 bits 1903 b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) 1904 b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) 1905 b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) 1906 b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) 1907 b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) 1908 b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN) 1909 b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN) 1910 b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN) 1911 b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN) 1912 b[9] = (b[9] >> approxLowBitsN) | (bHi << approxHighBitsN) 1913 1914 if i&1 == 1 { 1915 // Combine current update factors with previously stored ones 1916 // [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values 1917 // We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁| 1918 // = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹ 1919 // So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register 1920 1921 // c₀ aliases f₀, c₁ aliases g₁ 1922 c0, g0, f1, c1 = c0*pf0+g0*pf1, 1923 c0*pg0+g0*pg1, 1924 f1*pf0+c1*pf1, 1925 f1*pg0+c1*pg1 1926 1927 s = u 1928 1929 // 0 ≤ u, v < 2²⁵⁵ 1930 // |F₀|, |G₀| < 2⁶³ 1931 u.linearComb(&u, c0, &v, g0) 1932 // |F₁|, |G₁| < 2⁶³ 1933 v.linearComb(&s, f1, &v, c1) 1934 1935 } else { 1936 // Save update factors 1937 pf0, pg0, pf1, pg1 = c0, g0, f1, c1 1938 } 1939 } 1940 1941 // For every iteration that we miss, v is not being multiplied by 2ᵏ⁻² 1942 const pSq uint64 = 1 << (2 * (k - 1)) 1943 a = Element{pSq} 1944 // If the function is constant-time ish, this loop will not run (no need to take it out explicitly) 1945 for ; i < invIterationsN; i += 2 { 1946 // could optimize further with mul by word routine or by pre-computing a table since with k=26, 1947 // we would multiply by pSq up to 13times; 1948 // on x86, the assembly routine outperforms generic code for mul by word 1949 // on arm64, we may loose up to ~5% for 6 limbs 1950 v.Mul(&v, &a) 1951 } 1952 1953 u.Set(x) // for correctness check 1954 1955 z.Mul(&v, &Element{ 1956 inversionCorrectionFactorWord0, 1957 inversionCorrectionFactorWord1, 1958 inversionCorrectionFactorWord2, 1959 inversionCorrectionFactorWord3, 1960 inversionCorrectionFactorWord4, 1961 inversionCorrectionFactorWord5, 1962 inversionCorrectionFactorWord6, 1963 inversionCorrectionFactorWord7, 1964 inversionCorrectionFactorWord8, 1965 inversionCorrectionFactorWord9, 1966 }) 1967 1968 // correctness check 1969 v.Mul(&u, z) 1970 if !v.IsOne() && !u.IsZero() { 1971 return z.inverseExp(u) 1972 } 1973 1974 return z 1975 } 1976 1977 // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q) 1978 func (z *Element) inverseExp(x Element) *Element { 1979 // e == q-2 1980 e := Modulus() 1981 e.Sub(e, big.NewInt(2)) 1982 1983 z.Set(&x) 1984 1985 for i := e.BitLen() - 2; i >= 0; i-- { 1986 z.Square(z) 1987 if e.Bit(i) == 1 { 1988 z.Mul(z, &x) 1989 } 1990 } 1991 1992 return z 1993 } 1994 1995 // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits 1996 // if x fits in a word as is, no approximation necessary 1997 func approximate(x *Element, nBits int) uint64 { 1998 1999 if nBits <= 64 { 2000 return x[0] 2001 } 2002 2003 const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones 2004 lo := mask & x[0] 2005 2006 hiWordIndex := (nBits - 1) / 64 2007 2008 hiWordBitsAvailable := nBits - hiWordIndex*64 2009 hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) 2010 2011 mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) 2012 hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) 2013 2014 mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) 2015 mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed 2016 2017 return lo | mid | hi 2018 } 2019 2020 // linearComb z = xC * x + yC * y; 2021 // 0 ≤ x, y < 2⁶³³ 2022 // |xC|, |yC| < 2⁶³ 2023 func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) { 2024 // | (hi, z) | < 2 * 2⁶³ * 2⁶³³ = 2⁶⁹⁷ 2025 // therefore | hi | < 2⁵⁷ ≤ 2⁶³ 2026 hi := z.linearCombNonModular(x, xC, y, yC) 2027 z.montReduceSigned(z, hi) 2028 } 2029 2030 // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm 2031 // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit. 2032 func (z *Element) montReduceSigned(x *Element, xHi uint64) { 2033 const signBitRemover = ^signBitSelector 2034 mustNeg := xHi&signBitSelector != 0 2035 // the SOS implementation requires that most significant bit is 0 2036 // Let X be xHi*r + x 2037 // If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement) 2038 xHi &= signBitRemover 2039 // with this a negative X is now represented as 2⁶³ r + X 2040 2041 var t [2*Limbs - 1]uint64 2042 var C uint64 2043 2044 m := x[0] * qInvNeg 2045 2046 C = madd0(m, q0, x[0]) 2047 C, t[1] = madd2(m, q1, x[1], C) 2048 C, t[2] = madd2(m, q2, x[2], C) 2049 C, t[3] = madd2(m, q3, x[3], C) 2050 C, t[4] = madd2(m, q4, x[4], C) 2051 C, t[5] = madd2(m, q5, x[5], C) 2052 C, t[6] = madd2(m, q6, x[6], C) 2053 C, t[7] = madd2(m, q7, x[7], C) 2054 C, t[8] = madd2(m, q8, x[8], C) 2055 C, t[9] = madd2(m, q9, x[9], C) 2056 2057 // m * qElement[9] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 2058 // x[9] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2 2059 // On LHS, (C, t[9]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1 2060 // So on LHS, C ≤ 2⁶³ 2061 t[10] = xHi + C 2062 // xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴ 2063 2064 // <standard SOS> 2065 { 2066 const i = 1 2067 m = t[i] * qInvNeg 2068 2069 C = madd0(m, q0, t[i+0]) 2070 C, t[i+1] = madd2(m, q1, t[i+1], C) 2071 C, t[i+2] = madd2(m, q2, t[i+2], C) 2072 C, t[i+3] = madd2(m, q3, t[i+3], C) 2073 C, t[i+4] = madd2(m, q4, t[i+4], C) 2074 C, t[i+5] = madd2(m, q5, t[i+5], C) 2075 C, t[i+6] = madd2(m, q6, t[i+6], C) 2076 C, t[i+7] = madd2(m, q7, t[i+7], C) 2077 C, t[i+8] = madd2(m, q8, t[i+8], C) 2078 C, t[i+9] = madd2(m, q9, t[i+9], C) 2079 2080 t[i+Limbs] += C 2081 } 2082 { 2083 const i = 2 2084 m = t[i] * qInvNeg 2085 2086 C = madd0(m, q0, t[i+0]) 2087 C, t[i+1] = madd2(m, q1, t[i+1], C) 2088 C, t[i+2] = madd2(m, q2, t[i+2], C) 2089 C, t[i+3] = madd2(m, q3, t[i+3], C) 2090 C, t[i+4] = madd2(m, q4, t[i+4], C) 2091 C, t[i+5] = madd2(m, q5, t[i+5], C) 2092 C, t[i+6] = madd2(m, q6, t[i+6], C) 2093 C, t[i+7] = madd2(m, q7, t[i+7], C) 2094 C, t[i+8] = madd2(m, q8, t[i+8], C) 2095 C, t[i+9] = madd2(m, q9, t[i+9], C) 2096 2097 t[i+Limbs] += C 2098 } 2099 { 2100 const i = 3 2101 m = t[i] * qInvNeg 2102 2103 C = madd0(m, q0, t[i+0]) 2104 C, t[i+1] = madd2(m, q1, t[i+1], C) 2105 C, t[i+2] = madd2(m, q2, t[i+2], C) 2106 C, t[i+3] = madd2(m, q3, t[i+3], C) 2107 C, t[i+4] = madd2(m, q4, t[i+4], C) 2108 C, t[i+5] = madd2(m, q5, t[i+5], C) 2109 C, t[i+6] = madd2(m, q6, t[i+6], C) 2110 C, t[i+7] = madd2(m, q7, t[i+7], C) 2111 C, t[i+8] = madd2(m, q8, t[i+8], C) 2112 C, t[i+9] = madd2(m, q9, t[i+9], C) 2113 2114 t[i+Limbs] += C 2115 } 2116 { 2117 const i = 4 2118 m = t[i] * qInvNeg 2119 2120 C = madd0(m, q0, t[i+0]) 2121 C, t[i+1] = madd2(m, q1, t[i+1], C) 2122 C, t[i+2] = madd2(m, q2, t[i+2], C) 2123 C, t[i+3] = madd2(m, q3, t[i+3], C) 2124 C, t[i+4] = madd2(m, q4, t[i+4], C) 2125 C, t[i+5] = madd2(m, q5, t[i+5], C) 2126 C, t[i+6] = madd2(m, q6, t[i+6], C) 2127 C, t[i+7] = madd2(m, q7, t[i+7], C) 2128 C, t[i+8] = madd2(m, q8, t[i+8], C) 2129 C, t[i+9] = madd2(m, q9, t[i+9], C) 2130 2131 t[i+Limbs] += C 2132 } 2133 { 2134 const i = 5 2135 m = t[i] * qInvNeg 2136 2137 C = madd0(m, q0, t[i+0]) 2138 C, t[i+1] = madd2(m, q1, t[i+1], C) 2139 C, t[i+2] = madd2(m, q2, t[i+2], C) 2140 C, t[i+3] = madd2(m, q3, t[i+3], C) 2141 C, t[i+4] = madd2(m, q4, t[i+4], C) 2142 C, t[i+5] = madd2(m, q5, t[i+5], C) 2143 C, t[i+6] = madd2(m, q6, t[i+6], C) 2144 C, t[i+7] = madd2(m, q7, t[i+7], C) 2145 C, t[i+8] = madd2(m, q8, t[i+8], C) 2146 C, t[i+9] = madd2(m, q9, t[i+9], C) 2147 2148 t[i+Limbs] += C 2149 } 2150 { 2151 const i = 6 2152 m = t[i] * qInvNeg 2153 2154 C = madd0(m, q0, t[i+0]) 2155 C, t[i+1] = madd2(m, q1, t[i+1], C) 2156 C, t[i+2] = madd2(m, q2, t[i+2], C) 2157 C, t[i+3] = madd2(m, q3, t[i+3], C) 2158 C, t[i+4] = madd2(m, q4, t[i+4], C) 2159 C, t[i+5] = madd2(m, q5, t[i+5], C) 2160 C, t[i+6] = madd2(m, q6, t[i+6], C) 2161 C, t[i+7] = madd2(m, q7, t[i+7], C) 2162 C, t[i+8] = madd2(m, q8, t[i+8], C) 2163 C, t[i+9] = madd2(m, q9, t[i+9], C) 2164 2165 t[i+Limbs] += C 2166 } 2167 { 2168 const i = 7 2169 m = t[i] * qInvNeg 2170 2171 C = madd0(m, q0, t[i+0]) 2172 C, t[i+1] = madd2(m, q1, t[i+1], C) 2173 C, t[i+2] = madd2(m, q2, t[i+2], C) 2174 C, t[i+3] = madd2(m, q3, t[i+3], C) 2175 C, t[i+4] = madd2(m, q4, t[i+4], C) 2176 C, t[i+5] = madd2(m, q5, t[i+5], C) 2177 C, t[i+6] = madd2(m, q6, t[i+6], C) 2178 C, t[i+7] = madd2(m, q7, t[i+7], C) 2179 C, t[i+8] = madd2(m, q8, t[i+8], C) 2180 C, t[i+9] = madd2(m, q9, t[i+9], C) 2181 2182 t[i+Limbs] += C 2183 } 2184 { 2185 const i = 8 2186 m = t[i] * qInvNeg 2187 2188 C = madd0(m, q0, t[i+0]) 2189 C, t[i+1] = madd2(m, q1, t[i+1], C) 2190 C, t[i+2] = madd2(m, q2, t[i+2], C) 2191 C, t[i+3] = madd2(m, q3, t[i+3], C) 2192 C, t[i+4] = madd2(m, q4, t[i+4], C) 2193 C, t[i+5] = madd2(m, q5, t[i+5], C) 2194 C, t[i+6] = madd2(m, q6, t[i+6], C) 2195 C, t[i+7] = madd2(m, q7, t[i+7], C) 2196 C, t[i+8] = madd2(m, q8, t[i+8], C) 2197 C, t[i+9] = madd2(m, q9, t[i+9], C) 2198 2199 t[i+Limbs] += C 2200 } 2201 { 2202 const i = 9 2203 m := t[i] * qInvNeg 2204 2205 C = madd0(m, q0, t[i+0]) 2206 C, z[0] = madd2(m, q1, t[i+1], C) 2207 C, z[1] = madd2(m, q2, t[i+2], C) 2208 C, z[2] = madd2(m, q3, t[i+3], C) 2209 C, z[3] = madd2(m, q4, t[i+4], C) 2210 C, z[4] = madd2(m, q5, t[i+5], C) 2211 C, z[5] = madd2(m, q6, t[i+6], C) 2212 C, z[6] = madd2(m, q7, t[i+7], C) 2213 C, z[7] = madd2(m, q8, t[i+8], C) 2214 z[9], z[8] = madd2(m, q9, t[i+9], C) 2215 } 2216 2217 // if z ⩾ q → z -= q 2218 if !z.smallerThanModulus() { 2219 var b uint64 2220 z[0], b = bits.Sub64(z[0], q0, 0) 2221 z[1], b = bits.Sub64(z[1], q1, b) 2222 z[2], b = bits.Sub64(z[2], q2, b) 2223 z[3], b = bits.Sub64(z[3], q3, b) 2224 z[4], b = bits.Sub64(z[4], q4, b) 2225 z[5], b = bits.Sub64(z[5], q5, b) 2226 z[6], b = bits.Sub64(z[6], q6, b) 2227 z[7], b = bits.Sub64(z[7], q7, b) 2228 z[8], b = bits.Sub64(z[8], q8, b) 2229 z[9], _ = bits.Sub64(z[9], q9, b) 2230 } 2231 // </standard SOS> 2232 2233 if mustNeg { 2234 // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead 2235 var b uint64 2236 z[0], b = bits.Sub64(z[0], signBitSelector, 0) 2237 z[1], b = bits.Sub64(z[1], 0, b) 2238 z[2], b = bits.Sub64(z[2], 0, b) 2239 z[3], b = bits.Sub64(z[3], 0, b) 2240 z[4], b = bits.Sub64(z[4], 0, b) 2241 z[5], b = bits.Sub64(z[5], 0, b) 2242 z[6], b = bits.Sub64(z[6], 0, b) 2243 z[7], b = bits.Sub64(z[7], 0, b) 2244 z[8], b = bits.Sub64(z[8], 0, b) 2245 z[9], b = bits.Sub64(z[9], 0, b) 2246 2247 // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 2248 2249 if b != 0 { 2250 // z[9] = -1 2251 // negative: add q 2252 const neg1 = 0xFFFFFFFFFFFFFFFF 2253 2254 var carry uint64 2255 2256 z[0], carry = bits.Add64(z[0], q0, 0) 2257 z[1], carry = bits.Add64(z[1], q1, carry) 2258 z[2], carry = bits.Add64(z[2], q2, carry) 2259 z[3], carry = bits.Add64(z[3], q3, carry) 2260 z[4], carry = bits.Add64(z[4], q4, carry) 2261 z[5], carry = bits.Add64(z[5], q5, carry) 2262 z[6], carry = bits.Add64(z[6], q6, carry) 2263 z[7], carry = bits.Add64(z[7], q7, carry) 2264 z[8], carry = bits.Add64(z[8], q8, carry) 2265 z[9], _ = bits.Add64(neg1, q9, carry) 2266 } 2267 } 2268 } 2269 2270 const ( 2271 updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) 2272 updateFactorIdentityMatrixRow0 = 1 2273 updateFactorIdentityMatrixRow1 = 1 << 32 2274 ) 2275 2276 func updateFactorsDecompose(c int64) (int64, int64) { 2277 c += updateFactorsConversionBias 2278 const low32BitsFilter int64 = 0xFFFFFFFF 2279 f := c&low32BitsFilter - 0x7FFFFFFF 2280 g := c>>32&low32BitsFilter - 0x7FFFFFFF 2281 return f, g 2282 } 2283 2284 // negL negates in place [x | xHi] and return the new most significant word xHi 2285 func negL(x *Element, xHi uint64) uint64 { 2286 var b uint64 2287 2288 x[0], b = bits.Sub64(0, x[0], 0) 2289 x[1], b = bits.Sub64(0, x[1], b) 2290 x[2], b = bits.Sub64(0, x[2], b) 2291 x[3], b = bits.Sub64(0, x[3], b) 2292 x[4], b = bits.Sub64(0, x[4], b) 2293 x[5], b = bits.Sub64(0, x[5], b) 2294 x[6], b = bits.Sub64(0, x[6], b) 2295 x[7], b = bits.Sub64(0, x[7], b) 2296 x[8], b = bits.Sub64(0, x[8], b) 2297 x[9], b = bits.Sub64(0, x[9], b) 2298 xHi, _ = bits.Sub64(0, xHi, b) 2299 2300 return xHi 2301 } 2302 2303 // mulWNonModular multiplies by one word in non-montgomery, without reducing 2304 func (z *Element) mulWNonModular(x *Element, y int64) uint64 { 2305 2306 // w := abs(y) 2307 m := y >> 63 2308 w := uint64((y ^ m) - m) 2309 2310 var c uint64 2311 c, z[0] = bits.Mul64(x[0], w) 2312 c, z[1] = madd1(x[1], w, c) 2313 c, z[2] = madd1(x[2], w, c) 2314 c, z[3] = madd1(x[3], w, c) 2315 c, z[4] = madd1(x[4], w, c) 2316 c, z[5] = madd1(x[5], w, c) 2317 c, z[6] = madd1(x[6], w, c) 2318 c, z[7] = madd1(x[7], w, c) 2319 c, z[8] = madd1(x[8], w, c) 2320 c, z[9] = madd1(x[9], w, c) 2321 2322 if y < 0 { 2323 c = negL(z, c) 2324 } 2325 2326 return c 2327 } 2328 2329 // linearCombNonModular computes a linear combination without modular reduction 2330 func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { 2331 var yTimes Element 2332 2333 yHi := yTimes.mulWNonModular(y, yC) 2334 xHi := z.mulWNonModular(x, xC) 2335 2336 var carry uint64 2337 z[0], carry = bits.Add64(z[0], yTimes[0], 0) 2338 z[1], carry = bits.Add64(z[1], yTimes[1], carry) 2339 z[2], carry = bits.Add64(z[2], yTimes[2], carry) 2340 z[3], carry = bits.Add64(z[3], yTimes[3], carry) 2341 z[4], carry = bits.Add64(z[4], yTimes[4], carry) 2342 z[5], carry = bits.Add64(z[5], yTimes[5], carry) 2343 z[6], carry = bits.Add64(z[6], yTimes[6], carry) 2344 z[7], carry = bits.Add64(z[7], yTimes[7], carry) 2345 z[8], carry = bits.Add64(z[8], yTimes[8], carry) 2346 z[9], carry = bits.Add64(z[9], yTimes[9], carry) 2347 2348 yHi, _ = bits.Add64(xHi, yHi, carry) 2349 2350 return yHi 2351 }