github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-761/fp/element.go (about) 1 // Copyright 2020 ConsenSys Software Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Code generated by consensys/gnark-crypto DO NOT EDIT 16 17 package fp 18 19 import ( 20 "crypto/rand" 21 "encoding/binary" 22 "errors" 23 "io" 24 "math/big" 25 "math/bits" 26 "reflect" 27 "strconv" 28 "strings" 29 30 "github.com/bits-and-blooms/bitset" 31 "github.com/consensys/gnark-crypto/field/hash" 32 "github.com/consensys/gnark-crypto/field/pool" 33 ) 34 35 // Element represents a field element stored on 12 words (uint64) 36 // 37 // Element are assumed to be in Montgomery form in all methods. 38 // 39 // Modulus q = 40 // 41 // q[base10] = 6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068299 42 // q[base16] = 0x122e824fb83ce0ad187c94004faff3eb926186a81d14688528275ef8087be41707ba638e584e91903cebaff25b423048689c8ed12f9fd9071dcd3dc73ebff2e98a116c25667a8f8160cf8aeeaf0a437e6913e6870000082f49d00000000008b 43 // 44 // # Warning 45 // 46 // This code has not been audited and is provided as-is. In particular, there is no security guarantees such as constant time implementation or side-channel attack resistance. 47 type Element [12]uint64 48 49 const ( 50 Limbs = 12 // number of 64 bits words needed to represent a Element 51 Bits = 761 // number of bits needed to represent a Element 52 Bytes = 96 // number of bytes needed to represent a Element 53 ) 54 55 // Field modulus q 56 const ( 57 q0 uint64 = 17626244516597989515 58 q1 uint64 = 16614129118623039618 59 q2 uint64 = 1588918198704579639 60 q3 uint64 = 10998096788944562424 61 q4 uint64 = 8204665564953313070 62 q5 uint64 = 9694500593442880912 63 q6 uint64 = 274362232328168196 64 q7 uint64 = 8105254717682411801 65 q8 uint64 = 5945444129596489281 66 q9 uint64 = 13341377791855249032 67 q10 uint64 = 15098257552581525310 68 q11 uint64 = 81882988782276106 69 ) 70 71 var qElement = Element{ 72 q0, 73 q1, 74 q2, 75 q3, 76 q4, 77 q5, 78 q6, 79 q7, 80 q8, 81 q9, 82 q10, 83 q11, 84 } 85 86 var _modulus big.Int // q stored as big.Int 87 88 // Modulus returns q as a big.Int 89 // 90 // q[base10] = 6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068299 91 // q[base16] = 0x122e824fb83ce0ad187c94004faff3eb926186a81d14688528275ef8087be41707ba638e584e91903cebaff25b423048689c8ed12f9fd9071dcd3dc73ebff2e98a116c25667a8f8160cf8aeeaf0a437e6913e6870000082f49d00000000008b 92 func Modulus() *big.Int { 93 return new(big.Int).Set(&_modulus) 94 } 95 96 // q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r 97 // used for Montgomery reduction 98 const qInvNeg uint64 = 744663313386281181 99 100 func init() { 101 _modulus.SetString("122e824fb83ce0ad187c94004faff3eb926186a81d14688528275ef8087be41707ba638e584e91903cebaff25b423048689c8ed12f9fd9071dcd3dc73ebff2e98a116c25667a8f8160cf8aeeaf0a437e6913e6870000082f49d00000000008b", 16) 102 } 103 104 // NewElement returns a new Element from a uint64 value 105 // 106 // it is equivalent to 107 // 108 // var v Element 109 // v.SetUint64(...) 110 func NewElement(v uint64) Element { 111 z := Element{v} 112 z.Mul(&z, &rSquare) 113 return z 114 } 115 116 // SetUint64 sets z to v and returns z 117 func (z *Element) SetUint64(v uint64) *Element { 118 // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form 119 *z = Element{v} 120 return z.Mul(z, &rSquare) // z.toMont() 121 } 122 123 // SetInt64 sets z to v and returns z 124 func (z *Element) SetInt64(v int64) *Element { 125 126 // absolute value of v 127 m := v >> 63 128 z.SetUint64(uint64((v ^ m) - m)) 129 130 if m != 0 { 131 // v is negative 132 z.Neg(z) 133 } 134 135 return z 136 } 137 138 // Set z = x and returns z 139 func (z *Element) Set(x *Element) *Element { 140 z[0] = x[0] 141 z[1] = x[1] 142 z[2] = x[2] 143 z[3] = x[3] 144 z[4] = x[4] 145 z[5] = x[5] 146 z[6] = x[6] 147 z[7] = x[7] 148 z[8] = x[8] 149 z[9] = x[9] 150 z[10] = x[10] 151 z[11] = x[11] 152 return z 153 } 154 155 // SetInterface converts provided interface into Element 156 // returns an error if provided type is not supported 157 // supported types: 158 // 159 // Element 160 // *Element 161 // uint64 162 // int 163 // string (see SetString for valid formats) 164 // *big.Int 165 // big.Int 166 // []byte 167 func (z *Element) SetInterface(i1 interface{}) (*Element, error) { 168 if i1 == nil { 169 return nil, errors.New("can't set fp.Element with <nil>") 170 } 171 172 switch c1 := i1.(type) { 173 case Element: 174 return z.Set(&c1), nil 175 case *Element: 176 if c1 == nil { 177 return nil, errors.New("can't set fp.Element with <nil>") 178 } 179 return z.Set(c1), nil 180 case uint8: 181 return z.SetUint64(uint64(c1)), nil 182 case uint16: 183 return z.SetUint64(uint64(c1)), nil 184 case uint32: 185 return z.SetUint64(uint64(c1)), nil 186 case uint: 187 return z.SetUint64(uint64(c1)), nil 188 case uint64: 189 return z.SetUint64(c1), nil 190 case int8: 191 return z.SetInt64(int64(c1)), nil 192 case int16: 193 return z.SetInt64(int64(c1)), nil 194 case int32: 195 return z.SetInt64(int64(c1)), nil 196 case int64: 197 return z.SetInt64(c1), nil 198 case int: 199 return z.SetInt64(int64(c1)), nil 200 case string: 201 return z.SetString(c1) 202 case *big.Int: 203 if c1 == nil { 204 return nil, errors.New("can't set fp.Element with <nil>") 205 } 206 return z.SetBigInt(c1), nil 207 case big.Int: 208 return z.SetBigInt(&c1), nil 209 case []byte: 210 return z.SetBytes(c1), nil 211 default: 212 return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) 213 } 214 } 215 216 // SetZero z = 0 217 func (z *Element) SetZero() *Element { 218 z[0] = 0 219 z[1] = 0 220 z[2] = 0 221 z[3] = 0 222 z[4] = 0 223 z[5] = 0 224 z[6] = 0 225 z[7] = 0 226 z[8] = 0 227 z[9] = 0 228 z[10] = 0 229 z[11] = 0 230 return z 231 } 232 233 // SetOne z = 1 (in Montgomery form) 234 func (z *Element) SetOne() *Element { 235 z[0] = 144959613005956565 236 z[1] = 6509995272855063783 237 z[2] = 11428286765660613342 238 z[3] = 15738672438262922740 239 z[4] = 17071399330169272331 240 z[5] = 13899911246788437003 241 z[6] = 12055474021000362245 242 z[7] = 2545351818702954755 243 z[8] = 8887388221587179644 244 z[9] = 5009280847225881135 245 z[10] = 15539704305423854047 246 z[11] = 23071597697427581 247 return z 248 } 249 250 // Div z = x*y⁻¹ (mod q) 251 func (z *Element) Div(x, y *Element) *Element { 252 var yInv Element 253 yInv.Inverse(y) 254 z.Mul(x, &yInv) 255 return z 256 } 257 258 // Equal returns z == x; constant-time 259 func (z *Element) Equal(x *Element) bool { 260 return z.NotEqual(x) == 0 261 } 262 263 // NotEqual returns 0 if and only if z == x; constant-time 264 func (z *Element) NotEqual(x *Element) uint64 { 265 return (z[11] ^ x[11]) | (z[10] ^ x[10]) | (z[9] ^ x[9]) | (z[8] ^ x[8]) | (z[7] ^ x[7]) | (z[6] ^ x[6]) | (z[5] ^ x[5]) | (z[4] ^ x[4]) | (z[3] ^ x[3]) | (z[2] ^ x[2]) | (z[1] ^ x[1]) | (z[0] ^ x[0]) 266 } 267 268 // IsZero returns z == 0 269 func (z *Element) IsZero() bool { 270 return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 271 } 272 273 // IsOne returns z == 1 274 func (z *Element) IsOne() bool { 275 return ((z[11] ^ 23071597697427581) | (z[10] ^ 15539704305423854047) | (z[9] ^ 5009280847225881135) | (z[8] ^ 8887388221587179644) | (z[7] ^ 2545351818702954755) | (z[6] ^ 12055474021000362245) | (z[5] ^ 13899911246788437003) | (z[4] ^ 17071399330169272331) | (z[3] ^ 15738672438262922740) | (z[2] ^ 11428286765660613342) | (z[1] ^ 6509995272855063783) | (z[0] ^ 144959613005956565)) == 0 276 } 277 278 // IsUint64 reports whether z can be represented as an uint64. 279 func (z *Element) IsUint64() bool { 280 zz := *z 281 zz.fromMont() 282 return zz.FitsOnOneWord() 283 } 284 285 // Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. 286 func (z *Element) Uint64() uint64 { 287 return z.Bits()[0] 288 } 289 290 // FitsOnOneWord reports whether z words (except the least significant word) are 0 291 // 292 // It is the responsibility of the caller to convert from Montgomery to Regular form if needed. 293 func (z *Element) FitsOnOneWord() bool { 294 return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 295 } 296 297 // Cmp compares (lexicographic order) z and x and returns: 298 // 299 // -1 if z < x 300 // 0 if z == x 301 // +1 if z > x 302 func (z *Element) Cmp(x *Element) int { 303 _z := z.Bits() 304 _x := x.Bits() 305 if _z[11] > _x[11] { 306 return 1 307 } else if _z[11] < _x[11] { 308 return -1 309 } 310 if _z[10] > _x[10] { 311 return 1 312 } else if _z[10] < _x[10] { 313 return -1 314 } 315 if _z[9] > _x[9] { 316 return 1 317 } else if _z[9] < _x[9] { 318 return -1 319 } 320 if _z[8] > _x[8] { 321 return 1 322 } else if _z[8] < _x[8] { 323 return -1 324 } 325 if _z[7] > _x[7] { 326 return 1 327 } else if _z[7] < _x[7] { 328 return -1 329 } 330 if _z[6] > _x[6] { 331 return 1 332 } else if _z[6] < _x[6] { 333 return -1 334 } 335 if _z[5] > _x[5] { 336 return 1 337 } else if _z[5] < _x[5] { 338 return -1 339 } 340 if _z[4] > _x[4] { 341 return 1 342 } else if _z[4] < _x[4] { 343 return -1 344 } 345 if _z[3] > _x[3] { 346 return 1 347 } else if _z[3] < _x[3] { 348 return -1 349 } 350 if _z[2] > _x[2] { 351 return 1 352 } else if _z[2] < _x[2] { 353 return -1 354 } 355 if _z[1] > _x[1] { 356 return 1 357 } else if _z[1] < _x[1] { 358 return -1 359 } 360 if _z[0] > _x[0] { 361 return 1 362 } else if _z[0] < _x[0] { 363 return -1 364 } 365 return 0 366 } 367 368 // LexicographicallyLargest returns true if this element is strictly lexicographically 369 // larger than its negation, false otherwise 370 func (z *Element) LexicographicallyLargest() bool { 371 // adapted from github.com/zkcrypto/bls12_381 372 // we check if the element is larger than (q-1) / 2 373 // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 374 375 _z := z.Bits() 376 377 var b uint64 378 _, b = bits.Sub64(_z[0], 8813122258298994758, 0) 379 _, b = bits.Sub64(_z[1], 17530436596166295617, b) 380 _, b = bits.Sub64(_z[2], 794459099352289819, b) 381 _, b = bits.Sub64(_z[3], 5499048394472281212, b) 382 _, b = bits.Sub64(_z[4], 4102332782476656535, b) 383 _, b = bits.Sub64(_z[5], 4847250296721440456, b) 384 _, b = bits.Sub64(_z[6], 9360553153018859906, b) 385 _, b = bits.Sub64(_z[7], 13275999395695981708, b) 386 _, b = bits.Sub64(_z[8], 2972722064798244640, b) 387 _, b = bits.Sub64(_z[9], 6670688895927624516, b) 388 _, b = bits.Sub64(_z[10], 7549128776290762655, b) 389 _, b = bits.Sub64(_z[11], 40941494391138053, b) 390 391 return b == 0 392 } 393 394 // SetRandom sets z to a uniform random value in [0, q). 395 // 396 // This might error only if reading from crypto/rand.Reader errors, 397 // in which case, value of z is undefined. 398 func (z *Element) SetRandom() (*Element, error) { 399 // this code is generated for all modulus 400 // and derived from go/src/crypto/rand/util.go 401 402 // l is number of limbs * 8; the number of bytes needed to reconstruct 12 uint64 403 const l = 96 404 405 // bitLen is the maximum bit length needed to encode a value < q. 406 const bitLen = 761 407 408 // k is the maximum byte length needed to encode a value < q. 409 const k = (bitLen + 7) / 8 410 411 // b is the number of bits in the most significant byte of q-1. 412 b := uint(bitLen % 8) 413 if b == 0 { 414 b = 8 415 } 416 417 var bytes [l]byte 418 419 for { 420 // note that bytes[k:l] is always 0 421 if _, err := io.ReadFull(rand.Reader, bytes[:k]); err != nil { 422 return nil, err 423 } 424 425 // Clear unused bits in in the most significant byte to increase probability 426 // that the candidate is < q. 427 bytes[k-1] &= uint8(int(1<<b) - 1) 428 z[0] = binary.LittleEndian.Uint64(bytes[0:8]) 429 z[1] = binary.LittleEndian.Uint64(bytes[8:16]) 430 z[2] = binary.LittleEndian.Uint64(bytes[16:24]) 431 z[3] = binary.LittleEndian.Uint64(bytes[24:32]) 432 z[4] = binary.LittleEndian.Uint64(bytes[32:40]) 433 z[5] = binary.LittleEndian.Uint64(bytes[40:48]) 434 z[6] = binary.LittleEndian.Uint64(bytes[48:56]) 435 z[7] = binary.LittleEndian.Uint64(bytes[56:64]) 436 z[8] = binary.LittleEndian.Uint64(bytes[64:72]) 437 z[9] = binary.LittleEndian.Uint64(bytes[72:80]) 438 z[10] = binary.LittleEndian.Uint64(bytes[80:88]) 439 z[11] = binary.LittleEndian.Uint64(bytes[88:96]) 440 441 if !z.smallerThanModulus() { 442 continue // ignore the candidate and re-sample 443 } 444 445 return z, nil 446 } 447 } 448 449 // smallerThanModulus returns true if z < q 450 // This is not constant time 451 func (z *Element) smallerThanModulus() bool { 452 return (z[11] < q11 || (z[11] == q11 && (z[10] < q10 || (z[10] == q10 && (z[9] < q9 || (z[9] == q9 && (z[8] < q8 || (z[8] == q8 && (z[7] < q7 || (z[7] == q7 && (z[6] < q6 || (z[6] == q6 && (z[5] < q5 || (z[5] == q5 && (z[4] < q4 || (z[4] == q4 && (z[3] < q3 || (z[3] == q3 && (z[2] < q2 || (z[2] == q2 && (z[1] < q1 || (z[1] == q1 && (z[0] < q0))))))))))))))))))))))) 453 } 454 455 // One returns 1 456 func One() Element { 457 var one Element 458 one.SetOne() 459 return one 460 } 461 462 // Halve sets z to z / 2 (mod q) 463 func (z *Element) Halve() { 464 var carry uint64 465 466 if z[0]&1 == 1 { 467 // z = z + q 468 z[0], carry = bits.Add64(z[0], q0, 0) 469 z[1], carry = bits.Add64(z[1], q1, carry) 470 z[2], carry = bits.Add64(z[2], q2, carry) 471 z[3], carry = bits.Add64(z[3], q3, carry) 472 z[4], carry = bits.Add64(z[4], q4, carry) 473 z[5], carry = bits.Add64(z[5], q5, carry) 474 z[6], carry = bits.Add64(z[6], q6, carry) 475 z[7], carry = bits.Add64(z[7], q7, carry) 476 z[8], carry = bits.Add64(z[8], q8, carry) 477 z[9], carry = bits.Add64(z[9], q9, carry) 478 z[10], carry = bits.Add64(z[10], q10, carry) 479 z[11], _ = bits.Add64(z[11], q11, carry) 480 481 } 482 // z = z >> 1 483 z[0] = z[0]>>1 | z[1]<<63 484 z[1] = z[1]>>1 | z[2]<<63 485 z[2] = z[2]>>1 | z[3]<<63 486 z[3] = z[3]>>1 | z[4]<<63 487 z[4] = z[4]>>1 | z[5]<<63 488 z[5] = z[5]>>1 | z[6]<<63 489 z[6] = z[6]>>1 | z[7]<<63 490 z[7] = z[7]>>1 | z[8]<<63 491 z[8] = z[8]>>1 | z[9]<<63 492 z[9] = z[9]>>1 | z[10]<<63 493 z[10] = z[10]>>1 | z[11]<<63 494 z[11] >>= 1 495 496 } 497 498 // fromMont converts z in place (i.e. mutates) from Montgomery to regular representation 499 // sets and returns z = z * 1 500 func (z *Element) fromMont() *Element { 501 fromMont(z) 502 return z 503 } 504 505 // Add z = x + y (mod q) 506 func (z *Element) Add(x, y *Element) *Element { 507 508 var carry uint64 509 z[0], carry = bits.Add64(x[0], y[0], 0) 510 z[1], carry = bits.Add64(x[1], y[1], carry) 511 z[2], carry = bits.Add64(x[2], y[2], carry) 512 z[3], carry = bits.Add64(x[3], y[3], carry) 513 z[4], carry = bits.Add64(x[4], y[4], carry) 514 z[5], carry = bits.Add64(x[5], y[5], carry) 515 z[6], carry = bits.Add64(x[6], y[6], carry) 516 z[7], carry = bits.Add64(x[7], y[7], carry) 517 z[8], carry = bits.Add64(x[8], y[8], carry) 518 z[9], carry = bits.Add64(x[9], y[9], carry) 519 z[10], carry = bits.Add64(x[10], y[10], carry) 520 z[11], _ = bits.Add64(x[11], y[11], carry) 521 522 // if z ⩾ q → z -= q 523 if !z.smallerThanModulus() { 524 var b uint64 525 z[0], b = bits.Sub64(z[0], q0, 0) 526 z[1], b = bits.Sub64(z[1], q1, b) 527 z[2], b = bits.Sub64(z[2], q2, b) 528 z[3], b = bits.Sub64(z[3], q3, b) 529 z[4], b = bits.Sub64(z[4], q4, b) 530 z[5], b = bits.Sub64(z[5], q5, b) 531 z[6], b = bits.Sub64(z[6], q6, b) 532 z[7], b = bits.Sub64(z[7], q7, b) 533 z[8], b = bits.Sub64(z[8], q8, b) 534 z[9], b = bits.Sub64(z[9], q9, b) 535 z[10], b = bits.Sub64(z[10], q10, b) 536 z[11], _ = bits.Sub64(z[11], q11, b) 537 } 538 return z 539 } 540 541 // Double z = x + x (mod q), aka Lsh 1 542 func (z *Element) Double(x *Element) *Element { 543 544 var carry uint64 545 z[0], carry = bits.Add64(x[0], x[0], 0) 546 z[1], carry = bits.Add64(x[1], x[1], carry) 547 z[2], carry = bits.Add64(x[2], x[2], carry) 548 z[3], carry = bits.Add64(x[3], x[3], carry) 549 z[4], carry = bits.Add64(x[4], x[4], carry) 550 z[5], carry = bits.Add64(x[5], x[5], carry) 551 z[6], carry = bits.Add64(x[6], x[6], carry) 552 z[7], carry = bits.Add64(x[7], x[7], carry) 553 z[8], carry = bits.Add64(x[8], x[8], carry) 554 z[9], carry = bits.Add64(x[9], x[9], carry) 555 z[10], carry = bits.Add64(x[10], x[10], carry) 556 z[11], _ = bits.Add64(x[11], x[11], carry) 557 558 // if z ⩾ q → z -= q 559 if !z.smallerThanModulus() { 560 var b uint64 561 z[0], b = bits.Sub64(z[0], q0, 0) 562 z[1], b = bits.Sub64(z[1], q1, b) 563 z[2], b = bits.Sub64(z[2], q2, b) 564 z[3], b = bits.Sub64(z[3], q3, b) 565 z[4], b = bits.Sub64(z[4], q4, b) 566 z[5], b = bits.Sub64(z[5], q5, b) 567 z[6], b = bits.Sub64(z[6], q6, b) 568 z[7], b = bits.Sub64(z[7], q7, b) 569 z[8], b = bits.Sub64(z[8], q8, b) 570 z[9], b = bits.Sub64(z[9], q9, b) 571 z[10], b = bits.Sub64(z[10], q10, b) 572 z[11], _ = bits.Sub64(z[11], q11, b) 573 } 574 return z 575 } 576 577 // Sub z = x - y (mod q) 578 func (z *Element) Sub(x, y *Element) *Element { 579 var b uint64 580 z[0], b = bits.Sub64(x[0], y[0], 0) 581 z[1], b = bits.Sub64(x[1], y[1], b) 582 z[2], b = bits.Sub64(x[2], y[2], b) 583 z[3], b = bits.Sub64(x[3], y[3], b) 584 z[4], b = bits.Sub64(x[4], y[4], b) 585 z[5], b = bits.Sub64(x[5], y[5], b) 586 z[6], b = bits.Sub64(x[6], y[6], b) 587 z[7], b = bits.Sub64(x[7], y[7], b) 588 z[8], b = bits.Sub64(x[8], y[8], b) 589 z[9], b = bits.Sub64(x[9], y[9], b) 590 z[10], b = bits.Sub64(x[10], y[10], b) 591 z[11], b = bits.Sub64(x[11], y[11], b) 592 if b != 0 { 593 var c uint64 594 z[0], c = bits.Add64(z[0], q0, 0) 595 z[1], c = bits.Add64(z[1], q1, c) 596 z[2], c = bits.Add64(z[2], q2, c) 597 z[3], c = bits.Add64(z[3], q3, c) 598 z[4], c = bits.Add64(z[4], q4, c) 599 z[5], c = bits.Add64(z[5], q5, c) 600 z[6], c = bits.Add64(z[6], q6, c) 601 z[7], c = bits.Add64(z[7], q7, c) 602 z[8], c = bits.Add64(z[8], q8, c) 603 z[9], c = bits.Add64(z[9], q9, c) 604 z[10], c = bits.Add64(z[10], q10, c) 605 z[11], _ = bits.Add64(z[11], q11, c) 606 } 607 return z 608 } 609 610 // Neg z = q - x 611 func (z *Element) Neg(x *Element) *Element { 612 if x.IsZero() { 613 z.SetZero() 614 return z 615 } 616 var borrow uint64 617 z[0], borrow = bits.Sub64(q0, x[0], 0) 618 z[1], borrow = bits.Sub64(q1, x[1], borrow) 619 z[2], borrow = bits.Sub64(q2, x[2], borrow) 620 z[3], borrow = bits.Sub64(q3, x[3], borrow) 621 z[4], borrow = bits.Sub64(q4, x[4], borrow) 622 z[5], borrow = bits.Sub64(q5, x[5], borrow) 623 z[6], borrow = bits.Sub64(q6, x[6], borrow) 624 z[7], borrow = bits.Sub64(q7, x[7], borrow) 625 z[8], borrow = bits.Sub64(q8, x[8], borrow) 626 z[9], borrow = bits.Sub64(q9, x[9], borrow) 627 z[10], borrow = bits.Sub64(q10, x[10], borrow) 628 z[11], _ = bits.Sub64(q11, x[11], borrow) 629 return z 630 } 631 632 // Select is a constant-time conditional move. 633 // If c=0, z = x0. Else z = x1 634 func (z *Element) Select(c int, x0 *Element, x1 *Element) *Element { 635 cC := uint64((int64(c) | -int64(c)) >> 63) // "canonicized" into: 0 if c=0, -1 otherwise 636 z[0] = x0[0] ^ cC&(x0[0]^x1[0]) 637 z[1] = x0[1] ^ cC&(x0[1]^x1[1]) 638 z[2] = x0[2] ^ cC&(x0[2]^x1[2]) 639 z[3] = x0[3] ^ cC&(x0[3]^x1[3]) 640 z[4] = x0[4] ^ cC&(x0[4]^x1[4]) 641 z[5] = x0[5] ^ cC&(x0[5]^x1[5]) 642 z[6] = x0[6] ^ cC&(x0[6]^x1[6]) 643 z[7] = x0[7] ^ cC&(x0[7]^x1[7]) 644 z[8] = x0[8] ^ cC&(x0[8]^x1[8]) 645 z[9] = x0[9] ^ cC&(x0[9]^x1[9]) 646 z[10] = x0[10] ^ cC&(x0[10]^x1[10]) 647 z[11] = x0[11] ^ cC&(x0[11]^x1[11]) 648 return z 649 } 650 651 // _mulGeneric is unoptimized textbook CIOS 652 // it is a fallback solution on x86 when ADX instruction set is not available 653 // and is used for testing purposes. 654 func _mulGeneric(z, x, y *Element) { 655 656 // Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis 657 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 658 // 659 // The algorithm: 660 // 661 // for i=0 to N-1 662 // C := 0 663 // for j=0 to N-1 664 // (C,t[j]) := t[j] + x[j]*y[i] + C 665 // (t[N+1],t[N]) := t[N] + C 666 // 667 // C := 0 668 // m := t[0]*q'[0] mod D 669 // (C,_) := t[0] + m*q[0] 670 // for j=1 to N-1 671 // (C,t[j-1]) := t[j] + m*q[j] + C 672 // 673 // (C,t[N-1]) := t[N] + C 674 // t[N] := t[N+1] + C 675 // 676 // → N is the number of machine words needed to store the modulus q 677 // → D is the word size. For example, on a 64-bit architecture D is 2 64 678 // → x[i], y[i], q[i] is the ith word of the numbers x,y,q 679 // → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs. 680 // → t is a temporary array of size N+2 681 // → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number 682 683 var t [13]uint64 684 var D uint64 685 var m, C uint64 686 // ----------------------------------- 687 // First loop 688 689 C, t[0] = bits.Mul64(y[0], x[0]) 690 C, t[1] = madd1(y[0], x[1], C) 691 C, t[2] = madd1(y[0], x[2], C) 692 C, t[3] = madd1(y[0], x[3], C) 693 C, t[4] = madd1(y[0], x[4], C) 694 C, t[5] = madd1(y[0], x[5], C) 695 C, t[6] = madd1(y[0], x[6], C) 696 C, t[7] = madd1(y[0], x[7], C) 697 C, t[8] = madd1(y[0], x[8], C) 698 C, t[9] = madd1(y[0], x[9], C) 699 C, t[10] = madd1(y[0], x[10], C) 700 C, t[11] = madd1(y[0], x[11], C) 701 702 t[12], D = bits.Add64(t[12], C, 0) 703 704 // m = t[0]n'[0] mod W 705 m = t[0] * qInvNeg 706 707 // ----------------------------------- 708 // Second loop 709 C = madd0(m, q0, t[0]) 710 C, t[0] = madd2(m, q1, t[1], C) 711 C, t[1] = madd2(m, q2, t[2], C) 712 C, t[2] = madd2(m, q3, t[3], C) 713 C, t[3] = madd2(m, q4, t[4], C) 714 C, t[4] = madd2(m, q5, t[5], C) 715 C, t[5] = madd2(m, q6, t[6], C) 716 C, t[6] = madd2(m, q7, t[7], C) 717 C, t[7] = madd2(m, q8, t[8], C) 718 C, t[8] = madd2(m, q9, t[9], C) 719 C, t[9] = madd2(m, q10, t[10], C) 720 C, t[10] = madd2(m, q11, t[11], C) 721 722 t[11], C = bits.Add64(t[12], C, 0) 723 t[12], _ = bits.Add64(0, D, C) 724 // ----------------------------------- 725 // First loop 726 727 C, t[0] = madd1(y[1], x[0], t[0]) 728 C, t[1] = madd2(y[1], x[1], t[1], C) 729 C, t[2] = madd2(y[1], x[2], t[2], C) 730 C, t[3] = madd2(y[1], x[3], t[3], C) 731 C, t[4] = madd2(y[1], x[4], t[4], C) 732 C, t[5] = madd2(y[1], x[5], t[5], C) 733 C, t[6] = madd2(y[1], x[6], t[6], C) 734 C, t[7] = madd2(y[1], x[7], t[7], C) 735 C, t[8] = madd2(y[1], x[8], t[8], C) 736 C, t[9] = madd2(y[1], x[9], t[9], C) 737 C, t[10] = madd2(y[1], x[10], t[10], C) 738 C, t[11] = madd2(y[1], x[11], t[11], C) 739 740 t[12], D = bits.Add64(t[12], C, 0) 741 742 // m = t[0]n'[0] mod W 743 m = t[0] * qInvNeg 744 745 // ----------------------------------- 746 // Second loop 747 C = madd0(m, q0, t[0]) 748 C, t[0] = madd2(m, q1, t[1], C) 749 C, t[1] = madd2(m, q2, t[2], C) 750 C, t[2] = madd2(m, q3, t[3], C) 751 C, t[3] = madd2(m, q4, t[4], C) 752 C, t[4] = madd2(m, q5, t[5], C) 753 C, t[5] = madd2(m, q6, t[6], C) 754 C, t[6] = madd2(m, q7, t[7], C) 755 C, t[7] = madd2(m, q8, t[8], C) 756 C, t[8] = madd2(m, q9, t[9], C) 757 C, t[9] = madd2(m, q10, t[10], C) 758 C, t[10] = madd2(m, q11, t[11], C) 759 760 t[11], C = bits.Add64(t[12], C, 0) 761 t[12], _ = bits.Add64(0, D, C) 762 // ----------------------------------- 763 // First loop 764 765 C, t[0] = madd1(y[2], x[0], t[0]) 766 C, t[1] = madd2(y[2], x[1], t[1], C) 767 C, t[2] = madd2(y[2], x[2], t[2], C) 768 C, t[3] = madd2(y[2], x[3], t[3], C) 769 C, t[4] = madd2(y[2], x[4], t[4], C) 770 C, t[5] = madd2(y[2], x[5], t[5], C) 771 C, t[6] = madd2(y[2], x[6], t[6], C) 772 C, t[7] = madd2(y[2], x[7], t[7], C) 773 C, t[8] = madd2(y[2], x[8], t[8], C) 774 C, t[9] = madd2(y[2], x[9], t[9], C) 775 C, t[10] = madd2(y[2], x[10], t[10], C) 776 C, t[11] = madd2(y[2], x[11], t[11], C) 777 778 t[12], D = bits.Add64(t[12], C, 0) 779 780 // m = t[0]n'[0] mod W 781 m = t[0] * qInvNeg 782 783 // ----------------------------------- 784 // Second loop 785 C = madd0(m, q0, t[0]) 786 C, t[0] = madd2(m, q1, t[1], C) 787 C, t[1] = madd2(m, q2, t[2], C) 788 C, t[2] = madd2(m, q3, t[3], C) 789 C, t[3] = madd2(m, q4, t[4], C) 790 C, t[4] = madd2(m, q5, t[5], C) 791 C, t[5] = madd2(m, q6, t[6], C) 792 C, t[6] = madd2(m, q7, t[7], C) 793 C, t[7] = madd2(m, q8, t[8], C) 794 C, t[8] = madd2(m, q9, t[9], C) 795 C, t[9] = madd2(m, q10, t[10], C) 796 C, t[10] = madd2(m, q11, t[11], C) 797 798 t[11], C = bits.Add64(t[12], C, 0) 799 t[12], _ = bits.Add64(0, D, C) 800 // ----------------------------------- 801 // First loop 802 803 C, t[0] = madd1(y[3], x[0], t[0]) 804 C, t[1] = madd2(y[3], x[1], t[1], C) 805 C, t[2] = madd2(y[3], x[2], t[2], C) 806 C, t[3] = madd2(y[3], x[3], t[3], C) 807 C, t[4] = madd2(y[3], x[4], t[4], C) 808 C, t[5] = madd2(y[3], x[5], t[5], C) 809 C, t[6] = madd2(y[3], x[6], t[6], C) 810 C, t[7] = madd2(y[3], x[7], t[7], C) 811 C, t[8] = madd2(y[3], x[8], t[8], C) 812 C, t[9] = madd2(y[3], x[9], t[9], C) 813 C, t[10] = madd2(y[3], x[10], t[10], C) 814 C, t[11] = madd2(y[3], x[11], t[11], C) 815 816 t[12], D = bits.Add64(t[12], C, 0) 817 818 // m = t[0]n'[0] mod W 819 m = t[0] * qInvNeg 820 821 // ----------------------------------- 822 // Second loop 823 C = madd0(m, q0, t[0]) 824 C, t[0] = madd2(m, q1, t[1], C) 825 C, t[1] = madd2(m, q2, t[2], C) 826 C, t[2] = madd2(m, q3, t[3], C) 827 C, t[3] = madd2(m, q4, t[4], C) 828 C, t[4] = madd2(m, q5, t[5], C) 829 C, t[5] = madd2(m, q6, t[6], C) 830 C, t[6] = madd2(m, q7, t[7], C) 831 C, t[7] = madd2(m, q8, t[8], C) 832 C, t[8] = madd2(m, q9, t[9], C) 833 C, t[9] = madd2(m, q10, t[10], C) 834 C, t[10] = madd2(m, q11, t[11], C) 835 836 t[11], C = bits.Add64(t[12], C, 0) 837 t[12], _ = bits.Add64(0, D, C) 838 // ----------------------------------- 839 // First loop 840 841 C, t[0] = madd1(y[4], x[0], t[0]) 842 C, t[1] = madd2(y[4], x[1], t[1], C) 843 C, t[2] = madd2(y[4], x[2], t[2], C) 844 C, t[3] = madd2(y[4], x[3], t[3], C) 845 C, t[4] = madd2(y[4], x[4], t[4], C) 846 C, t[5] = madd2(y[4], x[5], t[5], C) 847 C, t[6] = madd2(y[4], x[6], t[6], C) 848 C, t[7] = madd2(y[4], x[7], t[7], C) 849 C, t[8] = madd2(y[4], x[8], t[8], C) 850 C, t[9] = madd2(y[4], x[9], t[9], C) 851 C, t[10] = madd2(y[4], x[10], t[10], C) 852 C, t[11] = madd2(y[4], x[11], t[11], C) 853 854 t[12], D = bits.Add64(t[12], C, 0) 855 856 // m = t[0]n'[0] mod W 857 m = t[0] * qInvNeg 858 859 // ----------------------------------- 860 // Second loop 861 C = madd0(m, q0, t[0]) 862 C, t[0] = madd2(m, q1, t[1], C) 863 C, t[1] = madd2(m, q2, t[2], C) 864 C, t[2] = madd2(m, q3, t[3], C) 865 C, t[3] = madd2(m, q4, t[4], C) 866 C, t[4] = madd2(m, q5, t[5], C) 867 C, t[5] = madd2(m, q6, t[6], C) 868 C, t[6] = madd2(m, q7, t[7], C) 869 C, t[7] = madd2(m, q8, t[8], C) 870 C, t[8] = madd2(m, q9, t[9], C) 871 C, t[9] = madd2(m, q10, t[10], C) 872 C, t[10] = madd2(m, q11, t[11], C) 873 874 t[11], C = bits.Add64(t[12], C, 0) 875 t[12], _ = bits.Add64(0, D, C) 876 // ----------------------------------- 877 // First loop 878 879 C, t[0] = madd1(y[5], x[0], t[0]) 880 C, t[1] = madd2(y[5], x[1], t[1], C) 881 C, t[2] = madd2(y[5], x[2], t[2], C) 882 C, t[3] = madd2(y[5], x[3], t[3], C) 883 C, t[4] = madd2(y[5], x[4], t[4], C) 884 C, t[5] = madd2(y[5], x[5], t[5], C) 885 C, t[6] = madd2(y[5], x[6], t[6], C) 886 C, t[7] = madd2(y[5], x[7], t[7], C) 887 C, t[8] = madd2(y[5], x[8], t[8], C) 888 C, t[9] = madd2(y[5], x[9], t[9], C) 889 C, t[10] = madd2(y[5], x[10], t[10], C) 890 C, t[11] = madd2(y[5], x[11], t[11], C) 891 892 t[12], D = bits.Add64(t[12], C, 0) 893 894 // m = t[0]n'[0] mod W 895 m = t[0] * qInvNeg 896 897 // ----------------------------------- 898 // Second loop 899 C = madd0(m, q0, t[0]) 900 C, t[0] = madd2(m, q1, t[1], C) 901 C, t[1] = madd2(m, q2, t[2], C) 902 C, t[2] = madd2(m, q3, t[3], C) 903 C, t[3] = madd2(m, q4, t[4], C) 904 C, t[4] = madd2(m, q5, t[5], C) 905 C, t[5] = madd2(m, q6, t[6], C) 906 C, t[6] = madd2(m, q7, t[7], C) 907 C, t[7] = madd2(m, q8, t[8], C) 908 C, t[8] = madd2(m, q9, t[9], C) 909 C, t[9] = madd2(m, q10, t[10], C) 910 C, t[10] = madd2(m, q11, t[11], C) 911 912 t[11], C = bits.Add64(t[12], C, 0) 913 t[12], _ = bits.Add64(0, D, C) 914 // ----------------------------------- 915 // First loop 916 917 C, t[0] = madd1(y[6], x[0], t[0]) 918 C, t[1] = madd2(y[6], x[1], t[1], C) 919 C, t[2] = madd2(y[6], x[2], t[2], C) 920 C, t[3] = madd2(y[6], x[3], t[3], C) 921 C, t[4] = madd2(y[6], x[4], t[4], C) 922 C, t[5] = madd2(y[6], x[5], t[5], C) 923 C, t[6] = madd2(y[6], x[6], t[6], C) 924 C, t[7] = madd2(y[6], x[7], t[7], C) 925 C, t[8] = madd2(y[6], x[8], t[8], C) 926 C, t[9] = madd2(y[6], x[9], t[9], C) 927 C, t[10] = madd2(y[6], x[10], t[10], C) 928 C, t[11] = madd2(y[6], x[11], t[11], C) 929 930 t[12], D = bits.Add64(t[12], C, 0) 931 932 // m = t[0]n'[0] mod W 933 m = t[0] * qInvNeg 934 935 // ----------------------------------- 936 // Second loop 937 C = madd0(m, q0, t[0]) 938 C, t[0] = madd2(m, q1, t[1], C) 939 C, t[1] = madd2(m, q2, t[2], C) 940 C, t[2] = madd2(m, q3, t[3], C) 941 C, t[3] = madd2(m, q4, t[4], C) 942 C, t[4] = madd2(m, q5, t[5], C) 943 C, t[5] = madd2(m, q6, t[6], C) 944 C, t[6] = madd2(m, q7, t[7], C) 945 C, t[7] = madd2(m, q8, t[8], C) 946 C, t[8] = madd2(m, q9, t[9], C) 947 C, t[9] = madd2(m, q10, t[10], C) 948 C, t[10] = madd2(m, q11, t[11], C) 949 950 t[11], C = bits.Add64(t[12], C, 0) 951 t[12], _ = bits.Add64(0, D, C) 952 // ----------------------------------- 953 // First loop 954 955 C, t[0] = madd1(y[7], x[0], t[0]) 956 C, t[1] = madd2(y[7], x[1], t[1], C) 957 C, t[2] = madd2(y[7], x[2], t[2], C) 958 C, t[3] = madd2(y[7], x[3], t[3], C) 959 C, t[4] = madd2(y[7], x[4], t[4], C) 960 C, t[5] = madd2(y[7], x[5], t[5], C) 961 C, t[6] = madd2(y[7], x[6], t[6], C) 962 C, t[7] = madd2(y[7], x[7], t[7], C) 963 C, t[8] = madd2(y[7], x[8], t[8], C) 964 C, t[9] = madd2(y[7], x[9], t[9], C) 965 C, t[10] = madd2(y[7], x[10], t[10], C) 966 C, t[11] = madd2(y[7], x[11], t[11], C) 967 968 t[12], D = bits.Add64(t[12], C, 0) 969 970 // m = t[0]n'[0] mod W 971 m = t[0] * qInvNeg 972 973 // ----------------------------------- 974 // Second loop 975 C = madd0(m, q0, t[0]) 976 C, t[0] = madd2(m, q1, t[1], C) 977 C, t[1] = madd2(m, q2, t[2], C) 978 C, t[2] = madd2(m, q3, t[3], C) 979 C, t[3] = madd2(m, q4, t[4], C) 980 C, t[4] = madd2(m, q5, t[5], C) 981 C, t[5] = madd2(m, q6, t[6], C) 982 C, t[6] = madd2(m, q7, t[7], C) 983 C, t[7] = madd2(m, q8, t[8], C) 984 C, t[8] = madd2(m, q9, t[9], C) 985 C, t[9] = madd2(m, q10, t[10], C) 986 C, t[10] = madd2(m, q11, t[11], C) 987 988 t[11], C = bits.Add64(t[12], C, 0) 989 t[12], _ = bits.Add64(0, D, C) 990 // ----------------------------------- 991 // First loop 992 993 C, t[0] = madd1(y[8], x[0], t[0]) 994 C, t[1] = madd2(y[8], x[1], t[1], C) 995 C, t[2] = madd2(y[8], x[2], t[2], C) 996 C, t[3] = madd2(y[8], x[3], t[3], C) 997 C, t[4] = madd2(y[8], x[4], t[4], C) 998 C, t[5] = madd2(y[8], x[5], t[5], C) 999 C, t[6] = madd2(y[8], x[6], t[6], C) 1000 C, t[7] = madd2(y[8], x[7], t[7], C) 1001 C, t[8] = madd2(y[8], x[8], t[8], C) 1002 C, t[9] = madd2(y[8], x[9], t[9], C) 1003 C, t[10] = madd2(y[8], x[10], t[10], C) 1004 C, t[11] = madd2(y[8], x[11], t[11], C) 1005 1006 t[12], D = bits.Add64(t[12], C, 0) 1007 1008 // m = t[0]n'[0] mod W 1009 m = t[0] * qInvNeg 1010 1011 // ----------------------------------- 1012 // Second loop 1013 C = madd0(m, q0, t[0]) 1014 C, t[0] = madd2(m, q1, t[1], C) 1015 C, t[1] = madd2(m, q2, t[2], C) 1016 C, t[2] = madd2(m, q3, t[3], C) 1017 C, t[3] = madd2(m, q4, t[4], C) 1018 C, t[4] = madd2(m, q5, t[5], C) 1019 C, t[5] = madd2(m, q6, t[6], C) 1020 C, t[6] = madd2(m, q7, t[7], C) 1021 C, t[7] = madd2(m, q8, t[8], C) 1022 C, t[8] = madd2(m, q9, t[9], C) 1023 C, t[9] = madd2(m, q10, t[10], C) 1024 C, t[10] = madd2(m, q11, t[11], C) 1025 1026 t[11], C = bits.Add64(t[12], C, 0) 1027 t[12], _ = bits.Add64(0, D, C) 1028 // ----------------------------------- 1029 // First loop 1030 1031 C, t[0] = madd1(y[9], x[0], t[0]) 1032 C, t[1] = madd2(y[9], x[1], t[1], C) 1033 C, t[2] = madd2(y[9], x[2], t[2], C) 1034 C, t[3] = madd2(y[9], x[3], t[3], C) 1035 C, t[4] = madd2(y[9], x[4], t[4], C) 1036 C, t[5] = madd2(y[9], x[5], t[5], C) 1037 C, t[6] = madd2(y[9], x[6], t[6], C) 1038 C, t[7] = madd2(y[9], x[7], t[7], C) 1039 C, t[8] = madd2(y[9], x[8], t[8], C) 1040 C, t[9] = madd2(y[9], x[9], t[9], C) 1041 C, t[10] = madd2(y[9], x[10], t[10], C) 1042 C, t[11] = madd2(y[9], x[11], t[11], C) 1043 1044 t[12], D = bits.Add64(t[12], C, 0) 1045 1046 // m = t[0]n'[0] mod W 1047 m = t[0] * qInvNeg 1048 1049 // ----------------------------------- 1050 // Second loop 1051 C = madd0(m, q0, t[0]) 1052 C, t[0] = madd2(m, q1, t[1], C) 1053 C, t[1] = madd2(m, q2, t[2], C) 1054 C, t[2] = madd2(m, q3, t[3], C) 1055 C, t[3] = madd2(m, q4, t[4], C) 1056 C, t[4] = madd2(m, q5, t[5], C) 1057 C, t[5] = madd2(m, q6, t[6], C) 1058 C, t[6] = madd2(m, q7, t[7], C) 1059 C, t[7] = madd2(m, q8, t[8], C) 1060 C, t[8] = madd2(m, q9, t[9], C) 1061 C, t[9] = madd2(m, q10, t[10], C) 1062 C, t[10] = madd2(m, q11, t[11], C) 1063 1064 t[11], C = bits.Add64(t[12], C, 0) 1065 t[12], _ = bits.Add64(0, D, C) 1066 // ----------------------------------- 1067 // First loop 1068 1069 C, t[0] = madd1(y[10], x[0], t[0]) 1070 C, t[1] = madd2(y[10], x[1], t[1], C) 1071 C, t[2] = madd2(y[10], x[2], t[2], C) 1072 C, t[3] = madd2(y[10], x[3], t[3], C) 1073 C, t[4] = madd2(y[10], x[4], t[4], C) 1074 C, t[5] = madd2(y[10], x[5], t[5], C) 1075 C, t[6] = madd2(y[10], x[6], t[6], C) 1076 C, t[7] = madd2(y[10], x[7], t[7], C) 1077 C, t[8] = madd2(y[10], x[8], t[8], C) 1078 C, t[9] = madd2(y[10], x[9], t[9], C) 1079 C, t[10] = madd2(y[10], x[10], t[10], C) 1080 C, t[11] = madd2(y[10], x[11], t[11], C) 1081 1082 t[12], D = bits.Add64(t[12], C, 0) 1083 1084 // m = t[0]n'[0] mod W 1085 m = t[0] * qInvNeg 1086 1087 // ----------------------------------- 1088 // Second loop 1089 C = madd0(m, q0, t[0]) 1090 C, t[0] = madd2(m, q1, t[1], C) 1091 C, t[1] = madd2(m, q2, t[2], C) 1092 C, t[2] = madd2(m, q3, t[3], C) 1093 C, t[3] = madd2(m, q4, t[4], C) 1094 C, t[4] = madd2(m, q5, t[5], C) 1095 C, t[5] = madd2(m, q6, t[6], C) 1096 C, t[6] = madd2(m, q7, t[7], C) 1097 C, t[7] = madd2(m, q8, t[8], C) 1098 C, t[8] = madd2(m, q9, t[9], C) 1099 C, t[9] = madd2(m, q10, t[10], C) 1100 C, t[10] = madd2(m, q11, t[11], C) 1101 1102 t[11], C = bits.Add64(t[12], C, 0) 1103 t[12], _ = bits.Add64(0, D, C) 1104 // ----------------------------------- 1105 // First loop 1106 1107 C, t[0] = madd1(y[11], x[0], t[0]) 1108 C, t[1] = madd2(y[11], x[1], t[1], C) 1109 C, t[2] = madd2(y[11], x[2], t[2], C) 1110 C, t[3] = madd2(y[11], x[3], t[3], C) 1111 C, t[4] = madd2(y[11], x[4], t[4], C) 1112 C, t[5] = madd2(y[11], x[5], t[5], C) 1113 C, t[6] = madd2(y[11], x[6], t[6], C) 1114 C, t[7] = madd2(y[11], x[7], t[7], C) 1115 C, t[8] = madd2(y[11], x[8], t[8], C) 1116 C, t[9] = madd2(y[11], x[9], t[9], C) 1117 C, t[10] = madd2(y[11], x[10], t[10], C) 1118 C, t[11] = madd2(y[11], x[11], t[11], C) 1119 1120 t[12], D = bits.Add64(t[12], C, 0) 1121 1122 // m = t[0]n'[0] mod W 1123 m = t[0] * qInvNeg 1124 1125 // ----------------------------------- 1126 // Second loop 1127 C = madd0(m, q0, t[0]) 1128 C, t[0] = madd2(m, q1, t[1], C) 1129 C, t[1] = madd2(m, q2, t[2], C) 1130 C, t[2] = madd2(m, q3, t[3], C) 1131 C, t[3] = madd2(m, q4, t[4], C) 1132 C, t[4] = madd2(m, q5, t[5], C) 1133 C, t[5] = madd2(m, q6, t[6], C) 1134 C, t[6] = madd2(m, q7, t[7], C) 1135 C, t[7] = madd2(m, q8, t[8], C) 1136 C, t[8] = madd2(m, q9, t[9], C) 1137 C, t[9] = madd2(m, q10, t[10], C) 1138 C, t[10] = madd2(m, q11, t[11], C) 1139 1140 t[11], C = bits.Add64(t[12], C, 0) 1141 t[12], _ = bits.Add64(0, D, C) 1142 1143 if t[12] != 0 { 1144 // we need to reduce, we have a result on 13 words 1145 var b uint64 1146 z[0], b = bits.Sub64(t[0], q0, 0) 1147 z[1], b = bits.Sub64(t[1], q1, b) 1148 z[2], b = bits.Sub64(t[2], q2, b) 1149 z[3], b = bits.Sub64(t[3], q3, b) 1150 z[4], b = bits.Sub64(t[4], q4, b) 1151 z[5], b = bits.Sub64(t[5], q5, b) 1152 z[6], b = bits.Sub64(t[6], q6, b) 1153 z[7], b = bits.Sub64(t[7], q7, b) 1154 z[8], b = bits.Sub64(t[8], q8, b) 1155 z[9], b = bits.Sub64(t[9], q9, b) 1156 z[10], b = bits.Sub64(t[10], q10, b) 1157 z[11], _ = bits.Sub64(t[11], q11, b) 1158 return 1159 } 1160 1161 // copy t into z 1162 z[0] = t[0] 1163 z[1] = t[1] 1164 z[2] = t[2] 1165 z[3] = t[3] 1166 z[4] = t[4] 1167 z[5] = t[5] 1168 z[6] = t[6] 1169 z[7] = t[7] 1170 z[8] = t[8] 1171 z[9] = t[9] 1172 z[10] = t[10] 1173 z[11] = t[11] 1174 1175 // if z ⩾ q → z -= q 1176 if !z.smallerThanModulus() { 1177 var b uint64 1178 z[0], b = bits.Sub64(z[0], q0, 0) 1179 z[1], b = bits.Sub64(z[1], q1, b) 1180 z[2], b = bits.Sub64(z[2], q2, b) 1181 z[3], b = bits.Sub64(z[3], q3, b) 1182 z[4], b = bits.Sub64(z[4], q4, b) 1183 z[5], b = bits.Sub64(z[5], q5, b) 1184 z[6], b = bits.Sub64(z[6], q6, b) 1185 z[7], b = bits.Sub64(z[7], q7, b) 1186 z[8], b = bits.Sub64(z[8], q8, b) 1187 z[9], b = bits.Sub64(z[9], q9, b) 1188 z[10], b = bits.Sub64(z[10], q10, b) 1189 z[11], _ = bits.Sub64(z[11], q11, b) 1190 } 1191 } 1192 1193 func _fromMontGeneric(z *Element) { 1194 // the following lines implement z = z * 1 1195 // with a modified CIOS montgomery multiplication 1196 // see Mul for algorithm documentation 1197 { 1198 // m = z[0]n'[0] mod W 1199 m := z[0] * qInvNeg 1200 C := madd0(m, q0, z[0]) 1201 C, z[0] = madd2(m, q1, z[1], C) 1202 C, z[1] = madd2(m, q2, z[2], C) 1203 C, z[2] = madd2(m, q3, z[3], C) 1204 C, z[3] = madd2(m, q4, z[4], C) 1205 C, z[4] = madd2(m, q5, z[5], C) 1206 C, z[5] = madd2(m, q6, z[6], C) 1207 C, z[6] = madd2(m, q7, z[7], C) 1208 C, z[7] = madd2(m, q8, z[8], C) 1209 C, z[8] = madd2(m, q9, z[9], C) 1210 C, z[9] = madd2(m, q10, z[10], C) 1211 C, z[10] = madd2(m, q11, z[11], C) 1212 z[11] = C 1213 } 1214 { 1215 // m = z[0]n'[0] mod W 1216 m := z[0] * qInvNeg 1217 C := madd0(m, q0, z[0]) 1218 C, z[0] = madd2(m, q1, z[1], C) 1219 C, z[1] = madd2(m, q2, z[2], C) 1220 C, z[2] = madd2(m, q3, z[3], C) 1221 C, z[3] = madd2(m, q4, z[4], C) 1222 C, z[4] = madd2(m, q5, z[5], C) 1223 C, z[5] = madd2(m, q6, z[6], C) 1224 C, z[6] = madd2(m, q7, z[7], C) 1225 C, z[7] = madd2(m, q8, z[8], C) 1226 C, z[8] = madd2(m, q9, z[9], C) 1227 C, z[9] = madd2(m, q10, z[10], C) 1228 C, z[10] = madd2(m, q11, z[11], C) 1229 z[11] = C 1230 } 1231 { 1232 // m = z[0]n'[0] mod W 1233 m := z[0] * qInvNeg 1234 C := madd0(m, q0, z[0]) 1235 C, z[0] = madd2(m, q1, z[1], C) 1236 C, z[1] = madd2(m, q2, z[2], C) 1237 C, z[2] = madd2(m, q3, z[3], C) 1238 C, z[3] = madd2(m, q4, z[4], C) 1239 C, z[4] = madd2(m, q5, z[5], C) 1240 C, z[5] = madd2(m, q6, z[6], C) 1241 C, z[6] = madd2(m, q7, z[7], C) 1242 C, z[7] = madd2(m, q8, z[8], C) 1243 C, z[8] = madd2(m, q9, z[9], C) 1244 C, z[9] = madd2(m, q10, z[10], C) 1245 C, z[10] = madd2(m, q11, z[11], C) 1246 z[11] = C 1247 } 1248 { 1249 // m = z[0]n'[0] mod W 1250 m := z[0] * qInvNeg 1251 C := madd0(m, q0, z[0]) 1252 C, z[0] = madd2(m, q1, z[1], C) 1253 C, z[1] = madd2(m, q2, z[2], C) 1254 C, z[2] = madd2(m, q3, z[3], C) 1255 C, z[3] = madd2(m, q4, z[4], C) 1256 C, z[4] = madd2(m, q5, z[5], C) 1257 C, z[5] = madd2(m, q6, z[6], C) 1258 C, z[6] = madd2(m, q7, z[7], C) 1259 C, z[7] = madd2(m, q8, z[8], C) 1260 C, z[8] = madd2(m, q9, z[9], C) 1261 C, z[9] = madd2(m, q10, z[10], C) 1262 C, z[10] = madd2(m, q11, z[11], C) 1263 z[11] = C 1264 } 1265 { 1266 // m = z[0]n'[0] mod W 1267 m := z[0] * qInvNeg 1268 C := madd0(m, q0, z[0]) 1269 C, z[0] = madd2(m, q1, z[1], C) 1270 C, z[1] = madd2(m, q2, z[2], C) 1271 C, z[2] = madd2(m, q3, z[3], C) 1272 C, z[3] = madd2(m, q4, z[4], C) 1273 C, z[4] = madd2(m, q5, z[5], C) 1274 C, z[5] = madd2(m, q6, z[6], C) 1275 C, z[6] = madd2(m, q7, z[7], C) 1276 C, z[7] = madd2(m, q8, z[8], C) 1277 C, z[8] = madd2(m, q9, z[9], C) 1278 C, z[9] = madd2(m, q10, z[10], C) 1279 C, z[10] = madd2(m, q11, z[11], C) 1280 z[11] = C 1281 } 1282 { 1283 // m = z[0]n'[0] mod W 1284 m := z[0] * qInvNeg 1285 C := madd0(m, q0, z[0]) 1286 C, z[0] = madd2(m, q1, z[1], C) 1287 C, z[1] = madd2(m, q2, z[2], C) 1288 C, z[2] = madd2(m, q3, z[3], C) 1289 C, z[3] = madd2(m, q4, z[4], C) 1290 C, z[4] = madd2(m, q5, z[5], C) 1291 C, z[5] = madd2(m, q6, z[6], C) 1292 C, z[6] = madd2(m, q7, z[7], C) 1293 C, z[7] = madd2(m, q8, z[8], C) 1294 C, z[8] = madd2(m, q9, z[9], C) 1295 C, z[9] = madd2(m, q10, z[10], C) 1296 C, z[10] = madd2(m, q11, z[11], C) 1297 z[11] = C 1298 } 1299 { 1300 // m = z[0]n'[0] mod W 1301 m := z[0] * qInvNeg 1302 C := madd0(m, q0, z[0]) 1303 C, z[0] = madd2(m, q1, z[1], C) 1304 C, z[1] = madd2(m, q2, z[2], C) 1305 C, z[2] = madd2(m, q3, z[3], C) 1306 C, z[3] = madd2(m, q4, z[4], C) 1307 C, z[4] = madd2(m, q5, z[5], C) 1308 C, z[5] = madd2(m, q6, z[6], C) 1309 C, z[6] = madd2(m, q7, z[7], C) 1310 C, z[7] = madd2(m, q8, z[8], C) 1311 C, z[8] = madd2(m, q9, z[9], C) 1312 C, z[9] = madd2(m, q10, z[10], C) 1313 C, z[10] = madd2(m, q11, z[11], C) 1314 z[11] = C 1315 } 1316 { 1317 // m = z[0]n'[0] mod W 1318 m := z[0] * qInvNeg 1319 C := madd0(m, q0, z[0]) 1320 C, z[0] = madd2(m, q1, z[1], C) 1321 C, z[1] = madd2(m, q2, z[2], C) 1322 C, z[2] = madd2(m, q3, z[3], C) 1323 C, z[3] = madd2(m, q4, z[4], C) 1324 C, z[4] = madd2(m, q5, z[5], C) 1325 C, z[5] = madd2(m, q6, z[6], C) 1326 C, z[6] = madd2(m, q7, z[7], C) 1327 C, z[7] = madd2(m, q8, z[8], C) 1328 C, z[8] = madd2(m, q9, z[9], C) 1329 C, z[9] = madd2(m, q10, z[10], C) 1330 C, z[10] = madd2(m, q11, z[11], C) 1331 z[11] = C 1332 } 1333 { 1334 // m = z[0]n'[0] mod W 1335 m := z[0] * qInvNeg 1336 C := madd0(m, q0, z[0]) 1337 C, z[0] = madd2(m, q1, z[1], C) 1338 C, z[1] = madd2(m, q2, z[2], C) 1339 C, z[2] = madd2(m, q3, z[3], C) 1340 C, z[3] = madd2(m, q4, z[4], C) 1341 C, z[4] = madd2(m, q5, z[5], C) 1342 C, z[5] = madd2(m, q6, z[6], C) 1343 C, z[6] = madd2(m, q7, z[7], C) 1344 C, z[7] = madd2(m, q8, z[8], C) 1345 C, z[8] = madd2(m, q9, z[9], C) 1346 C, z[9] = madd2(m, q10, z[10], C) 1347 C, z[10] = madd2(m, q11, z[11], C) 1348 z[11] = C 1349 } 1350 { 1351 // m = z[0]n'[0] mod W 1352 m := z[0] * qInvNeg 1353 C := madd0(m, q0, z[0]) 1354 C, z[0] = madd2(m, q1, z[1], C) 1355 C, z[1] = madd2(m, q2, z[2], C) 1356 C, z[2] = madd2(m, q3, z[3], C) 1357 C, z[3] = madd2(m, q4, z[4], C) 1358 C, z[4] = madd2(m, q5, z[5], C) 1359 C, z[5] = madd2(m, q6, z[6], C) 1360 C, z[6] = madd2(m, q7, z[7], C) 1361 C, z[7] = madd2(m, q8, z[8], C) 1362 C, z[8] = madd2(m, q9, z[9], C) 1363 C, z[9] = madd2(m, q10, z[10], C) 1364 C, z[10] = madd2(m, q11, z[11], C) 1365 z[11] = C 1366 } 1367 { 1368 // m = z[0]n'[0] mod W 1369 m := z[0] * qInvNeg 1370 C := madd0(m, q0, z[0]) 1371 C, z[0] = madd2(m, q1, z[1], C) 1372 C, z[1] = madd2(m, q2, z[2], C) 1373 C, z[2] = madd2(m, q3, z[3], C) 1374 C, z[3] = madd2(m, q4, z[4], C) 1375 C, z[4] = madd2(m, q5, z[5], C) 1376 C, z[5] = madd2(m, q6, z[6], C) 1377 C, z[6] = madd2(m, q7, z[7], C) 1378 C, z[7] = madd2(m, q8, z[8], C) 1379 C, z[8] = madd2(m, q9, z[9], C) 1380 C, z[9] = madd2(m, q10, z[10], C) 1381 C, z[10] = madd2(m, q11, z[11], C) 1382 z[11] = C 1383 } 1384 { 1385 // m = z[0]n'[0] mod W 1386 m := z[0] * qInvNeg 1387 C := madd0(m, q0, z[0]) 1388 C, z[0] = madd2(m, q1, z[1], C) 1389 C, z[1] = madd2(m, q2, z[2], C) 1390 C, z[2] = madd2(m, q3, z[3], C) 1391 C, z[3] = madd2(m, q4, z[4], C) 1392 C, z[4] = madd2(m, q5, z[5], C) 1393 C, z[5] = madd2(m, q6, z[6], C) 1394 C, z[6] = madd2(m, q7, z[7], C) 1395 C, z[7] = madd2(m, q8, z[8], C) 1396 C, z[8] = madd2(m, q9, z[9], C) 1397 C, z[9] = madd2(m, q10, z[10], C) 1398 C, z[10] = madd2(m, q11, z[11], C) 1399 z[11] = C 1400 } 1401 1402 // if z ⩾ q → z -= q 1403 if !z.smallerThanModulus() { 1404 var b uint64 1405 z[0], b = bits.Sub64(z[0], q0, 0) 1406 z[1], b = bits.Sub64(z[1], q1, b) 1407 z[2], b = bits.Sub64(z[2], q2, b) 1408 z[3], b = bits.Sub64(z[3], q3, b) 1409 z[4], b = bits.Sub64(z[4], q4, b) 1410 z[5], b = bits.Sub64(z[5], q5, b) 1411 z[6], b = bits.Sub64(z[6], q6, b) 1412 z[7], b = bits.Sub64(z[7], q7, b) 1413 z[8], b = bits.Sub64(z[8], q8, b) 1414 z[9], b = bits.Sub64(z[9], q9, b) 1415 z[10], b = bits.Sub64(z[10], q10, b) 1416 z[11], _ = bits.Sub64(z[11], q11, b) 1417 } 1418 } 1419 1420 func _reduceGeneric(z *Element) { 1421 1422 // if z ⩾ q → z -= q 1423 if !z.smallerThanModulus() { 1424 var b uint64 1425 z[0], b = bits.Sub64(z[0], q0, 0) 1426 z[1], b = bits.Sub64(z[1], q1, b) 1427 z[2], b = bits.Sub64(z[2], q2, b) 1428 z[3], b = bits.Sub64(z[3], q3, b) 1429 z[4], b = bits.Sub64(z[4], q4, b) 1430 z[5], b = bits.Sub64(z[5], q5, b) 1431 z[6], b = bits.Sub64(z[6], q6, b) 1432 z[7], b = bits.Sub64(z[7], q7, b) 1433 z[8], b = bits.Sub64(z[8], q8, b) 1434 z[9], b = bits.Sub64(z[9], q9, b) 1435 z[10], b = bits.Sub64(z[10], q10, b) 1436 z[11], _ = bits.Sub64(z[11], q11, b) 1437 } 1438 } 1439 1440 // BatchInvert returns a new slice with every element inverted. 1441 // Uses Montgomery batch inversion trick 1442 func BatchInvert(a []Element) []Element { 1443 res := make([]Element, len(a)) 1444 if len(a) == 0 { 1445 return res 1446 } 1447 1448 zeroes := bitset.New(uint(len(a))) 1449 accumulator := One() 1450 1451 for i := 0; i < len(a); i++ { 1452 if a[i].IsZero() { 1453 zeroes.Set(uint(i)) 1454 continue 1455 } 1456 res[i] = accumulator 1457 accumulator.Mul(&accumulator, &a[i]) 1458 } 1459 1460 accumulator.Inverse(&accumulator) 1461 1462 for i := len(a) - 1; i >= 0; i-- { 1463 if zeroes.Test(uint(i)) { 1464 continue 1465 } 1466 res[i].Mul(&res[i], &accumulator) 1467 accumulator.Mul(&accumulator, &a[i]) 1468 } 1469 1470 return res 1471 } 1472 1473 func _butterflyGeneric(a, b *Element) { 1474 t := *a 1475 a.Add(a, b) 1476 b.Sub(&t, b) 1477 } 1478 1479 // BitLen returns the minimum number of bits needed to represent z 1480 // returns 0 if z == 0 1481 func (z *Element) BitLen() int { 1482 if z[11] != 0 { 1483 return 704 + bits.Len64(z[11]) 1484 } 1485 if z[10] != 0 { 1486 return 640 + bits.Len64(z[10]) 1487 } 1488 if z[9] != 0 { 1489 return 576 + bits.Len64(z[9]) 1490 } 1491 if z[8] != 0 { 1492 return 512 + bits.Len64(z[8]) 1493 } 1494 if z[7] != 0 { 1495 return 448 + bits.Len64(z[7]) 1496 } 1497 if z[6] != 0 { 1498 return 384 + bits.Len64(z[6]) 1499 } 1500 if z[5] != 0 { 1501 return 320 + bits.Len64(z[5]) 1502 } 1503 if z[4] != 0 { 1504 return 256 + bits.Len64(z[4]) 1505 } 1506 if z[3] != 0 { 1507 return 192 + bits.Len64(z[3]) 1508 } 1509 if z[2] != 0 { 1510 return 128 + bits.Len64(z[2]) 1511 } 1512 if z[1] != 0 { 1513 return 64 + bits.Len64(z[1]) 1514 } 1515 return bits.Len64(z[0]) 1516 } 1517 1518 // Hash msg to count prime field elements. 1519 // https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 1520 func Hash(msg, dst []byte, count int) ([]Element, error) { 1521 // 128 bits of security 1522 // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 1523 const Bytes = 1 + (Bits-1)/8 1524 const L = 16 + Bytes 1525 1526 lenInBytes := count * L 1527 pseudoRandomBytes, err := hash.ExpandMsgXmd(msg, dst, lenInBytes) 1528 if err != nil { 1529 return nil, err 1530 } 1531 1532 // get temporary big int from the pool 1533 vv := pool.BigInt.Get() 1534 1535 res := make([]Element, count) 1536 for i := 0; i < count; i++ { 1537 vv.SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) 1538 res[i].SetBigInt(vv) 1539 } 1540 1541 // release object into pool 1542 pool.BigInt.Put(vv) 1543 1544 return res, nil 1545 } 1546 1547 // Exp z = xᵏ (mod q) 1548 func (z *Element) Exp(x Element, k *big.Int) *Element { 1549 if k.IsUint64() && k.Uint64() == 0 { 1550 return z.SetOne() 1551 } 1552 1553 e := k 1554 if k.Sign() == -1 { 1555 // negative k, we invert 1556 // if k < 0: xᵏ (mod q) == (x⁻¹)ᵏ (mod q) 1557 x.Inverse(&x) 1558 1559 // we negate k in a temp big.Int since 1560 // Int.Bit(_) of k and -k is different 1561 e = pool.BigInt.Get() 1562 defer pool.BigInt.Put(e) 1563 e.Neg(k) 1564 } 1565 1566 z.Set(&x) 1567 1568 for i := e.BitLen() - 2; i >= 0; i-- { 1569 z.Square(z) 1570 if e.Bit(i) == 1 { 1571 z.Mul(z, &x) 1572 } 1573 } 1574 1575 return z 1576 } 1577 1578 // rSquare where r is the Montgommery constant 1579 // see section 2.3.2 of Tolga Acar's thesis 1580 // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf 1581 var rSquare = Element{ 1582 14305184132582319705, 1583 8868935336694416555, 1584 9196887162930508889, 1585 15486798265448570248, 1586 5402985275949444416, 1587 10893197322525159598, 1588 3204916688966998390, 1589 12417238192559061753, 1590 12426306557607898622, 1591 1305582522441154384, 1592 10311846026977660324, 1593 48736111365249031, 1594 } 1595 1596 // toMont converts z to Montgomery form 1597 // sets and returns z = z * r² 1598 func (z *Element) toMont() *Element { 1599 return z.Mul(z, &rSquare) 1600 } 1601 1602 // String returns the decimal representation of z as generated by 1603 // z.Text(10). 1604 func (z *Element) String() string { 1605 return z.Text(10) 1606 } 1607 1608 // toBigInt returns z as a big.Int in Montgomery form 1609 func (z *Element) toBigInt(res *big.Int) *big.Int { 1610 var b [Bytes]byte 1611 binary.BigEndian.PutUint64(b[88:96], z[0]) 1612 binary.BigEndian.PutUint64(b[80:88], z[1]) 1613 binary.BigEndian.PutUint64(b[72:80], z[2]) 1614 binary.BigEndian.PutUint64(b[64:72], z[3]) 1615 binary.BigEndian.PutUint64(b[56:64], z[4]) 1616 binary.BigEndian.PutUint64(b[48:56], z[5]) 1617 binary.BigEndian.PutUint64(b[40:48], z[6]) 1618 binary.BigEndian.PutUint64(b[32:40], z[7]) 1619 binary.BigEndian.PutUint64(b[24:32], z[8]) 1620 binary.BigEndian.PutUint64(b[16:24], z[9]) 1621 binary.BigEndian.PutUint64(b[8:16], z[10]) 1622 binary.BigEndian.PutUint64(b[0:8], z[11]) 1623 1624 return res.SetBytes(b[:]) 1625 } 1626 1627 // Text returns the string representation of z in the given base. 1628 // Base must be between 2 and 36, inclusive. The result uses the 1629 // lower-case letters 'a' to 'z' for digit values 10 to 35. 1630 // No prefix (such as "0x") is added to the string. If z is a nil 1631 // pointer it returns "<nil>". 1632 // If base == 10 and -z fits in a uint16 prefix "-" is added to the string. 1633 func (z *Element) Text(base int) string { 1634 if base < 2 || base > 36 { 1635 panic("invalid base") 1636 } 1637 if z == nil { 1638 return "<nil>" 1639 } 1640 1641 const maxUint16 = 65535 1642 if base == 10 { 1643 var zzNeg Element 1644 zzNeg.Neg(z) 1645 zzNeg.fromMont() 1646 if zzNeg.FitsOnOneWord() && zzNeg[0] <= maxUint16 && zzNeg[0] != 0 { 1647 return "-" + strconv.FormatUint(zzNeg[0], base) 1648 } 1649 } 1650 zz := *z 1651 zz.fromMont() 1652 if zz.FitsOnOneWord() { 1653 return strconv.FormatUint(zz[0], base) 1654 } 1655 vv := pool.BigInt.Get() 1656 r := zz.toBigInt(vv).Text(base) 1657 pool.BigInt.Put(vv) 1658 return r 1659 } 1660 1661 // BigInt sets and return z as a *big.Int 1662 func (z *Element) BigInt(res *big.Int) *big.Int { 1663 _z := *z 1664 _z.fromMont() 1665 return _z.toBigInt(res) 1666 } 1667 1668 // ToBigIntRegular returns z as a big.Int in regular form 1669 // 1670 // Deprecated: use BigInt(*big.Int) instead 1671 func (z Element) ToBigIntRegular(res *big.Int) *big.Int { 1672 z.fromMont() 1673 return z.toBigInt(res) 1674 } 1675 1676 // Bits provides access to z by returning its value as a little-endian [12]uint64 array. 1677 // Bits is intended to support implementation of missing low-level Element 1678 // functionality outside this package; it should be avoided otherwise. 1679 func (z *Element) Bits() [12]uint64 { 1680 _z := *z 1681 fromMont(&_z) 1682 return _z 1683 } 1684 1685 // Bytes returns the value of z as a big-endian byte array 1686 func (z *Element) Bytes() (res [Bytes]byte) { 1687 BigEndian.PutElement(&res, *z) 1688 return 1689 } 1690 1691 // Marshal returns the value of z as a big-endian byte slice 1692 func (z *Element) Marshal() []byte { 1693 b := z.Bytes() 1694 return b[:] 1695 } 1696 1697 // Unmarshal is an alias for SetBytes, it sets z to the value of e. 1698 func (z *Element) Unmarshal(e []byte) { 1699 z.SetBytes(e) 1700 } 1701 1702 // SetBytes interprets e as the bytes of a big-endian unsigned integer, 1703 // sets z to that value, and returns z. 1704 func (z *Element) SetBytes(e []byte) *Element { 1705 if len(e) == Bytes { 1706 // fast path 1707 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1708 if err == nil { 1709 *z = v 1710 return z 1711 } 1712 } 1713 1714 // slow path. 1715 // get a big int from our pool 1716 vv := pool.BigInt.Get() 1717 vv.SetBytes(e) 1718 1719 // set big int 1720 z.SetBigInt(vv) 1721 1722 // put temporary object back in pool 1723 pool.BigInt.Put(vv) 1724 1725 return z 1726 } 1727 1728 // SetBytesCanonical interprets e as the bytes of a big-endian 96-byte integer. 1729 // If e is not a 96-byte slice or encodes a value higher than q, 1730 // SetBytesCanonical returns an error. 1731 func (z *Element) SetBytesCanonical(e []byte) error { 1732 if len(e) != Bytes { 1733 return errors.New("invalid fp.Element encoding") 1734 } 1735 v, err := BigEndian.Element((*[Bytes]byte)(e)) 1736 if err != nil { 1737 return err 1738 } 1739 *z = v 1740 return nil 1741 } 1742 1743 // SetBigInt sets z to v and returns z 1744 func (z *Element) SetBigInt(v *big.Int) *Element { 1745 z.SetZero() 1746 1747 var zero big.Int 1748 1749 // fast path 1750 c := v.Cmp(&_modulus) 1751 if c == 0 { 1752 // v == 0 1753 return z 1754 } else if c != 1 && v.Cmp(&zero) != -1 { 1755 // 0 < v < q 1756 return z.setBigInt(v) 1757 } 1758 1759 // get temporary big int from the pool 1760 vv := pool.BigInt.Get() 1761 1762 // copy input + modular reduction 1763 vv.Mod(v, &_modulus) 1764 1765 // set big int byte value 1766 z.setBigInt(vv) 1767 1768 // release object into pool 1769 pool.BigInt.Put(vv) 1770 return z 1771 } 1772 1773 // setBigInt assumes 0 ⩽ v < q 1774 func (z *Element) setBigInt(v *big.Int) *Element { 1775 vBits := v.Bits() 1776 1777 if bits.UintSize == 64 { 1778 for i := 0; i < len(vBits); i++ { 1779 z[i] = uint64(vBits[i]) 1780 } 1781 } else { 1782 for i := 0; i < len(vBits); i++ { 1783 if i%2 == 0 { 1784 z[i/2] = uint64(vBits[i]) 1785 } else { 1786 z[i/2] |= uint64(vBits[i]) << 32 1787 } 1788 } 1789 } 1790 1791 return z.toMont() 1792 } 1793 1794 // SetString creates a big.Int with number and calls SetBigInt on z 1795 // 1796 // The number prefix determines the actual base: A prefix of 1797 // ”0b” or ”0B” selects base 2, ”0”, ”0o” or ”0O” selects base 8, 1798 // and ”0x” or ”0X” selects base 16. Otherwise, the selected base is 10 1799 // and no prefix is accepted. 1800 // 1801 // For base 16, lower and upper case letters are considered the same: 1802 // The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. 1803 // 1804 // An underscore character ”_” may appear between a base 1805 // prefix and an adjacent digit, and between successive digits; such 1806 // underscores do not change the value of the number. 1807 // Incorrect placement of underscores is reported as a panic if there 1808 // are no other errors. 1809 // 1810 // If the number is invalid this method leaves z unchanged and returns nil, error. 1811 func (z *Element) SetString(number string) (*Element, error) { 1812 // get temporary big int from the pool 1813 vv := pool.BigInt.Get() 1814 1815 if _, ok := vv.SetString(number, 0); !ok { 1816 return nil, errors.New("Element.SetString failed -> can't parse number into a big.Int " + number) 1817 } 1818 1819 z.SetBigInt(vv) 1820 1821 // release object into pool 1822 pool.BigInt.Put(vv) 1823 1824 return z, nil 1825 } 1826 1827 // MarshalJSON returns json encoding of z (z.Text(10)) 1828 // If z == nil, returns null 1829 func (z *Element) MarshalJSON() ([]byte, error) { 1830 if z == nil { 1831 return []byte("null"), nil 1832 } 1833 const maxSafeBound = 15 // we encode it as number if it's small 1834 s := z.Text(10) 1835 if len(s) <= maxSafeBound { 1836 return []byte(s), nil 1837 } 1838 var sbb strings.Builder 1839 sbb.WriteByte('"') 1840 sbb.WriteString(s) 1841 sbb.WriteByte('"') 1842 return []byte(sbb.String()), nil 1843 } 1844 1845 // UnmarshalJSON accepts numbers and strings as input 1846 // See Element.SetString for valid prefixes (0x, 0b, ...) 1847 func (z *Element) UnmarshalJSON(data []byte) error { 1848 s := string(data) 1849 if len(s) > Bits*3 { 1850 return errors.New("value too large (max = Element.Bits * 3)") 1851 } 1852 1853 // we accept numbers and strings, remove leading and trailing quotes if any 1854 if len(s) > 0 && s[0] == '"' { 1855 s = s[1:] 1856 } 1857 if len(s) > 0 && s[len(s)-1] == '"' { 1858 s = s[:len(s)-1] 1859 } 1860 1861 // get temporary big int from the pool 1862 vv := pool.BigInt.Get() 1863 1864 if _, ok := vv.SetString(s, 0); !ok { 1865 return errors.New("can't parse into a big.Int: " + s) 1866 } 1867 1868 z.SetBigInt(vv) 1869 1870 // release object into pool 1871 pool.BigInt.Put(vv) 1872 return nil 1873 } 1874 1875 // A ByteOrder specifies how to convert byte slices into a Element 1876 type ByteOrder interface { 1877 Element(*[Bytes]byte) (Element, error) 1878 PutElement(*[Bytes]byte, Element) 1879 String() string 1880 } 1881 1882 // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 1883 var BigEndian bigEndian 1884 1885 type bigEndian struct{} 1886 1887 // Element interpret b is a big-endian 96-byte slice. 1888 // If b encodes a value higher than q, Element returns error. 1889 func (bigEndian) Element(b *[Bytes]byte) (Element, error) { 1890 var z Element 1891 z[0] = binary.BigEndian.Uint64((*b)[88:96]) 1892 z[1] = binary.BigEndian.Uint64((*b)[80:88]) 1893 z[2] = binary.BigEndian.Uint64((*b)[72:80]) 1894 z[3] = binary.BigEndian.Uint64((*b)[64:72]) 1895 z[4] = binary.BigEndian.Uint64((*b)[56:64]) 1896 z[5] = binary.BigEndian.Uint64((*b)[48:56]) 1897 z[6] = binary.BigEndian.Uint64((*b)[40:48]) 1898 z[7] = binary.BigEndian.Uint64((*b)[32:40]) 1899 z[8] = binary.BigEndian.Uint64((*b)[24:32]) 1900 z[9] = binary.BigEndian.Uint64((*b)[16:24]) 1901 z[10] = binary.BigEndian.Uint64((*b)[8:16]) 1902 z[11] = binary.BigEndian.Uint64((*b)[0:8]) 1903 1904 if !z.smallerThanModulus() { 1905 return Element{}, errors.New("invalid fp.Element encoding") 1906 } 1907 1908 z.toMont() 1909 return z, nil 1910 } 1911 1912 func (bigEndian) PutElement(b *[Bytes]byte, e Element) { 1913 e.fromMont() 1914 binary.BigEndian.PutUint64((*b)[88:96], e[0]) 1915 binary.BigEndian.PutUint64((*b)[80:88], e[1]) 1916 binary.BigEndian.PutUint64((*b)[72:80], e[2]) 1917 binary.BigEndian.PutUint64((*b)[64:72], e[3]) 1918 binary.BigEndian.PutUint64((*b)[56:64], e[4]) 1919 binary.BigEndian.PutUint64((*b)[48:56], e[5]) 1920 binary.BigEndian.PutUint64((*b)[40:48], e[6]) 1921 binary.BigEndian.PutUint64((*b)[32:40], e[7]) 1922 binary.BigEndian.PutUint64((*b)[24:32], e[8]) 1923 binary.BigEndian.PutUint64((*b)[16:24], e[9]) 1924 binary.BigEndian.PutUint64((*b)[8:16], e[10]) 1925 binary.BigEndian.PutUint64((*b)[0:8], e[11]) 1926 } 1927 1928 func (bigEndian) String() string { return "BigEndian" } 1929 1930 // LittleEndian is the little-endian implementation of ByteOrder and AppendByteOrder. 1931 var LittleEndian littleEndian 1932 1933 type littleEndian struct{} 1934 1935 func (littleEndian) Element(b *[Bytes]byte) (Element, error) { 1936 var z Element 1937 z[0] = binary.LittleEndian.Uint64((*b)[0:8]) 1938 z[1] = binary.LittleEndian.Uint64((*b)[8:16]) 1939 z[2] = binary.LittleEndian.Uint64((*b)[16:24]) 1940 z[3] = binary.LittleEndian.Uint64((*b)[24:32]) 1941 z[4] = binary.LittleEndian.Uint64((*b)[32:40]) 1942 z[5] = binary.LittleEndian.Uint64((*b)[40:48]) 1943 z[6] = binary.LittleEndian.Uint64((*b)[48:56]) 1944 z[7] = binary.LittleEndian.Uint64((*b)[56:64]) 1945 z[8] = binary.LittleEndian.Uint64((*b)[64:72]) 1946 z[9] = binary.LittleEndian.Uint64((*b)[72:80]) 1947 z[10] = binary.LittleEndian.Uint64((*b)[80:88]) 1948 z[11] = binary.LittleEndian.Uint64((*b)[88:96]) 1949 1950 if !z.smallerThanModulus() { 1951 return Element{}, errors.New("invalid fp.Element encoding") 1952 } 1953 1954 z.toMont() 1955 return z, nil 1956 } 1957 1958 func (littleEndian) PutElement(b *[Bytes]byte, e Element) { 1959 e.fromMont() 1960 binary.LittleEndian.PutUint64((*b)[0:8], e[0]) 1961 binary.LittleEndian.PutUint64((*b)[8:16], e[1]) 1962 binary.LittleEndian.PutUint64((*b)[16:24], e[2]) 1963 binary.LittleEndian.PutUint64((*b)[24:32], e[3]) 1964 binary.LittleEndian.PutUint64((*b)[32:40], e[4]) 1965 binary.LittleEndian.PutUint64((*b)[40:48], e[5]) 1966 binary.LittleEndian.PutUint64((*b)[48:56], e[6]) 1967 binary.LittleEndian.PutUint64((*b)[56:64], e[7]) 1968 binary.LittleEndian.PutUint64((*b)[64:72], e[8]) 1969 binary.LittleEndian.PutUint64((*b)[72:80], e[9]) 1970 binary.LittleEndian.PutUint64((*b)[80:88], e[10]) 1971 binary.LittleEndian.PutUint64((*b)[88:96], e[11]) 1972 } 1973 1974 func (littleEndian) String() string { return "LittleEndian" } 1975 1976 // Legendre returns the Legendre symbol of z (either +1, -1, or 0.) 1977 func (z *Element) Legendre() int { 1978 var l Element 1979 // z^((q-1)/2) 1980 l.expByLegendreExp(*z) 1981 1982 if l.IsZero() { 1983 return 0 1984 } 1985 1986 // if l == 1 1987 if l.IsOne() { 1988 return 1 1989 } 1990 return -1 1991 } 1992 1993 // Sqrt z = √x (mod q) 1994 // if the square root doesn't exist (x is not a square mod q) 1995 // Sqrt leaves z unchanged and returns nil 1996 func (z *Element) Sqrt(x *Element) *Element { 1997 // q ≡ 3 (mod 4) 1998 // using z ≡ ± x^((p+1)/4) (mod q) 1999 var y, square Element 2000 y.expBySqrtExp(*x) 2001 // as we didn't compute the legendre symbol, ensure we found y such that y * y = x 2002 square.Square(&y) 2003 if square.Equal(x) { 2004 return z.Set(&y) 2005 } 2006 return nil 2007 } 2008 2009 const ( 2010 k = 32 // word size / 2 2011 signBitSelector = uint64(1) << 63 2012 approxLowBitsN = k - 1 2013 approxHighBitsN = k + 1 2014 ) 2015 2016 const ( 2017 inversionCorrectionFactorWord0 = 10031381836020524396 2018 inversionCorrectionFactorWord1 = 13512348327667036555 2019 inversionCorrectionFactorWord2 = 4712458382768910368 2020 inversionCorrectionFactorWord3 = 10608536430169864474 2021 inversionCorrectionFactorWord4 = 6011516409926961524 2022 inversionCorrectionFactorWord5 = 453532925360796333 2023 inversionCorrectionFactorWord6 = 12814326068023107562 2024 inversionCorrectionFactorWord7 = 4764844688547002673 2025 inversionCorrectionFactorWord8 = 297975318568649638 2026 inversionCorrectionFactorWord9 = 3076225984588847531 2027 inversionCorrectionFactorWord10 = 327844855039329024 2028 inversionCorrectionFactorWord11 = 65221078716978344 2029 invIterationsN = 50 2030 ) 2031 2032 // Inverse z = x⁻¹ (mod q) 2033 // 2034 // if x == 0, sets and returns z = x 2035 func (z *Element) Inverse(x *Element) *Element { 2036 // Implements "Optimized Binary GCD for Modular Inversion" 2037 // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf 2038 2039 a := *x 2040 b := Element{ 2041 q0, 2042 q1, 2043 q2, 2044 q3, 2045 q4, 2046 q5, 2047 q6, 2048 q7, 2049 q8, 2050 q9, 2051 q10, 2052 q11, 2053 } // b := q 2054 2055 u := Element{1} 2056 2057 // Update factors: we get [u; v] ← [f₀ g₀; f₁ g₁] [u; v] 2058 // cᵢ = fᵢ + 2³¹ - 1 + 2³² * (gᵢ + 2³¹ - 1) 2059 var c0, c1 int64 2060 2061 // Saved update factors to reduce the number of field multiplications 2062 var pf0, pf1, pg0, pg1 int64 2063 2064 var i uint 2065 2066 var v, s Element 2067 2068 // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations 2069 // This also lets us get away with half as many updates to u,v 2070 // To make this constant-time-ish, replace the condition with i < invIterationsN 2071 for i = 0; i&1 == 1 || !a.IsZero(); i++ { 2072 n := max(a.BitLen(), b.BitLen()) 2073 aApprox, bApprox := approximate(&a, n), approximate(&b, n) 2074 2075 // f₀, g₀, f₁, g₁ = 1, 0, 0, 1 2076 c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 2077 2078 for j := 0; j < approxLowBitsN; j++ { 2079 2080 // -2ʲ < f₀, f₁ ≤ 2ʲ 2081 // |f₀| + |f₁| < 2ʲ⁺¹ 2082 2083 if aApprox&1 == 0 { 2084 aApprox /= 2 2085 } else { 2086 s, borrow := bits.Sub64(aApprox, bApprox, 0) 2087 if borrow == 1 { 2088 s = bApprox - aApprox 2089 bApprox = aApprox 2090 c0, c1 = c1, c0 2091 // invariants unchanged 2092 } 2093 2094 aApprox = s / 2 2095 c0 = c0 - c1 2096 2097 // Now |f₀| < 2ʲ⁺¹ ≤ 2ʲ⁺¹ (only the weaker inequality is needed, strictly speaking) 2098 // Started with f₀ > -2ʲ and f₁ ≤ 2ʲ, so f₀ - f₁ > -2ʲ⁺¹ 2099 // Invariants unchanged for f₁ 2100 } 2101 2102 c1 *= 2 2103 // -2ʲ⁺¹ < f₁ ≤ 2ʲ⁺¹ 2104 // So now |f₀| + |f₁| < 2ʲ⁺² 2105 } 2106 2107 s = a 2108 2109 var g0 int64 2110 // from this point on c0 aliases for f0 2111 c0, g0 = updateFactorsDecompose(c0) 2112 aHi := a.linearCombNonModular(&s, c0, &b, g0) 2113 if aHi&signBitSelector != 0 { 2114 // if aHi < 0 2115 c0, g0 = -c0, -g0 2116 aHi = negL(&a, aHi) 2117 } 2118 // right-shift a by k-1 bits 2119 a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) 2120 a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) 2121 a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) 2122 a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) 2123 a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN) 2124 a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN) 2125 a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN) 2126 a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN) 2127 a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN) 2128 a[9] = (a[9] >> approxLowBitsN) | ((a[10]) << approxHighBitsN) 2129 a[10] = (a[10] >> approxLowBitsN) | ((a[11]) << approxHighBitsN) 2130 a[11] = (a[11] >> approxLowBitsN) | (aHi << approxHighBitsN) 2131 2132 var f1 int64 2133 // from this point on c1 aliases for g0 2134 f1, c1 = updateFactorsDecompose(c1) 2135 bHi := b.linearCombNonModular(&s, f1, &b, c1) 2136 if bHi&signBitSelector != 0 { 2137 // if bHi < 0 2138 f1, c1 = -f1, -c1 2139 bHi = negL(&b, bHi) 2140 } 2141 // right-shift b by k-1 bits 2142 b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) 2143 b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) 2144 b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) 2145 b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) 2146 b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) 2147 b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN) 2148 b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN) 2149 b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN) 2150 b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN) 2151 b[9] = (b[9] >> approxLowBitsN) | ((b[10]) << approxHighBitsN) 2152 b[10] = (b[10] >> approxLowBitsN) | ((b[11]) << approxHighBitsN) 2153 b[11] = (b[11] >> approxLowBitsN) | (bHi << approxHighBitsN) 2154 2155 if i&1 == 1 { 2156 // Combine current update factors with previously stored ones 2157 // [F₀, G₀; F₁, G₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁], with capital letters denoting new combined values 2158 // We get |F₀| = | f₀pf₀ + g₀pf₁ | ≤ |f₀pf₀| + |g₀pf₁| = |f₀| |pf₀| + |g₀| |pf₁| ≤ 2ᵏ⁻¹|pf₀| + 2ᵏ⁻¹|pf₁| 2159 // = 2ᵏ⁻¹ (|pf₀| + |pf₁|) < 2ᵏ⁻¹ 2ᵏ = 2²ᵏ⁻¹ 2160 // So |F₀| < 2²ᵏ⁻¹ meaning it fits in a 2k-bit signed register 2161 2162 // c₀ aliases f₀, c₁ aliases g₁ 2163 c0, g0, f1, c1 = c0*pf0+g0*pf1, 2164 c0*pg0+g0*pg1, 2165 f1*pf0+c1*pf1, 2166 f1*pg0+c1*pg1 2167 2168 s = u 2169 2170 // 0 ≤ u, v < 2²⁵⁵ 2171 // |F₀|, |G₀| < 2⁶³ 2172 u.linearComb(&u, c0, &v, g0) 2173 // |F₁|, |G₁| < 2⁶³ 2174 v.linearComb(&s, f1, &v, c1) 2175 2176 } else { 2177 // Save update factors 2178 pf0, pg0, pf1, pg1 = c0, g0, f1, c1 2179 } 2180 } 2181 2182 // For every iteration that we miss, v is not being multiplied by 2ᵏ⁻² 2183 const pSq uint64 = 1 << (2 * (k - 1)) 2184 a = Element{pSq} 2185 // If the function is constant-time ish, this loop will not run (no need to take it out explicitly) 2186 for ; i < invIterationsN; i += 2 { 2187 // could optimize further with mul by word routine or by pre-computing a table since with k=26, 2188 // we would multiply by pSq up to 13times; 2189 // on x86, the assembly routine outperforms generic code for mul by word 2190 // on arm64, we may loose up to ~5% for 6 limbs 2191 v.Mul(&v, &a) 2192 } 2193 2194 u.Set(x) // for correctness check 2195 2196 z.Mul(&v, &Element{ 2197 inversionCorrectionFactorWord0, 2198 inversionCorrectionFactorWord1, 2199 inversionCorrectionFactorWord2, 2200 inversionCorrectionFactorWord3, 2201 inversionCorrectionFactorWord4, 2202 inversionCorrectionFactorWord5, 2203 inversionCorrectionFactorWord6, 2204 inversionCorrectionFactorWord7, 2205 inversionCorrectionFactorWord8, 2206 inversionCorrectionFactorWord9, 2207 inversionCorrectionFactorWord10, 2208 inversionCorrectionFactorWord11, 2209 }) 2210 2211 // correctness check 2212 v.Mul(&u, z) 2213 if !v.IsOne() && !u.IsZero() { 2214 return z.inverseExp(u) 2215 } 2216 2217 return z 2218 } 2219 2220 // inverseExp computes z = x⁻¹ (mod q) = x**(q-2) (mod q) 2221 func (z *Element) inverseExp(x Element) *Element { 2222 // e == q-2 2223 e := Modulus() 2224 e.Sub(e, big.NewInt(2)) 2225 2226 z.Set(&x) 2227 2228 for i := e.BitLen() - 2; i >= 0; i-- { 2229 z.Square(z) 2230 if e.Bit(i) == 1 { 2231 z.Mul(z, &x) 2232 } 2233 } 2234 2235 return z 2236 } 2237 2238 // approximate a big number x into a single 64 bit word using its uppermost and lowermost bits 2239 // if x fits in a word as is, no approximation necessary 2240 func approximate(x *Element, nBits int) uint64 { 2241 2242 if nBits <= 64 { 2243 return x[0] 2244 } 2245 2246 const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones 2247 lo := mask & x[0] 2248 2249 hiWordIndex := (nBits - 1) / 64 2250 2251 hiWordBitsAvailable := nBits - hiWordIndex*64 2252 hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) 2253 2254 mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) 2255 hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) 2256 2257 mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) 2258 mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed 2259 2260 return lo | mid | hi 2261 } 2262 2263 // linearComb z = xC * x + yC * y; 2264 // 0 ≤ x, y < 2⁷⁶¹ 2265 // |xC|, |yC| < 2⁶³ 2266 func (z *Element) linearComb(x *Element, xC int64, y *Element, yC int64) { 2267 // | (hi, z) | < 2 * 2⁶³ * 2⁷⁶¹ = 2⁸²⁵ 2268 // therefore | hi | < 2⁵⁷ ≤ 2⁶³ 2269 hi := z.linearCombNonModular(x, xC, y, yC) 2270 z.montReduceSigned(z, hi) 2271 } 2272 2273 // montReduceSigned z = (xHi * r + x) * r⁻¹ using the SOS algorithm 2274 // Requires |xHi| < 2⁶³. Most significant bit of xHi is the sign bit. 2275 func (z *Element) montReduceSigned(x *Element, xHi uint64) { 2276 const signBitRemover = ^signBitSelector 2277 mustNeg := xHi&signBitSelector != 0 2278 // the SOS implementation requires that most significant bit is 0 2279 // Let X be xHi*r + x 2280 // If X is negative we would have initially stored it as 2⁶⁴ r + X (à la 2's complement) 2281 xHi &= signBitRemover 2282 // with this a negative X is now represented as 2⁶³ r + X 2283 2284 var t [2*Limbs - 1]uint64 2285 var C uint64 2286 2287 m := x[0] * qInvNeg 2288 2289 C = madd0(m, q0, x[0]) 2290 C, t[1] = madd2(m, q1, x[1], C) 2291 C, t[2] = madd2(m, q2, x[2], C) 2292 C, t[3] = madd2(m, q3, x[3], C) 2293 C, t[4] = madd2(m, q4, x[4], C) 2294 C, t[5] = madd2(m, q5, x[5], C) 2295 C, t[6] = madd2(m, q6, x[6], C) 2296 C, t[7] = madd2(m, q7, x[7], C) 2297 C, t[8] = madd2(m, q8, x[8], C) 2298 C, t[9] = madd2(m, q9, x[9], C) 2299 C, t[10] = madd2(m, q10, x[10], C) 2300 C, t[11] = madd2(m, q11, x[11], C) 2301 2302 // m * qElement[11] ≤ (2⁶⁴ - 1) * (2⁶³ - 1) = 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 2303 // x[11] + C ≤ 2*(2⁶⁴ - 1) = 2⁶⁵ - 2 2304 // On LHS, (C, t[11]) ≤ 2¹²⁷ - 2⁶⁴ - 2⁶³ + 1 + 2⁶⁵ - 2 = 2¹²⁷ + 2⁶³ - 1 2305 // So on LHS, C ≤ 2⁶³ 2306 t[12] = xHi + C 2307 // xHi + C < 2⁶³ + 2⁶³ = 2⁶⁴ 2308 2309 // <standard SOS> 2310 { 2311 const i = 1 2312 m = t[i] * qInvNeg 2313 2314 C = madd0(m, q0, t[i+0]) 2315 C, t[i+1] = madd2(m, q1, t[i+1], C) 2316 C, t[i+2] = madd2(m, q2, t[i+2], C) 2317 C, t[i+3] = madd2(m, q3, t[i+3], C) 2318 C, t[i+4] = madd2(m, q4, t[i+4], C) 2319 C, t[i+5] = madd2(m, q5, t[i+5], C) 2320 C, t[i+6] = madd2(m, q6, t[i+6], C) 2321 C, t[i+7] = madd2(m, q7, t[i+7], C) 2322 C, t[i+8] = madd2(m, q8, t[i+8], C) 2323 C, t[i+9] = madd2(m, q9, t[i+9], C) 2324 C, t[i+10] = madd2(m, q10, t[i+10], C) 2325 C, t[i+11] = madd2(m, q11, t[i+11], C) 2326 2327 t[i+Limbs] += C 2328 } 2329 { 2330 const i = 2 2331 m = t[i] * qInvNeg 2332 2333 C = madd0(m, q0, t[i+0]) 2334 C, t[i+1] = madd2(m, q1, t[i+1], C) 2335 C, t[i+2] = madd2(m, q2, t[i+2], C) 2336 C, t[i+3] = madd2(m, q3, t[i+3], C) 2337 C, t[i+4] = madd2(m, q4, t[i+4], C) 2338 C, t[i+5] = madd2(m, q5, t[i+5], C) 2339 C, t[i+6] = madd2(m, q6, t[i+6], C) 2340 C, t[i+7] = madd2(m, q7, t[i+7], C) 2341 C, t[i+8] = madd2(m, q8, t[i+8], C) 2342 C, t[i+9] = madd2(m, q9, t[i+9], C) 2343 C, t[i+10] = madd2(m, q10, t[i+10], C) 2344 C, t[i+11] = madd2(m, q11, t[i+11], C) 2345 2346 t[i+Limbs] += C 2347 } 2348 { 2349 const i = 3 2350 m = t[i] * qInvNeg 2351 2352 C = madd0(m, q0, t[i+0]) 2353 C, t[i+1] = madd2(m, q1, t[i+1], C) 2354 C, t[i+2] = madd2(m, q2, t[i+2], C) 2355 C, t[i+3] = madd2(m, q3, t[i+3], C) 2356 C, t[i+4] = madd2(m, q4, t[i+4], C) 2357 C, t[i+5] = madd2(m, q5, t[i+5], C) 2358 C, t[i+6] = madd2(m, q6, t[i+6], C) 2359 C, t[i+7] = madd2(m, q7, t[i+7], C) 2360 C, t[i+8] = madd2(m, q8, t[i+8], C) 2361 C, t[i+9] = madd2(m, q9, t[i+9], C) 2362 C, t[i+10] = madd2(m, q10, t[i+10], C) 2363 C, t[i+11] = madd2(m, q11, t[i+11], C) 2364 2365 t[i+Limbs] += C 2366 } 2367 { 2368 const i = 4 2369 m = t[i] * qInvNeg 2370 2371 C = madd0(m, q0, t[i+0]) 2372 C, t[i+1] = madd2(m, q1, t[i+1], C) 2373 C, t[i+2] = madd2(m, q2, t[i+2], C) 2374 C, t[i+3] = madd2(m, q3, t[i+3], C) 2375 C, t[i+4] = madd2(m, q4, t[i+4], C) 2376 C, t[i+5] = madd2(m, q5, t[i+5], C) 2377 C, t[i+6] = madd2(m, q6, t[i+6], C) 2378 C, t[i+7] = madd2(m, q7, t[i+7], C) 2379 C, t[i+8] = madd2(m, q8, t[i+8], C) 2380 C, t[i+9] = madd2(m, q9, t[i+9], C) 2381 C, t[i+10] = madd2(m, q10, t[i+10], C) 2382 C, t[i+11] = madd2(m, q11, t[i+11], C) 2383 2384 t[i+Limbs] += C 2385 } 2386 { 2387 const i = 5 2388 m = t[i] * qInvNeg 2389 2390 C = madd0(m, q0, t[i+0]) 2391 C, t[i+1] = madd2(m, q1, t[i+1], C) 2392 C, t[i+2] = madd2(m, q2, t[i+2], C) 2393 C, t[i+3] = madd2(m, q3, t[i+3], C) 2394 C, t[i+4] = madd2(m, q4, t[i+4], C) 2395 C, t[i+5] = madd2(m, q5, t[i+5], C) 2396 C, t[i+6] = madd2(m, q6, t[i+6], C) 2397 C, t[i+7] = madd2(m, q7, t[i+7], C) 2398 C, t[i+8] = madd2(m, q8, t[i+8], C) 2399 C, t[i+9] = madd2(m, q9, t[i+9], C) 2400 C, t[i+10] = madd2(m, q10, t[i+10], C) 2401 C, t[i+11] = madd2(m, q11, t[i+11], C) 2402 2403 t[i+Limbs] += C 2404 } 2405 { 2406 const i = 6 2407 m = t[i] * qInvNeg 2408 2409 C = madd0(m, q0, t[i+0]) 2410 C, t[i+1] = madd2(m, q1, t[i+1], C) 2411 C, t[i+2] = madd2(m, q2, t[i+2], C) 2412 C, t[i+3] = madd2(m, q3, t[i+3], C) 2413 C, t[i+4] = madd2(m, q4, t[i+4], C) 2414 C, t[i+5] = madd2(m, q5, t[i+5], C) 2415 C, t[i+6] = madd2(m, q6, t[i+6], C) 2416 C, t[i+7] = madd2(m, q7, t[i+7], C) 2417 C, t[i+8] = madd2(m, q8, t[i+8], C) 2418 C, t[i+9] = madd2(m, q9, t[i+9], C) 2419 C, t[i+10] = madd2(m, q10, t[i+10], C) 2420 C, t[i+11] = madd2(m, q11, t[i+11], C) 2421 2422 t[i+Limbs] += C 2423 } 2424 { 2425 const i = 7 2426 m = t[i] * qInvNeg 2427 2428 C = madd0(m, q0, t[i+0]) 2429 C, t[i+1] = madd2(m, q1, t[i+1], C) 2430 C, t[i+2] = madd2(m, q2, t[i+2], C) 2431 C, t[i+3] = madd2(m, q3, t[i+3], C) 2432 C, t[i+4] = madd2(m, q4, t[i+4], C) 2433 C, t[i+5] = madd2(m, q5, t[i+5], C) 2434 C, t[i+6] = madd2(m, q6, t[i+6], C) 2435 C, t[i+7] = madd2(m, q7, t[i+7], C) 2436 C, t[i+8] = madd2(m, q8, t[i+8], C) 2437 C, t[i+9] = madd2(m, q9, t[i+9], C) 2438 C, t[i+10] = madd2(m, q10, t[i+10], C) 2439 C, t[i+11] = madd2(m, q11, t[i+11], C) 2440 2441 t[i+Limbs] += C 2442 } 2443 { 2444 const i = 8 2445 m = t[i] * qInvNeg 2446 2447 C = madd0(m, q0, t[i+0]) 2448 C, t[i+1] = madd2(m, q1, t[i+1], C) 2449 C, t[i+2] = madd2(m, q2, t[i+2], C) 2450 C, t[i+3] = madd2(m, q3, t[i+3], C) 2451 C, t[i+4] = madd2(m, q4, t[i+4], C) 2452 C, t[i+5] = madd2(m, q5, t[i+5], C) 2453 C, t[i+6] = madd2(m, q6, t[i+6], C) 2454 C, t[i+7] = madd2(m, q7, t[i+7], C) 2455 C, t[i+8] = madd2(m, q8, t[i+8], C) 2456 C, t[i+9] = madd2(m, q9, t[i+9], C) 2457 C, t[i+10] = madd2(m, q10, t[i+10], C) 2458 C, t[i+11] = madd2(m, q11, t[i+11], C) 2459 2460 t[i+Limbs] += C 2461 } 2462 { 2463 const i = 9 2464 m = t[i] * qInvNeg 2465 2466 C = madd0(m, q0, t[i+0]) 2467 C, t[i+1] = madd2(m, q1, t[i+1], C) 2468 C, t[i+2] = madd2(m, q2, t[i+2], C) 2469 C, t[i+3] = madd2(m, q3, t[i+3], C) 2470 C, t[i+4] = madd2(m, q4, t[i+4], C) 2471 C, t[i+5] = madd2(m, q5, t[i+5], C) 2472 C, t[i+6] = madd2(m, q6, t[i+6], C) 2473 C, t[i+7] = madd2(m, q7, t[i+7], C) 2474 C, t[i+8] = madd2(m, q8, t[i+8], C) 2475 C, t[i+9] = madd2(m, q9, t[i+9], C) 2476 C, t[i+10] = madd2(m, q10, t[i+10], C) 2477 C, t[i+11] = madd2(m, q11, t[i+11], C) 2478 2479 t[i+Limbs] += C 2480 } 2481 { 2482 const i = 10 2483 m = t[i] * qInvNeg 2484 2485 C = madd0(m, q0, t[i+0]) 2486 C, t[i+1] = madd2(m, q1, t[i+1], C) 2487 C, t[i+2] = madd2(m, q2, t[i+2], C) 2488 C, t[i+3] = madd2(m, q3, t[i+3], C) 2489 C, t[i+4] = madd2(m, q4, t[i+4], C) 2490 C, t[i+5] = madd2(m, q5, t[i+5], C) 2491 C, t[i+6] = madd2(m, q6, t[i+6], C) 2492 C, t[i+7] = madd2(m, q7, t[i+7], C) 2493 C, t[i+8] = madd2(m, q8, t[i+8], C) 2494 C, t[i+9] = madd2(m, q9, t[i+9], C) 2495 C, t[i+10] = madd2(m, q10, t[i+10], C) 2496 C, t[i+11] = madd2(m, q11, t[i+11], C) 2497 2498 t[i+Limbs] += C 2499 } 2500 { 2501 const i = 11 2502 m := t[i] * qInvNeg 2503 2504 C = madd0(m, q0, t[i+0]) 2505 C, z[0] = madd2(m, q1, t[i+1], C) 2506 C, z[1] = madd2(m, q2, t[i+2], C) 2507 C, z[2] = madd2(m, q3, t[i+3], C) 2508 C, z[3] = madd2(m, q4, t[i+4], C) 2509 C, z[4] = madd2(m, q5, t[i+5], C) 2510 C, z[5] = madd2(m, q6, t[i+6], C) 2511 C, z[6] = madd2(m, q7, t[i+7], C) 2512 C, z[7] = madd2(m, q8, t[i+8], C) 2513 C, z[8] = madd2(m, q9, t[i+9], C) 2514 C, z[9] = madd2(m, q10, t[i+10], C) 2515 z[11], z[10] = madd2(m, q11, t[i+11], C) 2516 } 2517 2518 // if z ⩾ q → z -= q 2519 if !z.smallerThanModulus() { 2520 var b uint64 2521 z[0], b = bits.Sub64(z[0], q0, 0) 2522 z[1], b = bits.Sub64(z[1], q1, b) 2523 z[2], b = bits.Sub64(z[2], q2, b) 2524 z[3], b = bits.Sub64(z[3], q3, b) 2525 z[4], b = bits.Sub64(z[4], q4, b) 2526 z[5], b = bits.Sub64(z[5], q5, b) 2527 z[6], b = bits.Sub64(z[6], q6, b) 2528 z[7], b = bits.Sub64(z[7], q7, b) 2529 z[8], b = bits.Sub64(z[8], q8, b) 2530 z[9], b = bits.Sub64(z[9], q9, b) 2531 z[10], b = bits.Sub64(z[10], q10, b) 2532 z[11], _ = bits.Sub64(z[11], q11, b) 2533 } 2534 // </standard SOS> 2535 2536 if mustNeg { 2537 // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead 2538 var b uint64 2539 z[0], b = bits.Sub64(z[0], signBitSelector, 0) 2540 z[1], b = bits.Sub64(z[1], 0, b) 2541 z[2], b = bits.Sub64(z[2], 0, b) 2542 z[3], b = bits.Sub64(z[3], 0, b) 2543 z[4], b = bits.Sub64(z[4], 0, b) 2544 z[5], b = bits.Sub64(z[5], 0, b) 2545 z[6], b = bits.Sub64(z[6], 0, b) 2546 z[7], b = bits.Sub64(z[7], 0, b) 2547 z[8], b = bits.Sub64(z[8], 0, b) 2548 z[9], b = bits.Sub64(z[9], 0, b) 2549 z[10], b = bits.Sub64(z[10], 0, b) 2550 z[11], b = bits.Sub64(z[11], 0, b) 2551 2552 // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 2553 2554 if b != 0 { 2555 // z[11] = -1 2556 // negative: add q 2557 const neg1 = 0xFFFFFFFFFFFFFFFF 2558 2559 var carry uint64 2560 2561 z[0], carry = bits.Add64(z[0], q0, 0) 2562 z[1], carry = bits.Add64(z[1], q1, carry) 2563 z[2], carry = bits.Add64(z[2], q2, carry) 2564 z[3], carry = bits.Add64(z[3], q3, carry) 2565 z[4], carry = bits.Add64(z[4], q4, carry) 2566 z[5], carry = bits.Add64(z[5], q5, carry) 2567 z[6], carry = bits.Add64(z[6], q6, carry) 2568 z[7], carry = bits.Add64(z[7], q7, carry) 2569 z[8], carry = bits.Add64(z[8], q8, carry) 2570 z[9], carry = bits.Add64(z[9], q9, carry) 2571 z[10], carry = bits.Add64(z[10], q10, carry) 2572 z[11], _ = bits.Add64(neg1, q11, carry) 2573 } 2574 } 2575 } 2576 2577 const ( 2578 updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) 2579 updateFactorIdentityMatrixRow0 = 1 2580 updateFactorIdentityMatrixRow1 = 1 << 32 2581 ) 2582 2583 func updateFactorsDecompose(c int64) (int64, int64) { 2584 c += updateFactorsConversionBias 2585 const low32BitsFilter int64 = 0xFFFFFFFF 2586 f := c&low32BitsFilter - 0x7FFFFFFF 2587 g := c>>32&low32BitsFilter - 0x7FFFFFFF 2588 return f, g 2589 } 2590 2591 // negL negates in place [x | xHi] and return the new most significant word xHi 2592 func negL(x *Element, xHi uint64) uint64 { 2593 var b uint64 2594 2595 x[0], b = bits.Sub64(0, x[0], 0) 2596 x[1], b = bits.Sub64(0, x[1], b) 2597 x[2], b = bits.Sub64(0, x[2], b) 2598 x[3], b = bits.Sub64(0, x[3], b) 2599 x[4], b = bits.Sub64(0, x[4], b) 2600 x[5], b = bits.Sub64(0, x[5], b) 2601 x[6], b = bits.Sub64(0, x[6], b) 2602 x[7], b = bits.Sub64(0, x[7], b) 2603 x[8], b = bits.Sub64(0, x[8], b) 2604 x[9], b = bits.Sub64(0, x[9], b) 2605 x[10], b = bits.Sub64(0, x[10], b) 2606 x[11], b = bits.Sub64(0, x[11], b) 2607 xHi, _ = bits.Sub64(0, xHi, b) 2608 2609 return xHi 2610 } 2611 2612 // mulWNonModular multiplies by one word in non-montgomery, without reducing 2613 func (z *Element) mulWNonModular(x *Element, y int64) uint64 { 2614 2615 // w := abs(y) 2616 m := y >> 63 2617 w := uint64((y ^ m) - m) 2618 2619 var c uint64 2620 c, z[0] = bits.Mul64(x[0], w) 2621 c, z[1] = madd1(x[1], w, c) 2622 c, z[2] = madd1(x[2], w, c) 2623 c, z[3] = madd1(x[3], w, c) 2624 c, z[4] = madd1(x[4], w, c) 2625 c, z[5] = madd1(x[5], w, c) 2626 c, z[6] = madd1(x[6], w, c) 2627 c, z[7] = madd1(x[7], w, c) 2628 c, z[8] = madd1(x[8], w, c) 2629 c, z[9] = madd1(x[9], w, c) 2630 c, z[10] = madd1(x[10], w, c) 2631 c, z[11] = madd1(x[11], w, c) 2632 2633 if y < 0 { 2634 c = negL(z, c) 2635 } 2636 2637 return c 2638 } 2639 2640 // linearCombNonModular computes a linear combination without modular reduction 2641 func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { 2642 var yTimes Element 2643 2644 yHi := yTimes.mulWNonModular(y, yC) 2645 xHi := z.mulWNonModular(x, xC) 2646 2647 var carry uint64 2648 z[0], carry = bits.Add64(z[0], yTimes[0], 0) 2649 z[1], carry = bits.Add64(z[1], yTimes[1], carry) 2650 z[2], carry = bits.Add64(z[2], yTimes[2], carry) 2651 z[3], carry = bits.Add64(z[3], yTimes[3], carry) 2652 z[4], carry = bits.Add64(z[4], yTimes[4], carry) 2653 z[5], carry = bits.Add64(z[5], yTimes[5], carry) 2654 z[6], carry = bits.Add64(z[6], yTimes[6], carry) 2655 z[7], carry = bits.Add64(z[7], yTimes[7], carry) 2656 z[8], carry = bits.Add64(z[8], yTimes[8], carry) 2657 z[9], carry = bits.Add64(z[9], yTimes[9], carry) 2658 z[10], carry = bits.Add64(z[10], yTimes[10], carry) 2659 z[11], carry = bits.Add64(z[11], yTimes[11], carry) 2660 2661 yHi, _ = bits.Add64(xHi, yHi, carry) 2662 2663 return yHi 2664 }