github.com/AESNooper/go/src@v0.0.0-20220218095104-b56a4ab1bbbb/crypto/elliptic/p256_asm.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 //go:build amd64 || arm64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 ) 20 21 //go:generate go run -tags=tablegen gen_p256_table.go 22 23 type ( 24 p256Curve struct { 25 *CurveParams 26 } 27 28 p256Point struct { 29 xyz [12]uint64 30 } 31 ) 32 33 var p256 p256Curve 34 35 func initP256() { 36 // See FIPS 186-3, section D.2.3 37 p256.CurveParams = &CurveParams{Name: "P-256"} 38 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 39 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 40 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 41 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 42 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 43 p256.BitSize = 256 44 } 45 46 func (curve p256Curve) Params() *CurveParams { 47 return curve.CurveParams 48 } 49 50 // Functions implemented in p256_asm_*64.s 51 // Montgomery multiplication modulo P256 52 //go:noescape 53 func p256Mul(res, in1, in2 []uint64) 54 55 // Montgomery square modulo P256, repeated n times (n >= 1) 56 //go:noescape 57 func p256Sqr(res, in []uint64, n int) 58 59 // Montgomery multiplication by 1 60 //go:noescape 61 func p256FromMont(res, in []uint64) 62 63 // iff cond == 1 val <- -val 64 //go:noescape 65 func p256NegCond(val []uint64, cond int) 66 67 // if cond == 0 res <- b; else res <- a 68 //go:noescape 69 func p256MovCond(res, a, b []uint64, cond int) 70 71 // Endianness swap 72 //go:noescape 73 func p256BigToLittle(res []uint64, in []byte) 74 75 //go:noescape 76 func p256LittleToBig(res []byte, in []uint64) 77 78 // Constant time table access 79 //go:noescape 80 func p256Select(point, table []uint64, idx int) 81 82 //go:noescape 83 func p256SelectBase(point *[12]uint64, table string, idx int) 84 85 // Montgomery multiplication modulo Ord(G) 86 //go:noescape 87 func p256OrdMul(res, in1, in2 []uint64) 88 89 // Montgomery square modulo Ord(G), repeated n times 90 //go:noescape 91 func p256OrdSqr(res, in []uint64, n int) 92 93 // Point add with in2 being affine point 94 // If sign == 1 -> in2 = -in2 95 // If sel == 0 -> res = in1 96 // if zero == 0 -> res = in2 97 //go:noescape 98 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 99 100 // Point add. Returns one if the two input points were equal and zero 101 // otherwise. (Note that, due to the way that the equations work out, some 102 // representations of ∞ are considered equal to everything by this function.) 103 //go:noescape 104 func p256PointAddAsm(res, in1, in2 []uint64) int 105 106 // Point double 107 //go:noescape 108 func p256PointDoubleAsm(res, in []uint64) 109 110 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 111 if k.Sign() < 0 { 112 // This should never happen. 113 k = new(big.Int).Neg(k) 114 } 115 116 if k.Cmp(p256.N) >= 0 { 117 // This should never happen. 118 k = new(big.Int).Mod(k, p256.N) 119 } 120 121 // table will store precomputed powers of x. 122 var table [4 * 9]uint64 123 var ( 124 _1 = table[4*0 : 4*1] 125 _11 = table[4*1 : 4*2] 126 _101 = table[4*2 : 4*3] 127 _111 = table[4*3 : 4*4] 128 _1111 = table[4*4 : 4*5] 129 _10101 = table[4*5 : 4*6] 130 _101111 = table[4*6 : 4*7] 131 x = table[4*7 : 4*8] 132 t = table[4*8 : 4*9] 133 ) 134 135 fromBig(x[:], k) 136 // This code operates in the Montgomery domain where R = 2^256 mod n 137 // and n is the order of the scalar field. (See initP256 for the 138 // value.) Elements in the Montgomery domain take the form a×R and 139 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 140 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 141 // i.e. converts x into the Montgomery domain. 142 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 143 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 144 p256OrdMul(_1, x, RR) // _1 145 p256OrdSqr(x, _1, 1) // _10 146 p256OrdMul(_11, x, _1) // _11 147 p256OrdMul(_101, x, _11) // _101 148 p256OrdMul(_111, x, _101) // _111 149 p256OrdSqr(x, _101, 1) // _1010 150 p256OrdMul(_1111, _101, x) // _1111 151 152 p256OrdSqr(t, x, 1) // _10100 153 p256OrdMul(_10101, t, _1) // _10101 154 p256OrdSqr(x, _10101, 1) // _101010 155 p256OrdMul(_101111, _101, x) // _101111 156 p256OrdMul(x, _10101, x) // _111111 = x6 157 p256OrdSqr(t, x, 2) // _11111100 158 p256OrdMul(t, t, _11) // _11111111 = x8 159 p256OrdSqr(x, t, 8) // _ff00 160 p256OrdMul(x, x, t) // _ffff = x16 161 p256OrdSqr(t, x, 16) // _ffff0000 162 p256OrdMul(t, t, x) // _ffffffff = x32 163 164 p256OrdSqr(x, t, 64) 165 p256OrdMul(x, x, t) 166 p256OrdSqr(x, x, 32) 167 p256OrdMul(x, x, t) 168 169 sqrs := []uint8{ 170 6, 5, 4, 5, 5, 171 4, 3, 3, 5, 9, 172 6, 2, 5, 6, 5, 173 4, 5, 5, 3, 10, 174 2, 5, 5, 3, 7, 6} 175 muls := [][]uint64{ 176 _101111, _111, _11, _1111, _10101, 177 _101, _101, _101, _111, _101111, 178 _1111, _1, _1, _1111, _111, 179 _111, _111, _101, _11, _101111, 180 _11, _11, _11, _1, _10101, _1111} 181 182 for i, s := range sqrs { 183 p256OrdSqr(x, x, int(s)) 184 p256OrdMul(x, x, muls[i]) 185 } 186 187 // Multiplying by one in the Montgomery domain converts a Montgomery 188 // value out of the domain. 189 one := []uint64{1, 0, 0, 0} 190 p256OrdMul(x, x, one) 191 192 xOut := make([]byte, 32) 193 p256LittleToBig(xOut, x) 194 return new(big.Int).SetBytes(xOut) 195 } 196 197 // fromBig converts a *big.Int into a format used by this code. 198 func fromBig(out []uint64, big *big.Int) { 199 for i := range out { 200 out[i] = 0 201 } 202 203 for i, v := range big.Bits() { 204 out[i] = uint64(v) 205 } 206 } 207 208 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 209 // to out. If the scalar is equal or greater than the order of the group, it's 210 // reduced modulo that order. 211 func p256GetScalar(out []uint64, in []byte) { 212 n := new(big.Int).SetBytes(in) 213 214 if n.Cmp(p256.N) >= 0 { 215 n.Mod(n, p256.N) 216 } 217 fromBig(out, n) 218 } 219 220 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 221 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 222 // R×R mod p. See comment in Inverse about how this is used. 223 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 224 225 func maybeReduceModP(in *big.Int) *big.Int { 226 if in.Cmp(p256.P) < 0 { 227 return in 228 } 229 return new(big.Int).Mod(in, p256.P) 230 } 231 232 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 233 scalarReversed := make([]uint64, 4) 234 var r1, r2 p256Point 235 p256GetScalar(scalarReversed, baseScalar) 236 r1IsInfinity := scalarIsZero(scalarReversed) 237 r1.p256BaseMult(scalarReversed) 238 239 p256GetScalar(scalarReversed, scalar) 240 r2IsInfinity := scalarIsZero(scalarReversed) 241 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 242 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 243 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 244 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 245 246 // This sets r2's Z value to 1, in the Montgomery domain. 247 r2.xyz[8] = 0x0000000000000001 248 r2.xyz[9] = 0xffffffff00000000 249 r2.xyz[10] = 0xffffffffffffffff 250 r2.xyz[11] = 0x00000000fffffffe 251 252 r2.p256ScalarMult(scalarReversed) 253 254 var sum, double p256Point 255 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 256 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 257 sum.CopyConditional(&double, pointsEqual) 258 sum.CopyConditional(&r1, r2IsInfinity) 259 sum.CopyConditional(&r2, r1IsInfinity) 260 261 return sum.p256PointToAffine() 262 } 263 264 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 265 scalarReversed := make([]uint64, 4) 266 p256GetScalar(scalarReversed, scalar) 267 268 var r p256Point 269 r.p256BaseMult(scalarReversed) 270 return r.p256PointToAffine() 271 } 272 273 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 274 scalarReversed := make([]uint64, 4) 275 p256GetScalar(scalarReversed, scalar) 276 277 var r p256Point 278 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 279 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 280 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 281 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 282 // This sets r2's Z value to 1, in the Montgomery domain. 283 r.xyz[8] = 0x0000000000000001 284 r.xyz[9] = 0xffffffff00000000 285 r.xyz[10] = 0xffffffffffffffff 286 r.xyz[11] = 0x00000000fffffffe 287 288 r.p256ScalarMult(scalarReversed) 289 return r.p256PointToAffine() 290 } 291 292 // uint64IsZero returns 1 if x is zero and zero otherwise. 293 func uint64IsZero(x uint64) int { 294 x = ^x 295 x &= x >> 32 296 x &= x >> 16 297 x &= x >> 8 298 x &= x >> 4 299 x &= x >> 2 300 x &= x >> 1 301 return int(x & 1) 302 } 303 304 // scalarIsZero returns 1 if scalar represents the zero value, and zero 305 // otherwise. 306 func scalarIsZero(scalar []uint64) int { 307 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 308 } 309 310 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 311 zInv := make([]uint64, 4) 312 zInvSq := make([]uint64, 4) 313 p256Inverse(zInv, p.xyz[8:12]) 314 p256Sqr(zInvSq, zInv, 1) 315 p256Mul(zInv, zInv, zInvSq) 316 317 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 318 p256Mul(zInv, p.xyz[4:8], zInv) 319 320 p256FromMont(zInvSq, zInvSq) 321 p256FromMont(zInv, zInv) 322 323 xOut := make([]byte, 32) 324 yOut := make([]byte, 32) 325 p256LittleToBig(xOut, zInvSq) 326 p256LittleToBig(yOut, zInv) 327 328 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 329 } 330 331 // CopyConditional copies overwrites p with src if v == 1, and leaves p 332 // unchanged if v == 0. 333 func (p *p256Point) CopyConditional(src *p256Point, v int) { 334 pMask := uint64(v) - 1 335 srcMask := ^pMask 336 337 for i, n := range p.xyz { 338 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 339 } 340 } 341 342 // p256Inverse sets out to in^-1 mod p. 343 func p256Inverse(out, in []uint64) { 344 var stack [6 * 4]uint64 345 p2 := stack[4*0 : 4*0+4] 346 p4 := stack[4*1 : 4*1+4] 347 p8 := stack[4*2 : 4*2+4] 348 p16 := stack[4*3 : 4*3+4] 349 p32 := stack[4*4 : 4*4+4] 350 351 p256Sqr(out, in, 1) 352 p256Mul(p2, out, in) // 3*p 353 354 p256Sqr(out, p2, 2) 355 p256Mul(p4, out, p2) // f*p 356 357 p256Sqr(out, p4, 4) 358 p256Mul(p8, out, p4) // ff*p 359 360 p256Sqr(out, p8, 8) 361 p256Mul(p16, out, p8) // ffff*p 362 363 p256Sqr(out, p16, 16) 364 p256Mul(p32, out, p16) // ffffffff*p 365 366 p256Sqr(out, p32, 32) 367 p256Mul(out, out, in) 368 369 p256Sqr(out, out, 128) 370 p256Mul(out, out, p32) 371 372 p256Sqr(out, out, 32) 373 p256Mul(out, out, p32) 374 375 p256Sqr(out, out, 16) 376 p256Mul(out, out, p16) 377 378 p256Sqr(out, out, 8) 379 p256Mul(out, out, p8) 380 381 p256Sqr(out, out, 4) 382 p256Mul(out, out, p4) 383 384 p256Sqr(out, out, 2) 385 p256Mul(out, out, p2) 386 387 p256Sqr(out, out, 2) 388 p256Mul(out, out, in) 389 } 390 391 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 392 copy(r[index*12:], p.xyz[:]) 393 } 394 395 func boothW5(in uint) (int, int) { 396 var s uint = ^((in >> 5) - 1) 397 var d uint = (1 << 6) - in - 1 398 d = (d & s) | (in & (^s)) 399 d = (d >> 1) + (d & 1) 400 return int(d), int(s & 1) 401 } 402 403 func boothW6(in uint) (int, int) { 404 var s uint = ^((in >> 6) - 1) 405 var d uint = (1 << 7) - in - 1 406 d = (d & s) | (in & (^s)) 407 d = (d >> 1) + (d & 1) 408 return int(d), int(s & 1) 409 } 410 411 func (p *p256Point) p256BaseMult(scalar []uint64) { 412 wvalue := (scalar[0] << 1) & 0x7f 413 sel, sign := boothW6(uint(wvalue)) 414 p256SelectBase(&p.xyz, p256Precomputed, sel) 415 p256NegCond(p.xyz[4:8], sign) 416 417 // (This is one, in the Montgomery domain.) 418 p.xyz[8] = 0x0000000000000001 419 p.xyz[9] = 0xffffffff00000000 420 p.xyz[10] = 0xffffffffffffffff 421 p.xyz[11] = 0x00000000fffffffe 422 423 var t0 p256Point 424 // (This is one, in the Montgomery domain.) 425 t0.xyz[8] = 0x0000000000000001 426 t0.xyz[9] = 0xffffffff00000000 427 t0.xyz[10] = 0xffffffffffffffff 428 t0.xyz[11] = 0x00000000fffffffe 429 430 index := uint(5) 431 zero := sel 432 433 for i := 1; i < 43; i++ { 434 if index < 192 { 435 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 436 } else { 437 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 438 } 439 index += 6 440 sel, sign = boothW6(uint(wvalue)) 441 p256SelectBase(&t0.xyz, p256Precomputed[i*32*8*8:], sel) 442 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 443 zero |= sel 444 } 445 } 446 447 func (p *p256Point) p256ScalarMult(scalar []uint64) { 448 // precomp is a table of precomputed points that stores powers of p 449 // from p^1 to p^16. 450 var precomp [16 * 4 * 3]uint64 451 var t0, t1, t2, t3 p256Point 452 453 // Prepare the table 454 p.p256StorePoint(&precomp, 0) // 1 455 456 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 457 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 458 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 459 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 460 t0.p256StorePoint(&precomp, 1) // 2 461 t1.p256StorePoint(&precomp, 3) // 4 462 t2.p256StorePoint(&precomp, 7) // 8 463 t3.p256StorePoint(&precomp, 15) // 16 464 465 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 466 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 467 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 468 t0.p256StorePoint(&precomp, 2) // 3 469 t1.p256StorePoint(&precomp, 4) // 5 470 t2.p256StorePoint(&precomp, 8) // 9 471 472 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 473 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 474 t0.p256StorePoint(&precomp, 5) // 6 475 t1.p256StorePoint(&precomp, 9) // 10 476 477 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 478 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 479 t2.p256StorePoint(&precomp, 6) // 7 480 t1.p256StorePoint(&precomp, 10) // 11 481 482 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 483 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 484 t0.p256StorePoint(&precomp, 11) // 12 485 t2.p256StorePoint(&precomp, 13) // 14 486 487 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 488 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 489 t0.p256StorePoint(&precomp, 12) // 13 490 t2.p256StorePoint(&precomp, 14) // 15 491 492 // Start scanning the window from top bit 493 index := uint(254) 494 var sel, sign int 495 496 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 497 sel, _ = boothW5(uint(wvalue)) 498 499 p256Select(p.xyz[0:12], precomp[0:], sel) 500 zero := sel 501 502 for index > 4 { 503 index -= 5 504 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 505 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 506 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 507 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 508 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 509 510 if index < 192 { 511 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 512 } else { 513 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 514 } 515 516 sel, sign = boothW5(uint(wvalue)) 517 518 p256Select(t0.xyz[0:], precomp[0:], sel) 519 p256NegCond(t0.xyz[4:8], sign) 520 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 521 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 522 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 523 zero |= sel 524 } 525 526 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 527 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 528 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 529 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 530 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 531 532 wvalue = (scalar[0] << 1) & 0x3f 533 sel, sign = boothW5(uint(wvalue)) 534 535 p256Select(t0.xyz[0:], precomp[0:], sel) 536 p256NegCond(t0.xyz[4:8], sign) 537 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 538 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 539 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 540 }