github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/crypto/elliptic/p256_asm.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 //go:build amd64 || arm64 14 // +build amd64 arm64 15 16 package elliptic 17 18 import ( 19 "math/big" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var p256 p256Curve 33 34 func initP256() { 35 // See FIPS 186-3, section D.2.3 36 p256.CurveParams = &CurveParams{Name: "P-256"} 37 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 38 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 39 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 40 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 41 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 42 p256.BitSize = 256 43 } 44 45 func (curve p256Curve) Params() *CurveParams { 46 return curve.CurveParams 47 } 48 49 // Functions implemented in p256_asm_*64.s 50 // Montgomery multiplication modulo P256 51 //go:noescape 52 func p256Mul(res, in1, in2 []uint64) 53 54 // Montgomery square modulo P256, repeated n times (n >= 1) 55 //go:noescape 56 func p256Sqr(res, in []uint64, n int) 57 58 // Montgomery 
multiplication by 1 59 //go:noescape 60 func p256FromMont(res, in []uint64) 61 62 // iff cond == 1 val <- -val 63 //go:noescape 64 func p256NegCond(val []uint64, cond int) 65 66 // if cond == 0 res <- b; else res <- a 67 //go:noescape 68 func p256MovCond(res, a, b []uint64, cond int) 69 70 // Endianness swap 71 //go:noescape 72 func p256BigToLittle(res []uint64, in []byte) 73 74 //go:noescape 75 func p256LittleToBig(res []byte, in []uint64) 76 77 // Constant time table access 78 //go:noescape 79 func p256Select(point, table []uint64, idx int) 80 81 //go:noescape 82 func p256SelectBase(point, table []uint64, idx int) 83 84 // Montgomery multiplication modulo Ord(G) 85 //go:noescape 86 func p256OrdMul(res, in1, in2 []uint64) 87 88 // Montgomery square modulo Ord(G), repeated n times 89 //go:noescape 90 func p256OrdSqr(res, in []uint64, n int) 91 92 // Point add with in2 being affine point 93 // If sign == 1 -> in2 = -in2 94 // If sel == 0 -> res = in1 95 // if zero == 0 -> res = in2 96 //go:noescape 97 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 98 99 // Point add. Returns one if the two input points were equal and zero 100 // otherwise. (Note that, due to the way that the equations work out, some 101 // representations of ∞ are considered equal to everything by this function.) 102 //go:noescape 103 func p256PointAddAsm(res, in1, in2 []uint64) int 104 105 // Point double 106 //go:noescape 107 func p256PointDoubleAsm(res, in []uint64) 108 109 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 110 if k.Sign() < 0 { 111 // This should never happen. 112 k = new(big.Int).Neg(k) 113 } 114 115 if k.Cmp(p256.N) >= 0 { 116 // This should never happen. 117 k = new(big.Int).Mod(k, p256.N) 118 } 119 120 // table will store precomputed powers of x. 
121 var table [4 * 9]uint64 122 var ( 123 _1 = table[4*0 : 4*1] 124 _11 = table[4*1 : 4*2] 125 _101 = table[4*2 : 4*3] 126 _111 = table[4*3 : 4*4] 127 _1111 = table[4*4 : 4*5] 128 _10101 = table[4*5 : 4*6] 129 _101111 = table[4*6 : 4*7] 130 x = table[4*7 : 4*8] 131 t = table[4*8 : 4*9] 132 ) 133 134 fromBig(x[:], k) 135 // This code operates in the Montgomery domain where R = 2^256 mod n 136 // and n is the order of the scalar field. (See initP256 for the 137 // value.) Elements in the Montgomery domain take the form a×R and 138 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 139 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 140 // i.e. converts x into the Montgomery domain. 141 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 142 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 143 p256OrdMul(_1, x, RR) // _1 144 p256OrdSqr(x, _1, 1) // _10 145 p256OrdMul(_11, x, _1) // _11 146 p256OrdMul(_101, x, _11) // _101 147 p256OrdMul(_111, x, _101) // _111 148 p256OrdSqr(x, _101, 1) // _1010 149 p256OrdMul(_1111, _101, x) // _1111 150 151 p256OrdSqr(t, x, 1) // _10100 152 p256OrdMul(_10101, t, _1) // _10101 153 p256OrdSqr(x, _10101, 1) // _101010 154 p256OrdMul(_101111, _101, x) // _101111 155 p256OrdMul(x, _10101, x) // _111111 = x6 156 p256OrdSqr(t, x, 2) // _11111100 157 p256OrdMul(t, t, _11) // _11111111 = x8 158 p256OrdSqr(x, t, 8) // _ff00 159 p256OrdMul(x, x, t) // _ffff = x16 160 p256OrdSqr(t, x, 16) // _ffff0000 161 p256OrdMul(t, t, x) // _ffffffff = x32 162 163 p256OrdSqr(x, t, 64) 164 p256OrdMul(x, x, t) 165 p256OrdSqr(x, x, 32) 166 p256OrdMul(x, x, t) 167 168 sqrs := []uint8{ 169 6, 5, 4, 5, 5, 170 4, 3, 3, 5, 9, 171 6, 2, 5, 6, 5, 172 4, 5, 5, 3, 10, 173 2, 5, 5, 3, 7, 6} 174 muls := [][]uint64{ 175 _101111, _111, _11, _1111, _10101, 176 _101, _101, _101, _111, _101111, 177 _1111, _1, _1, _1111, _111, 178 
_111, _111, _101, _11, _101111, 179 _11, _11, _11, _1, _10101, _1111} 180 181 for i, s := range sqrs { 182 p256OrdSqr(x, x, int(s)) 183 p256OrdMul(x, x, muls[i]) 184 } 185 186 // Multiplying by one in the Montgomery domain converts a Montgomery 187 // value out of the domain. 188 one := []uint64{1, 0, 0, 0} 189 p256OrdMul(x, x, one) 190 191 xOut := make([]byte, 32) 192 p256LittleToBig(xOut, x) 193 return new(big.Int).SetBytes(xOut) 194 } 195 196 // fromBig converts a *big.Int into a format used by this code. 197 func fromBig(out []uint64, big *big.Int) { 198 for i := range out { 199 out[i] = 0 200 } 201 202 for i, v := range big.Bits() { 203 out[i] = uint64(v) 204 } 205 } 206 207 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 208 // to out. If the scalar is equal or greater than the order of the group, it's 209 // reduced modulo that order. 210 func p256GetScalar(out []uint64, in []byte) { 211 n := new(big.Int).SetBytes(in) 212 213 if n.Cmp(p256.N) >= 0 { 214 n.Mod(n, p256.N) 215 } 216 fromBig(out, n) 217 } 218 219 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 220 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 221 // R×R mod p. See comment in Inverse about how this is used. 
222 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 223 224 func maybeReduceModP(in *big.Int) *big.Int { 225 if in.Cmp(p256.P) < 0 { 226 return in 227 } 228 return new(big.Int).Mod(in, p256.P) 229 } 230 231 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 232 scalarReversed := make([]uint64, 4) 233 var r1, r2 p256Point 234 p256GetScalar(scalarReversed, baseScalar) 235 r1IsInfinity := scalarIsZero(scalarReversed) 236 r1.p256BaseMult(scalarReversed) 237 238 p256GetScalar(scalarReversed, scalar) 239 r2IsInfinity := scalarIsZero(scalarReversed) 240 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 241 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 242 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 243 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 244 245 // This sets r2's Z value to 1, in the Montgomery domain. 246 r2.xyz[8] = 0x0000000000000001 247 r2.xyz[9] = 0xffffffff00000000 248 r2.xyz[10] = 0xffffffffffffffff 249 r2.xyz[11] = 0x00000000fffffffe 250 251 r2.p256ScalarMult(scalarReversed) 252 253 var sum, double p256Point 254 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 255 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 256 sum.CopyConditional(&double, pointsEqual) 257 sum.CopyConditional(&r1, r2IsInfinity) 258 sum.CopyConditional(&r2, r1IsInfinity) 259 260 return sum.p256PointToAffine() 261 } 262 263 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 264 scalarReversed := make([]uint64, 4) 265 p256GetScalar(scalarReversed, scalar) 266 267 var r p256Point 268 r.p256BaseMult(scalarReversed) 269 return r.p256PointToAffine() 270 } 271 272 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 273 scalarReversed := make([]uint64, 4) 274 p256GetScalar(scalarReversed, scalar) 275 276 var r p256Point 277 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 278 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 279 p256Mul(r.xyz[0:4], 
r.xyz[0:4], rr[:]) 280 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 281 // This sets r2's Z value to 1, in the Montgomery domain. 282 r.xyz[8] = 0x0000000000000001 283 r.xyz[9] = 0xffffffff00000000 284 r.xyz[10] = 0xffffffffffffffff 285 r.xyz[11] = 0x00000000fffffffe 286 287 r.p256ScalarMult(scalarReversed) 288 return r.p256PointToAffine() 289 } 290 291 // uint64IsZero returns 1 if x is zero and zero otherwise. 292 func uint64IsZero(x uint64) int { 293 x = ^x 294 x &= x >> 32 295 x &= x >> 16 296 x &= x >> 8 297 x &= x >> 4 298 x &= x >> 2 299 x &= x >> 1 300 return int(x & 1) 301 } 302 303 // scalarIsZero returns 1 if scalar represents the zero value, and zero 304 // otherwise. 305 func scalarIsZero(scalar []uint64) int { 306 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 307 } 308 309 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 310 zInv := make([]uint64, 4) 311 zInvSq := make([]uint64, 4) 312 p256Inverse(zInv, p.xyz[8:12]) 313 p256Sqr(zInvSq, zInv, 1) 314 p256Mul(zInv, zInv, zInvSq) 315 316 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 317 p256Mul(zInv, p.xyz[4:8], zInv) 318 319 p256FromMont(zInvSq, zInvSq) 320 p256FromMont(zInv, zInv) 321 322 xOut := make([]byte, 32) 323 yOut := make([]byte, 32) 324 p256LittleToBig(xOut, zInvSq) 325 p256LittleToBig(yOut, zInv) 326 327 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 328 } 329 330 // CopyConditional copies overwrites p with src if v == 1, and leaves p 331 // unchanged if v == 0. 332 func (p *p256Point) CopyConditional(src *p256Point, v int) { 333 pMask := uint64(v) - 1 334 srcMask := ^pMask 335 336 for i, n := range p.xyz { 337 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 338 } 339 } 340 341 // p256Inverse sets out to in^-1 mod p. 
func p256Inverse(out, in []uint64) {
	// The inverse is computed as in^(p-2) using only squarings and
	// multiplications. pN below holds in^(2^N - 1), i.e. in raised to an
	// exponent of N one-bits.
	var stack [6 * 4]uint64
	p2 := stack[4*0 : 4*0+4]
	p4 := stack[4*1 : 4*1+4]
	p8 := stack[4*2 : 4*2+4]
	p16 := stack[4*3 : 4*3+4]
	p32 := stack[4*4 : 4*4+4]

	p256Sqr(out, in, 1)
	p256Mul(p2, out, in) // in^0x3

	p256Sqr(out, p2, 2)
	p256Mul(p4, out, p2) // in^0xf

	p256Sqr(out, p4, 4)
	p256Mul(p8, out, p4) // in^0xff

	p256Sqr(out, p8, 8)
	p256Mul(p16, out, p8) // in^0xffff

	p256Sqr(out, p16, 16)
	p256Mul(p32, out, p16) // in^0xffffffff

	// From here on each step shifts the exponent accumulated in out left
	// by the number of squarings and then ORs in the exponent of the
	// multiplicand.
	p256Sqr(out, p32, 32)
	p256Mul(out, out, in)

	p256Sqr(out, out, 128)
	p256Mul(out, out, p32)

	p256Sqr(out, out, 32)
	p256Mul(out, out, p32)

	p256Sqr(out, out, 16)
	p256Mul(out, out, p16)

	p256Sqr(out, out, 8)
	p256Mul(out, out, p8)

	p256Sqr(out, out, 4)
	p256Mul(out, out, p4)

	p256Sqr(out, out, 2)
	p256Mul(out, out, p2)

	p256Sqr(out, out, 2)
	p256Mul(out, out, in)
}

// p256StorePoint copies the 12 words of p into slot index of the table r,
// i.e. to r[index*12 : index*12+12].
func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
	copy(r[index*12:], p.xyz[:])
}

// boothW5 recodes the 6-bit window value in into a magnitude and a sign flag.
// For in < 32 it returns (ceil(in/2), 0); for 32 <= in < 64 it returns
// (ceil((63-in)/2), 1). The computation uses masks rather than branches.
func boothW5(in uint) (int, int) {
	// s is all ones when bit 5 of in is set and zero otherwise.
	var s uint = ^((in >> 5) - 1)
	var d uint = (1 << 6) - in - 1
	// Select 63-in when the top bit is set, in otherwise.
	d = (d & s) | (in & (^s))
	d = (d >> 1) + (d & 1)
	return int(d), int(s & 1)
}

// boothW6 recodes the 7-bit window value in into a magnitude and a sign flag.
// For in < 64 it returns (ceil(in/2), 0); for 64 <= in < 128 it returns
// (ceil((127-in)/2), 1). The computation uses masks rather than branches.
func boothW6(in uint) (int, int) {
	// s is all ones when bit 6 of in is set and zero otherwise.
	var s uint = ^((in >> 6) - 1)
	var d uint = (1 << 7) - in - 1
	// Select 127-in when the top bit is set, in otherwise.
	d = (d & s) | (in & (^s))
	d = (d >> 1) + (d & 1)
	return int(d), int(s & 1)
}

// p256BaseMult sets p to the product of scalar and the point whose multiples
// are stored in p256Precomputed. scalar is four little-endian 64-bit words.
//
// The scalar is consumed as 43 windows of 7 bits, starting from the least
// significant end; consecutive windows advance by 6 bits and so overlap by
// one bit. Each window is recoded by boothW6 into a table index and a sign,
// the entry is fetched from p256Precomputed[i] with p256SelectBase, and it is
// accumulated into p with p256PointAddAffineAsm.
// NOTE(review): the layout and contents of p256Precomputed are defined outside
// this view — presumably table i holds the multiples needed for window i.
func (p *p256Point) p256BaseMult(scalar []uint64) {
	// The first window has an implicit zero bit below bit 0 of the scalar.
	wvalue := (scalar[0] << 1) & 0x7f
	sel, sign := boothW6(uint(wvalue))
	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
	p256NegCond(p.xyz[4:8], sign)

	// (This is one, in the Montgomery domain.)
	p.xyz[8] = 0x0000000000000001
	p.xyz[9] = 0xffffffff00000000
	p.xyz[10] = 0xffffffffffffffff
	p.xyz[11] = 0x00000000fffffffe

	var t0 p256Point
	// (This is one, in the Montgomery domain.)
	t0.xyz[8] = 0x0000000000000001
	t0.xyz[9] = 0xffffffff00000000
	t0.xyz[10] = 0xffffffffffffffff
	t0.xyz[11] = 0x00000000fffffffe

	index := uint(5)
	// zero stays 0 until the first non-zero index has been selected; it is
	// passed to p256PointAddAffineAsm, which returns its second operand
	// unchanged while zero == 0.
	zero := sel

	for i := 1; i < 43; i++ {
		if index < 192 {
			// The window may straddle two words, so the bits from the
			// next word are merged in.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
		} else {
			// The window lies entirely within the top word.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
		}
		index += 6
		sel, sign = boothW6(uint(wvalue))
		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
		zero |= sel
	}
}

// p256ScalarMult replaces p with scalar×p. scalar is four little-endian
// 64-bit words.
//
// A table of the multiples 1×p through 16×p is built first. The scalar is then
// scanned from the most significant end in windows recoded by boothW5, with
// five doublings of the accumulator between consecutive windows.
func (p *p256Point) p256ScalarMult(scalar []uint64) {
	// precomp is a table of precomputed points that stores the multiples
	// of p from 1×p to 16×p; slot i holds (i+1)×p.
	var precomp [16 * 4 * 3]uint64
	var t0, t1, t2, t3 p256Point

	// Prepare the table. The trailing comments give the multiple of p
	// that is being stored.
	p.p256StorePoint(&precomp, 0) // 1

	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 1)  // 2
	t1.p256StorePoint(&precomp, 3)  // 4
	t2.p256StorePoint(&precomp, 7)  // 8
	t3.p256StorePoint(&precomp, 15) // 16

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 2) // 3
	t1.p256StorePoint(&precomp, 4) // 5
	t2.p256StorePoint(&precomp, 8) // 9

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	t0.p256StorePoint(&precomp, 5) // 6
	t1.p256StorePoint(&precomp, 9) // 10

	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	t2.p256StorePoint(&precomp, 6)  // 7
	t1.p256StorePoint(&precomp, 10) // 11

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 11) // 12
	t2.p256StorePoint(&precomp, 13) // 14

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 12) // 13
	t2.p256StorePoint(&precomp, 14) // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// The top window contains only the two highest bits of the scalar, so
	// its sign is discarded.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	p256Select(p.xyz[0:12], precomp[0:], sel)
	// zero stays 0 until the first non-zero index has been selected, i.e.
	// while the accumulator does not yet hold a meaningful point.
	zero := sel

	for index > 4 {
		index -= 5
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])

		if index < 192 {
			// The window may straddle two words, so the bits from the
			// next word are merged in.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			// The window lies entirely within the top word.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		p256Select(t0.xyz[0:], precomp[0:], sel)
		p256NegCond(t0.xyz[4:8], sign)
		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
		// If sel == 0 there is nothing to add: discard the sum and keep p.
		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
		// If zero == 0 nothing has been accumulated yet: take the
		// selected table point itself instead of the sum.
		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
		zero |= sel
	}

	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])

	// The last window has an implicit zero bit below bit 0 of the scalar.
	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	p256Select(t0.xyz[0:], precomp[0:], sel)
	p256NegCond(t0.xyz[4:8], sign)
	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	// Same conditional moves as in the loop above.
	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}