github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/crypto/elliptic/p256_amd64.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 // +build amd64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 "sync" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var ( 33 p256 p256Curve 34 p256Precomputed *[37][64 * 8]uint64 35 precomputeOnce sync.Once 36 ) 37 38 func initP256() { 39 // See FIPS 186-3, section D.2.3 40 p256.CurveParams = &CurveParams{Name: "P-256"} 41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 46 p256.BitSize = 256 47 } 48 49 func (curve p256Curve) Params() *CurveParams { 50 return curve.CurveParams 51 } 52 53 // Functions implemented in p256_asm_amd64.s 54 // Montgomery multiplication modulo P256 55 func p256Mul(res, in1, in2 []uint64) 56 57 // Montgomery square modulo P256 58 func p256Sqr(res, in []uint64) 59 60 // Montgomery multiplication by 1 61 func p256FromMont(res, in []uint64) 62 63 // iff cond == 1 val <- -val 64 func p256NegCond(val []uint64, cond int) 65 66 // if cond == 0 res <- b; else res <- a 67 func p256MovCond(res, a, b []uint64, cond int) 68 69 // Endianess swap 70 func p256BigToLittle(res []uint64, in []byte) 71 func p256LittleToBig(res []byte, in []uint64) 72 73 // Constant time table access 74 func p256Select(point, table []uint64, idx int) 75 func p256SelectBase(point, table []uint64, idx int) 76 77 // Montgomery multiplication modulo Ord(G) 78 func p256OrdMul(res, in1, in2 []uint64) 79 80 // Montgomery square modulo Ord(G), repeated n times 81 func p256OrdSqr(res, in []uint64, n int) 82 83 // Point add with in2 being affine point 84 // If sign == 1 -> in2 = -in2 85 // If sel == 0 -> res = in1 86 // if zero == 0 -> res = in2 87 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 88 89 // Point add 90 func p256PointAddAsm(res, in1, in2 []uint64) 91 92 // Point double 93 func p256PointDoubleAsm(res, in []uint64) 94 95 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 96 if k.Cmp(p256.N) >= 0 { 97 // This should never happen. 98 reducedK := new(big.Int).Mod(k, p256.N) 99 k = reducedK 100 } 101 102 // table will store precomputed powers of x. The four words at index 103 // 4×i store x^(i+1). 104 var table [4 * 15]uint64 105 106 x := make([]uint64, 4) 107 fromBig(x[:], k) 108 // This code operates in the Montgomery domain where R = 2^256 mod n 109 // and n is the order of the scalar field. (See initP256 for the 110 // value.) Elements in the Montgomery domain take the form a×R and 111 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 112 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 113 // i.e. converts x into the Montgomery domain. 114 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 115 p256OrdMul(table[:4], x, RR) 116 117 // Prepare the table, no need in constant time access, because the 118 // power is not a secret. (Entry 0 is never used.) 119 for i := 2; i < 16; i += 2 { 120 p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1) 121 p256OrdMul(table[4*i:], table[4*(i-1):], table[:4]) 122 } 123 124 x[0] = table[4*14+0] // f 125 x[1] = table[4*14+1] 126 x[2] = table[4*14+2] 127 x[3] = table[4*14+3] 128 129 p256OrdSqr(x, x, 4) 130 p256OrdMul(x, x, table[4*14:4*14+4]) // ff 131 t := make([]uint64, 4, 4) 132 t[0] = x[0] 133 t[1] = x[1] 134 t[2] = x[2] 135 t[3] = x[3] 136 137 p256OrdSqr(x, x, 8) 138 p256OrdMul(x, x, t) // ffff 139 t[0] = x[0] 140 t[1] = x[1] 141 t[2] = x[2] 142 t[3] = x[3] 143 144 p256OrdSqr(x, x, 16) 145 p256OrdMul(x, x, t) // ffffffff 146 t[0] = x[0] 147 t[1] = x[1] 148 t[2] = x[2] 149 t[3] = x[3] 150 151 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 152 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 153 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 154 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 155 156 // Remaining 32 windows 157 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 158 for i := 0; i < 32; i++ { 159 p256OrdSqr(x, x, 4) 160 p256OrdMul(x, x, table[4*(expLo[i]-1):]) 161 } 162 163 // Multiplying by one in the Montgomery domain converts a Montgomery 164 // value out of the domain. 165 one := []uint64{1, 0, 0, 0} 166 p256OrdMul(x, x, one) 167 168 xOut := make([]byte, 32) 169 p256LittleToBig(xOut, x) 170 return new(big.Int).SetBytes(xOut) 171 } 172 173 // fromBig converts a *big.Int into a format used by this code. 174 func fromBig(out []uint64, big *big.Int) { 175 for i := range out { 176 out[i] = 0 177 } 178 179 for i, v := range big.Bits() { 180 out[i] = uint64(v) 181 } 182 } 183 184 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 185 // to out. If the scalar is equal or greater than the order of the group, it's 186 // reduced modulo that order. 187 func p256GetScalar(out []uint64, in []byte) { 188 n := new(big.Int).SetBytes(in) 189 190 if n.Cmp(p256.N) >= 0 { 191 n.Mod(n, p256.N) 192 } 193 fromBig(out, n) 194 } 195 196 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 197 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 198 // R×R mod p. See comment in Inverse about how this is used. 199 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 200 201 func maybeReduceModP(in *big.Int) *big.Int { 202 if in.Cmp(p256.P) < 0 { 203 return in 204 } 205 return new(big.Int).Mod(in, p256.P) 206 } 207 208 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 209 scalarReversed := make([]uint64, 4) 210 var r1, r2 p256Point 211 p256GetScalar(scalarReversed, baseScalar) 212 r1.p256BaseMult(scalarReversed) 213 214 p256GetScalar(scalarReversed, scalar) 215 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 216 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 217 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 218 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 219 220 // This sets r2's Z value to 1, in the Montgomery domain. 221 r2.xyz[8] = 0x0000000000000001 222 r2.xyz[9] = 0xffffffff00000000 223 r2.xyz[10] = 0xffffffffffffffff 224 r2.xyz[11] = 0x00000000fffffffe 225 226 r2.p256ScalarMult(scalarReversed) 227 p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:]) 228 return r1.p256PointToAffine() 229 } 230 231 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 232 scalarReversed := make([]uint64, 4) 233 p256GetScalar(scalarReversed, scalar) 234 235 var r p256Point 236 r.p256BaseMult(scalarReversed) 237 return r.p256PointToAffine() 238 } 239 240 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 241 scalarReversed := make([]uint64, 4) 242 p256GetScalar(scalarReversed, scalar) 243 244 var r p256Point 245 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 246 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 247 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 248 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 249 // This sets r2's Z value to 1, in the Montgomery domain. 250 r.xyz[8] = 0x0000000000000001 251 r.xyz[9] = 0xffffffff00000000 252 r.xyz[10] = 0xffffffffffffffff 253 r.xyz[11] = 0x00000000fffffffe 254 255 r.p256ScalarMult(scalarReversed) 256 return r.p256PointToAffine() 257 } 258 259 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 260 zInv := make([]uint64, 4) 261 zInvSq := make([]uint64, 4) 262 p256Inverse(zInv, p.xyz[8:12]) 263 p256Sqr(zInvSq, zInv) 264 p256Mul(zInv, zInv, zInvSq) 265 266 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 267 p256Mul(zInv, p.xyz[4:8], zInv) 268 269 p256FromMont(zInvSq, zInvSq) 270 p256FromMont(zInv, zInv) 271 272 xOut := make([]byte, 32) 273 yOut := make([]byte, 32) 274 p256LittleToBig(xOut, zInvSq) 275 p256LittleToBig(yOut, zInv) 276 277 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 278 } 279 280 // p256Inverse sets out to in^-1 mod p. 281 func p256Inverse(out, in []uint64) { 282 var stack [6 * 4]uint64 283 p2 := stack[4*0 : 4*0+4] 284 p4 := stack[4*1 : 4*1+4] 285 p8 := stack[4*2 : 4*2+4] 286 p16 := stack[4*3 : 4*3+4] 287 p32 := stack[4*4 : 4*4+4] 288 289 p256Sqr(out, in) 290 p256Mul(p2, out, in) // 3*p 291 292 p256Sqr(out, p2) 293 p256Sqr(out, out) 294 p256Mul(p4, out, p2) // f*p 295 296 p256Sqr(out, p4) 297 p256Sqr(out, out) 298 p256Sqr(out, out) 299 p256Sqr(out, out) 300 p256Mul(p8, out, p4) // ff*p 301 302 p256Sqr(out, p8) 303 304 for i := 0; i < 7; i++ { 305 p256Sqr(out, out) 306 } 307 p256Mul(p16, out, p8) // ffff*p 308 309 p256Sqr(out, p16) 310 for i := 0; i < 15; i++ { 311 p256Sqr(out, out) 312 } 313 p256Mul(p32, out, p16) // ffffffff*p 314 315 p256Sqr(out, p32) 316 317 for i := 0; i < 31; i++ { 318 p256Sqr(out, out) 319 } 320 p256Mul(out, out, in) 321 322 for i := 0; i < 32*4; i++ { 323 p256Sqr(out, out) 324 } 325 p256Mul(out, out, p32) 326 327 for i := 0; i < 32; i++ { 328 p256Sqr(out, out) 329 } 330 p256Mul(out, out, p32) 331 332 for i := 0; i < 16; i++ { 333 p256Sqr(out, out) 334 } 335 p256Mul(out, out, p16) 336 337 for i := 0; i < 8; i++ { 338 p256Sqr(out, out) 339 } 340 p256Mul(out, out, p8) 341 342 p256Sqr(out, out) 343 p256Sqr(out, out) 344 p256Sqr(out, out) 345 p256Sqr(out, out) 346 p256Mul(out, out, p4) 347 348 p256Sqr(out, out) 349 p256Sqr(out, out) 350 p256Mul(out, out, p2) 351 352 p256Sqr(out, out) 353 p256Sqr(out, out) 354 p256Mul(out, out, in) 355 } 356 357 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 358 copy(r[index*12:], p.xyz[:]) 359 } 360 361 func boothW5(in uint) (int, int) { 362 var s uint = ^((in >> 5) - 1) 363 var d uint = (1 << 6) - in - 1 364 d = (d & s) | (in & (^s)) 365 d = (d >> 1) + (d & 1) 366 return int(d), int(s & 1) 367 } 368 369 func boothW7(in uint) (int, int) { 370 var s uint = ^((in >> 7) - 1) 371 var d uint = (1 << 8) - in - 1 372 d = (d & s) | (in & (^s)) 373 d = (d >> 1) + (d & 1) 374 return int(d), int(s & 1) 375 } 376 377 func initTable() { 378 p256Precomputed = new([37][64 * 8]uint64) 379 380 basePoint := []uint64{ 381 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, 382 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, 383 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, 384 } 385 t1 := make([]uint64, 12) 386 t2 := make([]uint64, 12) 387 copy(t2, basePoint) 388 389 zInv := make([]uint64, 4) 390 zInvSq := make([]uint64, 4) 391 for j := 0; j < 64; j++ { 392 copy(t1, t2) 393 for i := 0; i < 37; i++ { 394 // The window size is 7 so we need to double 7 times. 395 if i != 0 { 396 for k := 0; k < 7; k++ { 397 p256PointDoubleAsm(t1, t1) 398 } 399 } 400 // Convert the point to affine form. (Its values are 401 // still in Montgomery form however.) 402 p256Inverse(zInv, t1[8:12]) 403 p256Sqr(zInvSq, zInv) 404 p256Mul(zInv, zInv, zInvSq) 405 406 p256Mul(t1[:4], t1[:4], zInvSq) 407 p256Mul(t1[4:8], t1[4:8], zInv) 408 409 copy(t1[8:12], basePoint[8:12]) 410 // Update the table entry 411 copy(p256Precomputed[i][j*8:], t1[:8]) 412 } 413 if j == 0 { 414 p256PointDoubleAsm(t2, basePoint) 415 } else { 416 p256PointAddAsm(t2, t2, basePoint) 417 } 418 } 419 } 420 421 func (p *p256Point) p256BaseMult(scalar []uint64) { 422 precomputeOnce.Do(initTable) 423 424 wvalue := (scalar[0] << 1) & 0xff 425 sel, sign := boothW7(uint(wvalue)) 426 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) 427 p256NegCond(p.xyz[4:8], sign) 428 429 // (This is one, in the Montgomery domain.) 430 p.xyz[8] = 0x0000000000000001 431 p.xyz[9] = 0xffffffff00000000 432 p.xyz[10] = 0xffffffffffffffff 433 p.xyz[11] = 0x00000000fffffffe 434 435 var t0 p256Point 436 // (This is one, in the Montgomery domain.) 437 t0.xyz[8] = 0x0000000000000001 438 t0.xyz[9] = 0xffffffff00000000 439 t0.xyz[10] = 0xffffffffffffffff 440 t0.xyz[11] = 0x00000000fffffffe 441 442 index := uint(6) 443 zero := sel 444 445 for i := 1; i < 37; i++ { 446 if index < 192 { 447 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff 448 } else { 449 wvalue = (scalar[index/64] >> (index % 64)) & 0xff 450 } 451 index += 7 452 sel, sign = boothW7(uint(wvalue)) 453 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) 454 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 455 zero |= sel 456 } 457 } 458 459 func (p *p256Point) p256ScalarMult(scalar []uint64) { 460 // precomp is a table of precomputed points that stores powers of p 461 // from p^1 to p^16. 462 var precomp [16 * 4 * 3]uint64 463 var t0, t1, t2, t3 p256Point 464 465 // Prepare the table 466 p.p256StorePoint(&precomp, 0) // 1 467 468 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 469 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 470 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 471 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 472 t0.p256StorePoint(&precomp, 1) // 2 473 t1.p256StorePoint(&precomp, 3) // 4 474 t2.p256StorePoint(&precomp, 7) // 8 475 t3.p256StorePoint(&precomp, 15) // 16 476 477 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 478 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 479 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 480 t0.p256StorePoint(&precomp, 2) // 3 481 t1.p256StorePoint(&precomp, 4) // 5 482 t2.p256StorePoint(&precomp, 8) // 9 483 484 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 485 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 486 t0.p256StorePoint(&precomp, 5) // 6 487 t1.p256StorePoint(&precomp, 9) // 10 488 489 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 490 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 491 t2.p256StorePoint(&precomp, 6) // 7 492 t1.p256StorePoint(&precomp, 10) // 11 493 494 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 495 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 496 t0.p256StorePoint(&precomp, 11) // 12 497 t2.p256StorePoint(&precomp, 13) // 14 498 499 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 500 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 501 t0.p256StorePoint(&precomp, 12) // 13 502 t2.p256StorePoint(&precomp, 14) // 15 503 504 // Start scanning the window from top bit 505 index := uint(254) 506 var sel, sign int 507 508 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 509 sel, _ = boothW5(uint(wvalue)) 510 511 p256Select(p.xyz[0:12], precomp[0:], sel) 512 zero := sel 513 514 for index > 4 { 515 index -= 5 516 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 517 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 518 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 519 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 520 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 521 522 if index < 192 { 523 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 524 } else { 525 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 526 } 527 528 sel, sign = boothW5(uint(wvalue)) 529 530 p256Select(t0.xyz[0:], precomp[0:], sel) 531 p256NegCond(t0.xyz[4:8], sign) 532 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 533 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 534 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 535 zero |= sel 536 } 537 538 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 539 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 540 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 541 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 542 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 543 544 wvalue = (scalar[0] << 1) & 0x3f 545 sel, sign = boothW5(uint(wvalue)) 546 547 p256Select(t0.xyz[0:], precomp[0:], sel) 548 p256NegCond(t0.xyz[4:8], sign) 549 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 550 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 551 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 552 }