github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/crypto/elliptic/p256_amd64.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 // +build amd64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 "sync" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var ( 33 p256 p256Curve 34 p256Precomputed *[37][64 * 8]uint64 35 precomputeOnce sync.Once 36 ) 37 38 func initP256() { 39 // See FIPS 186-3, section D.2.3 40 p256.CurveParams = &CurveParams{Name: "P-256"} 41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 46 p256.BitSize = 256 47 } 48 49 func (curve p256Curve) Params() *CurveParams { 50 return curve.CurveParams 51 } 52 53 // Functions implemented in p256_asm_amd64.s 54 // Montgomery multiplication modulo P256 55 func p256Mul(res, in1, in2 []uint64) 56 57 // Montgomery square modulo P256 58 func p256Sqr(res, in []uint64) 59 60 // Montgomery multiplication by 1 61 func p256FromMont(res, in []uint64) 62 63 // iff cond == 1 val <- -val 64 func p256NegCond(val []uint64, cond int) 65 66 // if cond == 0 res <- b; else res <- a 67 func p256MovCond(res, a, b []uint64, cond int) 68 69 // Endianness swap 70 func p256BigToLittle(res []uint64, in []byte) 71 func p256LittleToBig(res []byte, in []uint64) 72 73 // Constant time table access 74 func p256Select(point, table []uint64, idx int) 75 func p256SelectBase(point, table []uint64, idx int) 76 77 // Montgomery multiplication modulo Ord(G) 78 func p256OrdMul(res, in1, in2 []uint64) 79 80 // Montgomery square modulo Ord(G), repeated n times 81 func p256OrdSqr(res, in []uint64, n int) 82 83 // Point add with in2 being affine point 84 // If sign == 1 -> in2 = -in2 85 // If sel == 0 -> res = in1 86 // if zero == 0 -> res = in2 87 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 88 89 // Point add. Returns one if the two input points were equal and zero 90 // otherwise. (Note that, due to the way that the equations work out, some 91 // representations of ∞ are considered equal to everything by this function.) 92 func p256PointAddAsm(res, in1, in2 []uint64) int 93 94 // Point double 95 func p256PointDoubleAsm(res, in []uint64) 96 97 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 98 if k.Sign() < 0 { 99 // This should never happen. 100 k = new(big.Int).Neg(k) 101 } 102 103 if k.Cmp(p256.N) >= 0 { 104 // This should never happen. 105 k = new(big.Int).Mod(k, p256.N) 106 } 107 108 // table will store precomputed powers of x. The four words at index 109 // 4×i store x^(i+1). 110 var table [4 * 15]uint64 111 112 x := make([]uint64, 4) 113 fromBig(x[:], k) 114 // This code operates in the Montgomery domain where R = 2^256 mod n 115 // and n is the order of the scalar field. (See initP256 for the 116 // value.) Elements in the Montgomery domain take the form a×R and 117 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 118 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 119 // i.e. converts x into the Montgomery domain. 120 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 121 p256OrdMul(table[:4], x, RR) 122 123 // Prepare the table, no need in constant time access, because the 124 // power is not a secret. (Entry 0 is never used.) 125 for i := 2; i < 16; i += 2 { 126 p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1) 127 p256OrdMul(table[4*i:], table[4*(i-1):], table[:4]) 128 } 129 130 x[0] = table[4*14+0] // f 131 x[1] = table[4*14+1] 132 x[2] = table[4*14+2] 133 x[3] = table[4*14+3] 134 135 p256OrdSqr(x, x, 4) 136 p256OrdMul(x, x, table[4*14:4*14+4]) // ff 137 t := make([]uint64, 4, 4) 138 t[0] = x[0] 139 t[1] = x[1] 140 t[2] = x[2] 141 t[3] = x[3] 142 143 p256OrdSqr(x, x, 8) 144 p256OrdMul(x, x, t) // ffff 145 t[0] = x[0] 146 t[1] = x[1] 147 t[2] = x[2] 148 t[3] = x[3] 149 150 p256OrdSqr(x, x, 16) 151 p256OrdMul(x, x, t) // ffffffff 152 t[0] = x[0] 153 t[1] = x[1] 154 t[2] = x[2] 155 t[3] = x[3] 156 157 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 158 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 159 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 160 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 161 162 // Remaining 32 windows 163 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 164 for i := 0; i < 32; i++ { 165 p256OrdSqr(x, x, 4) 166 p256OrdMul(x, x, table[4*(expLo[i]-1):]) 167 } 168 169 // Multiplying by one in the Montgomery domain converts a Montgomery 170 // value out of the domain. 171 one := []uint64{1, 0, 0, 0} 172 p256OrdMul(x, x, one) 173 174 xOut := make([]byte, 32) 175 p256LittleToBig(xOut, x) 176 return new(big.Int).SetBytes(xOut) 177 } 178 179 // fromBig converts a *big.Int into a format used by this code. 180 func fromBig(out []uint64, big *big.Int) { 181 for i := range out { 182 out[i] = 0 183 } 184 185 for i, v := range big.Bits() { 186 out[i] = uint64(v) 187 } 188 } 189 190 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 191 // to out. If the scalar is equal or greater than the order of the group, it's 192 // reduced modulo that order. 193 func p256GetScalar(out []uint64, in []byte) { 194 n := new(big.Int).SetBytes(in) 195 196 if n.Cmp(p256.N) >= 0 { 197 n.Mod(n, p256.N) 198 } 199 fromBig(out, n) 200 } 201 202 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 203 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 204 // R×R mod p. See comment in Inverse about how this is used. 205 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 206 207 func maybeReduceModP(in *big.Int) *big.Int { 208 if in.Cmp(p256.P) < 0 { 209 return in 210 } 211 return new(big.Int).Mod(in, p256.P) 212 } 213 214 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 215 scalarReversed := make([]uint64, 4) 216 var r1, r2 p256Point 217 p256GetScalar(scalarReversed, baseScalar) 218 r1IsInfinity := scalarIsZero(scalarReversed) 219 r1.p256BaseMult(scalarReversed) 220 221 p256GetScalar(scalarReversed, scalar) 222 r2IsInfinity := scalarIsZero(scalarReversed) 223 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 224 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 225 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 226 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 227 228 // This sets r2's Z value to 1, in the Montgomery domain. 229 r2.xyz[8] = 0x0000000000000001 230 r2.xyz[9] = 0xffffffff00000000 231 r2.xyz[10] = 0xffffffffffffffff 232 r2.xyz[11] = 0x00000000fffffffe 233 234 r2.p256ScalarMult(scalarReversed) 235 236 var sum, double p256Point 237 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 238 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 239 sum.CopyConditional(&double, pointsEqual) 240 sum.CopyConditional(&r1, r2IsInfinity) 241 sum.CopyConditional(&r2, r1IsInfinity) 242 243 return sum.p256PointToAffine() 244 } 245 246 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 247 scalarReversed := make([]uint64, 4) 248 p256GetScalar(scalarReversed, scalar) 249 250 var r p256Point 251 r.p256BaseMult(scalarReversed) 252 return r.p256PointToAffine() 253 } 254 255 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 256 scalarReversed := make([]uint64, 4) 257 p256GetScalar(scalarReversed, scalar) 258 259 var r p256Point 260 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 261 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 262 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 263 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 264 // This sets r2's Z value to 1, in the Montgomery domain. 265 r.xyz[8] = 0x0000000000000001 266 r.xyz[9] = 0xffffffff00000000 267 r.xyz[10] = 0xffffffffffffffff 268 r.xyz[11] = 0x00000000fffffffe 269 270 r.p256ScalarMult(scalarReversed) 271 return r.p256PointToAffine() 272 } 273 274 // uint64IsZero returns 1 if x is zero and zero otherwise. 275 func uint64IsZero(x uint64) int { 276 x = ^x 277 x &= x >> 32 278 x &= x >> 16 279 x &= x >> 8 280 x &= x >> 4 281 x &= x >> 2 282 x &= x >> 1 283 return int(x & 1) 284 } 285 286 // scalarIsZero returns 1 if scalar represents the zero value, and zero 287 // otherwise. 288 func scalarIsZero(scalar []uint64) int { 289 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 290 } 291 292 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 293 zInv := make([]uint64, 4) 294 zInvSq := make([]uint64, 4) 295 p256Inverse(zInv, p.xyz[8:12]) 296 p256Sqr(zInvSq, zInv) 297 p256Mul(zInv, zInv, zInvSq) 298 299 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 300 p256Mul(zInv, p.xyz[4:8], zInv) 301 302 p256FromMont(zInvSq, zInvSq) 303 p256FromMont(zInv, zInv) 304 305 xOut := make([]byte, 32) 306 yOut := make([]byte, 32) 307 p256LittleToBig(xOut, zInvSq) 308 p256LittleToBig(yOut, zInv) 309 310 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 311 } 312 313 // CopyConditional copies overwrites p with src if v == 1, and leaves p 314 // unchanged if v == 0. 315 func (p *p256Point) CopyConditional(src *p256Point, v int) { 316 pMask := uint64(v) - 1 317 srcMask := ^pMask 318 319 for i, n := range p.xyz { 320 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 321 } 322 } 323 324 // p256Inverse sets out to in^-1 mod p. 325 func p256Inverse(out, in []uint64) { 326 var stack [6 * 4]uint64 327 p2 := stack[4*0 : 4*0+4] 328 p4 := stack[4*1 : 4*1+4] 329 p8 := stack[4*2 : 4*2+4] 330 p16 := stack[4*3 : 4*3+4] 331 p32 := stack[4*4 : 4*4+4] 332 333 p256Sqr(out, in) 334 p256Mul(p2, out, in) // 3*p 335 336 p256Sqr(out, p2) 337 p256Sqr(out, out) 338 p256Mul(p4, out, p2) // f*p 339 340 p256Sqr(out, p4) 341 p256Sqr(out, out) 342 p256Sqr(out, out) 343 p256Sqr(out, out) 344 p256Mul(p8, out, p4) // ff*p 345 346 p256Sqr(out, p8) 347 348 for i := 0; i < 7; i++ { 349 p256Sqr(out, out) 350 } 351 p256Mul(p16, out, p8) // ffff*p 352 353 p256Sqr(out, p16) 354 for i := 0; i < 15; i++ { 355 p256Sqr(out, out) 356 } 357 p256Mul(p32, out, p16) // ffffffff*p 358 359 p256Sqr(out, p32) 360 361 for i := 0; i < 31; i++ { 362 p256Sqr(out, out) 363 } 364 p256Mul(out, out, in) 365 366 for i := 0; i < 32*4; i++ { 367 p256Sqr(out, out) 368 } 369 p256Mul(out, out, p32) 370 371 for i := 0; i < 32; i++ { 372 p256Sqr(out, out) 373 } 374 p256Mul(out, out, p32) 375 376 for i := 0; i < 16; i++ { 377 p256Sqr(out, out) 378 } 379 p256Mul(out, out, p16) 380 381 for i := 0; i < 8; i++ { 382 p256Sqr(out, out) 383 } 384 p256Mul(out, out, p8) 385 386 p256Sqr(out, out) 387 p256Sqr(out, out) 388 p256Sqr(out, out) 389 p256Sqr(out, out) 390 p256Mul(out, out, p4) 391 392 p256Sqr(out, out) 393 p256Sqr(out, out) 394 p256Mul(out, out, p2) 395 396 p256Sqr(out, out) 397 p256Sqr(out, out) 398 p256Mul(out, out, in) 399 } 400 401 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 402 copy(r[index*12:], p.xyz[:]) 403 } 404 405 func boothW5(in uint) (int, int) { 406 var s uint = ^((in >> 5) - 1) 407 var d uint = (1 << 6) - in - 1 408 d = (d & s) | (in & (^s)) 409 d = (d >> 1) + (d & 1) 410 return int(d), int(s & 1) 411 } 412 413 func boothW7(in uint) (int, int) { 414 var s uint = ^((in >> 7) - 1) 415 var d uint = (1 << 8) - in - 1 416 d = (d & s) | (in & (^s)) 417 d = (d >> 1) + (d & 1) 418 return int(d), int(s & 1) 419 } 420 421 func initTable() { 422 p256Precomputed = new([37][64 * 8]uint64) 423 424 basePoint := []uint64{ 425 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, 426 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, 427 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, 428 } 429 t1 := make([]uint64, 12) 430 t2 := make([]uint64, 12) 431 copy(t2, basePoint) 432 433 zInv := make([]uint64, 4) 434 zInvSq := make([]uint64, 4) 435 for j := 0; j < 64; j++ { 436 copy(t1, t2) 437 for i := 0; i < 37; i++ { 438 // The window size is 7 so we need to double 7 times. 439 if i != 0 { 440 for k := 0; k < 7; k++ { 441 p256PointDoubleAsm(t1, t1) 442 } 443 } 444 // Convert the point to affine form. (Its values are 445 // still in Montgomery form however.) 446 p256Inverse(zInv, t1[8:12]) 447 p256Sqr(zInvSq, zInv) 448 p256Mul(zInv, zInv, zInvSq) 449 450 p256Mul(t1[:4], t1[:4], zInvSq) 451 p256Mul(t1[4:8], t1[4:8], zInv) 452 453 copy(t1[8:12], basePoint[8:12]) 454 // Update the table entry 455 copy(p256Precomputed[i][j*8:], t1[:8]) 456 } 457 if j == 0 { 458 p256PointDoubleAsm(t2, basePoint) 459 } else { 460 p256PointAddAsm(t2, t2, basePoint) 461 } 462 } 463 } 464 465 func (p *p256Point) p256BaseMult(scalar []uint64) { 466 precomputeOnce.Do(initTable) 467 468 wvalue := (scalar[0] << 1) & 0xff 469 sel, sign := boothW7(uint(wvalue)) 470 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) 471 p256NegCond(p.xyz[4:8], sign) 472 473 // (This is one, in the Montgomery domain.) 474 p.xyz[8] = 0x0000000000000001 475 p.xyz[9] = 0xffffffff00000000 476 p.xyz[10] = 0xffffffffffffffff 477 p.xyz[11] = 0x00000000fffffffe 478 479 var t0 p256Point 480 // (This is one, in the Montgomery domain.) 481 t0.xyz[8] = 0x0000000000000001 482 t0.xyz[9] = 0xffffffff00000000 483 t0.xyz[10] = 0xffffffffffffffff 484 t0.xyz[11] = 0x00000000fffffffe 485 486 index := uint(6) 487 zero := sel 488 489 for i := 1; i < 37; i++ { 490 if index < 192 { 491 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff 492 } else { 493 wvalue = (scalar[index/64] >> (index % 64)) & 0xff 494 } 495 index += 7 496 sel, sign = boothW7(uint(wvalue)) 497 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) 498 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 499 zero |= sel 500 } 501 } 502 503 func (p *p256Point) p256ScalarMult(scalar []uint64) { 504 // precomp is a table of precomputed points that stores powers of p 505 // from p^1 to p^16. 506 var precomp [16 * 4 * 3]uint64 507 var t0, t1, t2, t3 p256Point 508 509 // Prepare the table 510 p.p256StorePoint(&precomp, 0) // 1 511 512 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 513 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 514 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 515 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 516 t0.p256StorePoint(&precomp, 1) // 2 517 t1.p256StorePoint(&precomp, 3) // 4 518 t2.p256StorePoint(&precomp, 7) // 8 519 t3.p256StorePoint(&precomp, 15) // 16 520 521 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 522 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 523 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 524 t0.p256StorePoint(&precomp, 2) // 3 525 t1.p256StorePoint(&precomp, 4) // 5 526 t2.p256StorePoint(&precomp, 8) // 9 527 528 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 529 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 530 t0.p256StorePoint(&precomp, 5) // 6 531 t1.p256StorePoint(&precomp, 9) // 10 532 533 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 534 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 535 t2.p256StorePoint(&precomp, 6) // 7 536 t1.p256StorePoint(&precomp, 10) // 11 537 538 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 539 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 540 t0.p256StorePoint(&precomp, 11) // 12 541 t2.p256StorePoint(&precomp, 13) // 14 542 543 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 544 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 545 t0.p256StorePoint(&precomp, 12) // 13 546 t2.p256StorePoint(&precomp, 14) // 15 547 548 // Start scanning the window from top bit 549 index := uint(254) 550 var sel, sign int 551 552 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 553 sel, _ = boothW5(uint(wvalue)) 554 555 p256Select(p.xyz[0:12], precomp[0:], sel) 556 zero := sel 557 558 for index > 4 { 559 index -= 5 560 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 561 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 562 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 563 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 564 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 565 566 if index < 192 { 567 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 568 } else { 569 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 570 } 571 572 sel, sign = boothW5(uint(wvalue)) 573 574 p256Select(t0.xyz[0:], precomp[0:], sel) 575 p256NegCond(t0.xyz[4:8], sign) 576 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 577 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 578 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 579 zero |= sel 580 } 581 582 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 583 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 584 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 585 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 586 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 587 588 wvalue = (scalar[0] << 1) & 0x3f 589 sel, sign = boothW5(uint(wvalue)) 590 591 p256Select(t0.xyz[0:], precomp[0:], sel) 592 p256NegCond(t0.xyz[4:8], sign) 593 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 594 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 595 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 596 }