github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/elliptic/p256_asm.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 // +build amd64 arm64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 "sync" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var ( 33 p256 p256Curve 34 p256Precomputed *[43][32 * 8]uint64 35 precomputeOnce sync.Once 36 ) 37 38 func initP256() { 39 // See FIPS 186-3, section D.2.3 40 p256.CurveParams = &CurveParams{Name: "P-256"} 41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 43 p256.A, _ = new(big.Int).SetString("ffffffff00000001000000000000000000000000fffffffffffffffffffffffc", 16) 44 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 45 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 46 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 47 p256.BitSize = 256 48 } 49 50 func (curve p256Curve) Params() *CurveParams { 51 return curve.CurveParams 52 } 53 54 // Functions implemented in p256_asm_*64.s 55 // Montgomery multiplication modulo P256 56 //go:noescape 57 func p256Mul(res, in1, in2 []uint64) 58 59 // Montgomery square modulo P256, repeated n times (n >= 1) 60 //go:noescape 61 func p256Sqr(res, in []uint64, n int) 62 63 // Montgomery multiplication by 1 64 //go:noescape 65 func p256FromMont(res, in []uint64) 66 67 // iff cond == 1 val <- -val 68 //go:noescape 69 func p256NegCond(val []uint64, cond int) 70 71 // if cond == 0 res <- b; else res <- a 72 //go:noescape 73 func p256MovCond(res, a, b []uint64, cond int) 74 75 // Endianness swap 76 //go:noescape 77 func p256BigToLittle(res []uint64, in []byte) 78 79 //go:noescape 80 func p256LittleToBig(res []byte, in []uint64) 81 82 // Constant time table access 83 //go:noescape 84 func p256Select(point, table []uint64, idx int) 85 86 //go:noescape 87 func p256SelectBase(point, table []uint64, idx int) 88 89 // Montgomery multiplication modulo Ord(G) 90 //go:noescape 91 func p256OrdMul(res, in1, in2 []uint64) 92 93 // Montgomery square modulo Ord(G), repeated n times 94 //go:noescape 95 func p256OrdSqr(res, in []uint64, n int) 96 97 // Point add with in2 being affine point 98 // If sign == 1 -> in2 = -in2 99 // If sel == 0 -> res = in1 100 // if zero == 0 -> res = in2 101 //go:noescape 102 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 103 104 // Point add. Returns one if the two input points were equal and zero 105 // otherwise. (Note that, due to the way that the equations work out, some 106 // representations of ∞ are considered equal to everything by this function.) 107 //go:noescape 108 func p256PointAddAsm(res, in1, in2 []uint64) int 109 110 // Point double 111 //go:noescape 112 func p256PointDoubleAsm(res, in []uint64) 113 114 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 115 if k.Sign() < 0 { 116 // This should never happen. 117 k = new(big.Int).Neg(k) 118 } 119 120 if k.Cmp(p256.N) >= 0 { 121 // This should never happen. 122 k = new(big.Int).Mod(k, p256.N) 123 } 124 125 // table will store precomputed powers of x. 126 var table [4 * 9]uint64 127 var ( 128 _1 = table[4*0 : 4*1] 129 _11 = table[4*1 : 4*2] 130 _101 = table[4*2 : 4*3] 131 _111 = table[4*3 : 4*4] 132 _1111 = table[4*4 : 4*5] 133 _10101 = table[4*5 : 4*6] 134 _101111 = table[4*6 : 4*7] 135 x = table[4*7 : 4*8] 136 t = table[4*8 : 4*9] 137 ) 138 139 fromBig(x[:], k) 140 // This code operates in the Montgomery domain where R = 2^256 mod n 141 // and n is the order of the scalar field. (See initP256 for the 142 // value.) Elements in the Montgomery domain take the form a×R and 143 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 144 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 145 // i.e. converts x into the Montgomery domain. 146 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 147 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 148 p256OrdMul(_1, x, RR) // _1 149 p256OrdSqr(x, _1, 1) // _10 150 p256OrdMul(_11, x, _1) // _11 151 p256OrdMul(_101, x, _11) // _101 152 p256OrdMul(_111, x, _101) // _111 153 p256OrdSqr(x, _101, 1) // _1010 154 p256OrdMul(_1111, _101, x) // _1111 155 156 p256OrdSqr(t, x, 1) // _10100 157 p256OrdMul(_10101, t, _1) // _10101 158 p256OrdSqr(x, _10101, 1) // _101010 159 p256OrdMul(_101111, _101, x) // _101111 160 p256OrdMul(x, _10101, x) // _111111 = x6 161 p256OrdSqr(t, x, 2) // _11111100 162 p256OrdMul(t, t, _11) // _11111111 = x8 163 p256OrdSqr(x, t, 8) // _ff00 164 p256OrdMul(x, x, t) // _ffff = x16 165 p256OrdSqr(t, x, 16) // _ffff0000 166 p256OrdMul(t, t, x) // _ffffffff = x32 167 168 p256OrdSqr(x, t, 64) 169 p256OrdMul(x, x, t) 170 p256OrdSqr(x, x, 32) 171 p256OrdMul(x, x, t) 172 173 sqrs := []uint8{ 174 6, 5, 4, 5, 5, 175 4, 3, 3, 5, 9, 176 6, 2, 5, 6, 5, 177 4, 5, 5, 3, 10, 178 2, 5, 5, 3, 7, 6} 179 muls := [][]uint64{ 180 _101111, _111, _11, _1111, _10101, 181 _101, _101, _101, _111, _101111, 182 _1111, _1, _1, _1111, _111, 183 _111, _111, _101, _11, _101111, 184 _11, _11, _11, _1, _10101, _1111} 185 186 for i, s := range sqrs { 187 p256OrdSqr(x, x, int(s)) 188 p256OrdMul(x, x, muls[i]) 189 } 190 191 // Multiplying by one in the Montgomery domain converts a Montgomery 192 // value out of the domain. 193 one := []uint64{1, 0, 0, 0} 194 p256OrdMul(x, x, one) 195 196 xOut := make([]byte, 32) 197 p256LittleToBig(xOut, x) 198 return new(big.Int).SetBytes(xOut) 199 } 200 201 // fromBig converts a *big.Int into a format used by this code. 202 func fromBig(out []uint64, big *big.Int) { 203 for i := range out { 204 out[i] = 0 205 } 206 207 for i, v := range big.Bits() { 208 out[i] = uint64(v) 209 } 210 } 211 212 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 213 // to out. If the scalar is equal or greater than the order of the group, it's 214 // reduced modulo that order. 215 func p256GetScalar(out []uint64, in []byte) { 216 n := new(big.Int).SetBytes(in) 217 218 if n.Cmp(p256.N) >= 0 { 219 n.Mod(n, p256.N) 220 } 221 fromBig(out, n) 222 } 223 224 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 225 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 226 // R×R mod p. See comment in Inverse about how this is used. 227 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 228 229 func maybeReduceModP(in *big.Int) *big.Int { 230 if in.Cmp(p256.P) < 0 { 231 return in 232 } 233 return new(big.Int).Mod(in, p256.P) 234 } 235 236 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 237 scalarReversed := make([]uint64, 4) 238 var r1, r2 p256Point 239 p256GetScalar(scalarReversed, baseScalar) 240 r1IsInfinity := scalarIsZero(scalarReversed) 241 r1.p256BaseMult(scalarReversed) 242 243 p256GetScalar(scalarReversed, scalar) 244 r2IsInfinity := scalarIsZero(scalarReversed) 245 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 246 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 247 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 248 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 249 250 // This sets r2's Z value to 1, in the Montgomery domain. 251 r2.xyz[8] = 0x0000000000000001 252 r2.xyz[9] = 0xffffffff00000000 253 r2.xyz[10] = 0xffffffffffffffff 254 r2.xyz[11] = 0x00000000fffffffe 255 256 r2.p256ScalarMult(scalarReversed) 257 258 var sum, double p256Point 259 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 260 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 261 sum.CopyConditional(&double, pointsEqual) 262 sum.CopyConditional(&r1, r2IsInfinity) 263 sum.CopyConditional(&r2, r1IsInfinity) 264 265 return sum.p256PointToAffine() 266 } 267 268 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 269 scalarReversed := make([]uint64, 4) 270 p256GetScalar(scalarReversed, scalar) 271 272 var r p256Point 273 r.p256BaseMult(scalarReversed) 274 return r.p256PointToAffine() 275 } 276 277 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 278 scalarReversed := make([]uint64, 4) 279 p256GetScalar(scalarReversed, scalar) 280 281 var r p256Point 282 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 283 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 284 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 285 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 286 // This sets r2's Z value to 1, in the Montgomery domain. 287 r.xyz[8] = 0x0000000000000001 288 r.xyz[9] = 0xffffffff00000000 289 r.xyz[10] = 0xffffffffffffffff 290 r.xyz[11] = 0x00000000fffffffe 291 292 r.p256ScalarMult(scalarReversed) 293 return r.p256PointToAffine() 294 } 295 296 // uint64IsZero returns 1 if x is zero and zero otherwise. 297 func uint64IsZero(x uint64) int { 298 x = ^x 299 x &= x >> 32 300 x &= x >> 16 301 x &= x >> 8 302 x &= x >> 4 303 x &= x >> 2 304 x &= x >> 1 305 return int(x & 1) 306 } 307 308 // scalarIsZero returns 1 if scalar represents the zero value, and zero 309 // otherwise. 310 func scalarIsZero(scalar []uint64) int { 311 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 312 } 313 314 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 315 zInv := make([]uint64, 4) 316 zInvSq := make([]uint64, 4) 317 p256Inverse(zInv, p.xyz[8:12]) 318 p256Sqr(zInvSq, zInv, 1) 319 p256Mul(zInv, zInv, zInvSq) 320 321 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 322 p256Mul(zInv, p.xyz[4:8], zInv) 323 324 p256FromMont(zInvSq, zInvSq) 325 p256FromMont(zInv, zInv) 326 327 xOut := make([]byte, 32) 328 yOut := make([]byte, 32) 329 p256LittleToBig(xOut, zInvSq) 330 p256LittleToBig(yOut, zInv) 331 332 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 333 } 334 335 // CopyConditional copies overwrites p with src if v == 1, and leaves p 336 // unchanged if v == 0. 337 func (p *p256Point) CopyConditional(src *p256Point, v int) { 338 pMask := uint64(v) - 1 339 srcMask := ^pMask 340 341 for i, n := range p.xyz { 342 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 343 } 344 } 345 346 // p256Inverse sets out to in^-1 mod p. 347 func p256Inverse(out, in []uint64) { 348 var stack [6 * 4]uint64 349 p2 := stack[4*0 : 4*0+4] 350 p4 := stack[4*1 : 4*1+4] 351 p8 := stack[4*2 : 4*2+4] 352 p16 := stack[4*3 : 4*3+4] 353 p32 := stack[4*4 : 4*4+4] 354 355 p256Sqr(out, in, 1) 356 p256Mul(p2, out, in) // 3*p 357 358 p256Sqr(out, p2, 2) 359 p256Mul(p4, out, p2) // f*p 360 361 p256Sqr(out, p4, 4) 362 p256Mul(p8, out, p4) // ff*p 363 364 p256Sqr(out, p8, 8) 365 p256Mul(p16, out, p8) // ffff*p 366 367 p256Sqr(out, p16, 16) 368 p256Mul(p32, out, p16) // ffffffff*p 369 370 p256Sqr(out, p32, 32) 371 p256Mul(out, out, in) 372 373 p256Sqr(out, out, 128) 374 p256Mul(out, out, p32) 375 376 p256Sqr(out, out, 32) 377 p256Mul(out, out, p32) 378 379 p256Sqr(out, out, 16) 380 p256Mul(out, out, p16) 381 382 p256Sqr(out, out, 8) 383 p256Mul(out, out, p8) 384 385 p256Sqr(out, out, 4) 386 p256Mul(out, out, p4) 387 388 p256Sqr(out, out, 2) 389 p256Mul(out, out, p2) 390 391 p256Sqr(out, out, 2) 392 p256Mul(out, out, in) 393 } 394 395 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 396 copy(r[index*12:], p.xyz[:]) 397 } 398 399 func boothW5(in uint) (int, int) { 400 var s uint = ^((in >> 5) - 1) 401 var d uint = (1 << 6) - in - 1 402 d = (d & s) | (in & (^s)) 403 d = (d >> 1) + (d & 1) 404 return int(d), int(s & 1) 405 } 406 407 func boothW6(in uint) (int, int) { 408 var s uint = ^((in >> 6) - 1) 409 var d uint = (1 << 7) - in - 1 410 d = (d & s) | (in & (^s)) 411 d = (d >> 1) + (d & 1) 412 return int(d), int(s & 1) 413 } 414 415 func initTable() { 416 p256Precomputed = new([43][32 * 8]uint64) 417 418 basePoint := []uint64{ 419 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, 420 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, 421 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, 422 } 423 t1 := make([]uint64, 12) 424 t2 := make([]uint64, 12) 425 copy(t2, basePoint) 426 427 zInv := make([]uint64, 4) 428 zInvSq := make([]uint64, 4) 429 for j := 0; j < 32; j++ { 430 copy(t1, t2) 431 for i := 0; i < 43; i++ { 432 // The window size is 6 so we need to double 6 times. 433 if i != 0 { 434 for k := 0; k < 6; k++ { 435 p256PointDoubleAsm(t1, t1) 436 } 437 } 438 // Convert the point to affine form. (Its values are 439 // still in Montgomery form however.) 440 p256Inverse(zInv, t1[8:12]) 441 p256Sqr(zInvSq, zInv, 1) 442 p256Mul(zInv, zInv, zInvSq) 443 444 p256Mul(t1[:4], t1[:4], zInvSq) 445 p256Mul(t1[4:8], t1[4:8], zInv) 446 447 copy(t1[8:12], basePoint[8:12]) 448 // Update the table entry 449 copy(p256Precomputed[i][j*8:], t1[:8]) 450 } 451 if j == 0 { 452 p256PointDoubleAsm(t2, basePoint) 453 } else { 454 p256PointAddAsm(t2, t2, basePoint) 455 } 456 } 457 } 458 459 func (p *p256Point) p256BaseMult(scalar []uint64) { 460 precomputeOnce.Do(initTable) 461 462 wvalue := (scalar[0] << 1) & 0x7f 463 sel, sign := boothW6(uint(wvalue)) 464 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) 465 p256NegCond(p.xyz[4:8], sign) 466 467 // (This is one, in the Montgomery domain.) 468 p.xyz[8] = 0x0000000000000001 469 p.xyz[9] = 0xffffffff00000000 470 p.xyz[10] = 0xffffffffffffffff 471 p.xyz[11] = 0x00000000fffffffe 472 473 var t0 p256Point 474 // (This is one, in the Montgomery domain.) 475 t0.xyz[8] = 0x0000000000000001 476 t0.xyz[9] = 0xffffffff00000000 477 t0.xyz[10] = 0xffffffffffffffff 478 t0.xyz[11] = 0x00000000fffffffe 479 480 index := uint(5) 481 zero := sel 482 483 for i := 1; i < 43; i++ { 484 if index < 192 { 485 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 486 } else { 487 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 488 } 489 index += 6 490 sel, sign = boothW6(uint(wvalue)) 491 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) 492 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 493 zero |= sel 494 } 495 } 496 497 func (p *p256Point) p256ScalarMult(scalar []uint64) { 498 // precomp is a table of precomputed points that stores powers of p 499 // from p^1 to p^16. 500 var precomp [16 * 4 * 3]uint64 501 var t0, t1, t2, t3 p256Point 502 503 // Prepare the table 504 p.p256StorePoint(&precomp, 0) // 1 505 506 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 507 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 508 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 509 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 510 t0.p256StorePoint(&precomp, 1) // 2 511 t1.p256StorePoint(&precomp, 3) // 4 512 t2.p256StorePoint(&precomp, 7) // 8 513 t3.p256StorePoint(&precomp, 15) // 16 514 515 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 516 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 517 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 518 t0.p256StorePoint(&precomp, 2) // 3 519 t1.p256StorePoint(&precomp, 4) // 5 520 t2.p256StorePoint(&precomp, 8) // 9 521 522 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 523 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 524 t0.p256StorePoint(&precomp, 5) // 6 525 t1.p256StorePoint(&precomp, 9) // 10 526 527 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 528 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 529 t2.p256StorePoint(&precomp, 6) // 7 530 t1.p256StorePoint(&precomp, 10) // 11 531 532 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 533 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 534 t0.p256StorePoint(&precomp, 11) // 12 535 t2.p256StorePoint(&precomp, 13) // 14 536 537 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 538 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 539 t0.p256StorePoint(&precomp, 12) // 13 540 t2.p256StorePoint(&precomp, 14) // 15 541 542 // Start scanning the window from top bit 543 index := uint(254) 544 var sel, sign int 545 546 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 547 sel, _ = boothW5(uint(wvalue)) 548 549 p256Select(p.xyz[0:12], precomp[0:], sel) 550 zero := sel 551 552 for index > 4 { 553 index -= 5 554 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 555 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 556 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 557 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 558 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 559 560 if index < 192 { 561 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 562 } else { 563 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 564 } 565 566 sel, sign = boothW5(uint(wvalue)) 567 568 p256Select(t0.xyz[0:], precomp[0:], sel) 569 p256NegCond(t0.xyz[4:8], sign) 570 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 571 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 572 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 573 zero |= sel 574 } 575 576 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 577 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 578 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 579 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 580 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 581 582 wvalue = (scalar[0] << 1) & 0x3f 583 sel, sign = boothW5(uint(wvalue)) 584 585 p256Select(t0.xyz[0:], precomp[0:], sel) 586 p256NegCond(t0.xyz[4:8], sign) 587 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 588 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 589 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 590 }