github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/crypto/elliptic/p256_asm.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 // +build amd64 arm64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 "sync" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var ( 33 p256 p256Curve 34 p256Precomputed *[43][32 * 8]uint64 35 precomputeOnce sync.Once 36 ) 37 38 func initP256() { 39 // See FIPS 186-3, section D.2.3 40 p256.CurveParams = &CurveParams{Name: "P-256"} 41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 46 p256.BitSize = 256 47 } 48 49 func (curve p256Curve) Params() *CurveParams { 50 return curve.CurveParams 51 } 52 53 // Functions implemented in p256_asm_*64.s 54 // Montgomery multiplication modulo P256 55 //go:noescape 56 func p256Mul(res, in1, in2 []uint64) 57 58 // Montgomery square modulo P256, repeated n times (n >= 1) 59 //go:noescape 60 func p256Sqr(res, in []uint64, n int) 61 62 // Montgomery multiplication by 1 63 //go:noescape 64 func p256FromMont(res, in []uint64) 65 66 // iff cond == 1 val <- -val 67 //go:noescape 68 func p256NegCond(val []uint64, cond int) 69 70 // if cond == 0 res <- b; else res <- a 71 //go:noescape 72 func p256MovCond(res, a, b []uint64, cond int) 73 74 // Endianness swap 75 //go:noescape 76 func p256BigToLittle(res []uint64, in []byte) 77 78 //go:noescape 79 func p256LittleToBig(res []byte, in []uint64) 80 81 // Constant time table access 82 //go:noescape 83 func p256Select(point, table []uint64, idx int) 84 85 //go:noescape 86 func p256SelectBase(point, table []uint64, idx int) 87 88 // Montgomery multiplication modulo Ord(G) 89 //go:noescape 90 func p256OrdMul(res, in1, in2 []uint64) 91 92 // Montgomery square modulo Ord(G), repeated n times 93 //go:noescape 94 func p256OrdSqr(res, in []uint64, n int) 95 96 // Point add with in2 being affine point 97 // If sign == 1 -> in2 = -in2 98 // If sel == 0 -> res = in1 99 // if zero == 0 -> res = in2 100 //go:noescape 101 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 102 103 // Point add. Returns one if the two input points were equal and zero 104 // otherwise. (Note that, due to the way that the equations work out, some 105 // representations of ∞ are considered equal to everything by this function.) 106 //go:noescape 107 func p256PointAddAsm(res, in1, in2 []uint64) int 108 109 // Point double 110 //go:noescape 111 func p256PointDoubleAsm(res, in []uint64) 112 113 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 114 if k.Sign() < 0 { 115 // This should never happen. 116 k = new(big.Int).Neg(k) 117 } 118 119 if k.Cmp(p256.N) >= 0 { 120 // This should never happen. 121 k = new(big.Int).Mod(k, p256.N) 122 } 123 124 // table will store precomputed powers of x. 125 var table [4 * 9]uint64 126 var ( 127 _1 = table[4*0 : 4*1] 128 _11 = table[4*1 : 4*2] 129 _101 = table[4*2 : 4*3] 130 _111 = table[4*3 : 4*4] 131 _1111 = table[4*4 : 4*5] 132 _10101 = table[4*5 : 4*6] 133 _101111 = table[4*6 : 4*7] 134 x = table[4*7 : 4*8] 135 t = table[4*8 : 4*9] 136 ) 137 138 fromBig(x[:], k) 139 // This code operates in the Montgomery domain where R = 2^256 mod n 140 // and n is the order of the scalar field. (See initP256 for the 141 // value.) Elements in the Montgomery domain take the form a×R and 142 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 143 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 144 // i.e. converts x into the Montgomery domain. 145 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 146 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 147 p256OrdMul(_1, x, RR) // _1 148 p256OrdSqr(x, _1, 1) // _10 149 p256OrdMul(_11, x, _1) // _11 150 p256OrdMul(_101, x, _11) // _101 151 p256OrdMul(_111, x, _101) // _111 152 p256OrdSqr(x, _101, 1) // _1010 153 p256OrdMul(_1111, _101, x) // _1111 154 155 p256OrdSqr(t, x, 1) // _10100 156 p256OrdMul(_10101, t, _1) // _10101 157 p256OrdSqr(x, _10101, 1) // _101010 158 p256OrdMul(_101111, _101, x) // _101111 159 p256OrdMul(x, _10101, x) // _111111 = x6 160 p256OrdSqr(t, x, 2) // _11111100 161 p256OrdMul(t, t, _11) // _11111111 = x8 162 p256OrdSqr(x, t, 8) // _ff00 163 p256OrdMul(x, x, t) // _ffff = x16 164 p256OrdSqr(t, x, 16) // _ffff0000 165 p256OrdMul(t, t, x) // _ffffffff = x32 166 167 p256OrdSqr(x, t, 64) 168 p256OrdMul(x, x, t) 169 p256OrdSqr(x, x, 32) 170 p256OrdMul(x, x, t) 171 172 sqrs := []uint8{ 173 6, 5, 4, 5, 5, 174 4, 3, 3, 5, 9, 175 6, 2, 5, 6, 5, 176 4, 5, 5, 3, 10, 177 2, 5, 5, 3, 7, 6} 178 muls := [][]uint64{ 179 _101111, _111, _11, _1111, _10101, 180 _101, _101, _101, _111, _101111, 181 _1111, _1, _1, _1111, _111, 182 _111, _111, _101, _11, _101111, 183 _11, _11, _11, _1, _10101, _1111} 184 185 for i, s := range sqrs { 186 p256OrdSqr(x, x, int(s)) 187 p256OrdMul(x, x, muls[i]) 188 } 189 190 // Multiplying by one in the Montgomery domain converts a Montgomery 191 // value out of the domain. 192 one := []uint64{1, 0, 0, 0} 193 p256OrdMul(x, x, one) 194 195 xOut := make([]byte, 32) 196 p256LittleToBig(xOut, x) 197 return new(big.Int).SetBytes(xOut) 198 } 199 200 // fromBig converts a *big.Int into a format used by this code. 201 func fromBig(out []uint64, big *big.Int) { 202 for i := range out { 203 out[i] = 0 204 } 205 206 for i, v := range big.Bits() { 207 out[i] = uint64(v) 208 } 209 } 210 211 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 212 // to out. If the scalar is equal or greater than the order of the group, it's 213 // reduced modulo that order. 214 func p256GetScalar(out []uint64, in []byte) { 215 n := new(big.Int).SetBytes(in) 216 217 if n.Cmp(p256.N) >= 0 { 218 n.Mod(n, p256.N) 219 } 220 fromBig(out, n) 221 } 222 223 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 224 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 225 // R×R mod p. See comment in Inverse about how this is used. 226 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 227 228 func maybeReduceModP(in *big.Int) *big.Int { 229 if in.Cmp(p256.P) < 0 { 230 return in 231 } 232 return new(big.Int).Mod(in, p256.P) 233 } 234 235 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 236 scalarReversed := make([]uint64, 4) 237 var r1, r2 p256Point 238 p256GetScalar(scalarReversed, baseScalar) 239 r1IsInfinity := scalarIsZero(scalarReversed) 240 r1.p256BaseMult(scalarReversed) 241 242 p256GetScalar(scalarReversed, scalar) 243 r2IsInfinity := scalarIsZero(scalarReversed) 244 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 245 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 246 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 247 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 248 249 // This sets r2's Z value to 1, in the Montgomery domain. 250 r2.xyz[8] = 0x0000000000000001 251 r2.xyz[9] = 0xffffffff00000000 252 r2.xyz[10] = 0xffffffffffffffff 253 r2.xyz[11] = 0x00000000fffffffe 254 255 r2.p256ScalarMult(scalarReversed) 256 257 var sum, double p256Point 258 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 259 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 260 sum.CopyConditional(&double, pointsEqual) 261 sum.CopyConditional(&r1, r2IsInfinity) 262 sum.CopyConditional(&r2, r1IsInfinity) 263 264 return sum.p256PointToAffine() 265 } 266 267 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 268 scalarReversed := make([]uint64, 4) 269 p256GetScalar(scalarReversed, scalar) 270 271 var r p256Point 272 r.p256BaseMult(scalarReversed) 273 return r.p256PointToAffine() 274 } 275 276 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 277 scalarReversed := make([]uint64, 4) 278 p256GetScalar(scalarReversed, scalar) 279 280 var r p256Point 281 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 282 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 283 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 284 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 285 // This sets r2's Z value to 1, in the Montgomery domain. 286 r.xyz[8] = 0x0000000000000001 287 r.xyz[9] = 0xffffffff00000000 288 r.xyz[10] = 0xffffffffffffffff 289 r.xyz[11] = 0x00000000fffffffe 290 291 r.p256ScalarMult(scalarReversed) 292 return r.p256PointToAffine() 293 } 294 295 // uint64IsZero returns 1 if x is zero and zero otherwise. 296 func uint64IsZero(x uint64) int { 297 x = ^x 298 x &= x >> 32 299 x &= x >> 16 300 x &= x >> 8 301 x &= x >> 4 302 x &= x >> 2 303 x &= x >> 1 304 return int(x & 1) 305 } 306 307 // scalarIsZero returns 1 if scalar represents the zero value, and zero 308 // otherwise. 309 func scalarIsZero(scalar []uint64) int { 310 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 311 } 312 313 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 314 zInv := make([]uint64, 4) 315 zInvSq := make([]uint64, 4) 316 p256Inverse(zInv, p.xyz[8:12]) 317 p256Sqr(zInvSq, zInv, 1) 318 p256Mul(zInv, zInv, zInvSq) 319 320 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 321 p256Mul(zInv, p.xyz[4:8], zInv) 322 323 p256FromMont(zInvSq, zInvSq) 324 p256FromMont(zInv, zInv) 325 326 xOut := make([]byte, 32) 327 yOut := make([]byte, 32) 328 p256LittleToBig(xOut, zInvSq) 329 p256LittleToBig(yOut, zInv) 330 331 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 332 } 333 334 // CopyConditional copies overwrites p with src if v == 1, and leaves p 335 // unchanged if v == 0. 336 func (p *p256Point) CopyConditional(src *p256Point, v int) { 337 pMask := uint64(v) - 1 338 srcMask := ^pMask 339 340 for i, n := range p.xyz { 341 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 342 } 343 } 344 345 // p256Inverse sets out to in^-1 mod p. 346 func p256Inverse(out, in []uint64) { 347 var stack [6 * 4]uint64 348 p2 := stack[4*0 : 4*0+4] 349 p4 := stack[4*1 : 4*1+4] 350 p8 := stack[4*2 : 4*2+4] 351 p16 := stack[4*3 : 4*3+4] 352 p32 := stack[4*4 : 4*4+4] 353 354 p256Sqr(out, in, 1) 355 p256Mul(p2, out, in) // 3*p 356 357 p256Sqr(out, p2, 2) 358 p256Mul(p4, out, p2) // f*p 359 360 p256Sqr(out, p4, 4) 361 p256Mul(p8, out, p4) // ff*p 362 363 p256Sqr(out, p8, 8) 364 p256Mul(p16, out, p8) // ffff*p 365 366 p256Sqr(out, p16, 16) 367 p256Mul(p32, out, p16) // ffffffff*p 368 369 p256Sqr(out, p32, 32) 370 p256Mul(out, out, in) 371 372 p256Sqr(out, out, 128) 373 p256Mul(out, out, p32) 374 375 p256Sqr(out, out, 32) 376 p256Mul(out, out, p32) 377 378 p256Sqr(out, out, 16) 379 p256Mul(out, out, p16) 380 381 p256Sqr(out, out, 8) 382 p256Mul(out, out, p8) 383 384 p256Sqr(out, out, 4) 385 p256Mul(out, out, p4) 386 387 p256Sqr(out, out, 2) 388 p256Mul(out, out, p2) 389 390 p256Sqr(out, out, 2) 391 p256Mul(out, out, in) 392 } 393 394 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 395 copy(r[index*12:], p.xyz[:]) 396 } 397 398 func boothW5(in uint) (int, int) { 399 var s uint = ^((in >> 5) - 1) 400 var d uint = (1 << 6) - in - 1 401 d = (d & s) | (in & (^s)) 402 d = (d >> 1) + (d & 1) 403 return int(d), int(s & 1) 404 } 405 406 func boothW6(in uint) (int, int) { 407 var s uint = ^((in >> 6) - 1) 408 var d uint = (1 << 7) - in - 1 409 d = (d & s) | (in & (^s)) 410 d = (d >> 1) + (d & 1) 411 return int(d), int(s & 1) 412 } 413 414 func initTable() { 415 p256Precomputed = new([43][32 * 8]uint64) 416 417 basePoint := []uint64{ 418 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, 419 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, 420 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, 421 } 422 t1 := make([]uint64, 12) 423 t2 := make([]uint64, 12) 424 copy(t2, basePoint) 425 426 zInv := make([]uint64, 4) 427 zInvSq := make([]uint64, 4) 428 for j := 0; j < 32; j++ { 429 copy(t1, t2) 430 for i := 0; i < 43; i++ { 431 // The window size is 6 so we need to double 6 times. 432 if i != 0 { 433 for k := 0; k < 6; k++ { 434 p256PointDoubleAsm(t1, t1) 435 } 436 } 437 // Convert the point to affine form. (Its values are 438 // still in Montgomery form however.) 439 p256Inverse(zInv, t1[8:12]) 440 p256Sqr(zInvSq, zInv, 1) 441 p256Mul(zInv, zInv, zInvSq) 442 443 p256Mul(t1[:4], t1[:4], zInvSq) 444 p256Mul(t1[4:8], t1[4:8], zInv) 445 446 copy(t1[8:12], basePoint[8:12]) 447 // Update the table entry 448 copy(p256Precomputed[i][j*8:], t1[:8]) 449 } 450 if j == 0 { 451 p256PointDoubleAsm(t2, basePoint) 452 } else { 453 p256PointAddAsm(t2, t2, basePoint) 454 } 455 } 456 } 457 458 func (p *p256Point) p256BaseMult(scalar []uint64) { 459 precomputeOnce.Do(initTable) 460 461 wvalue := (scalar[0] << 1) & 0x7f 462 sel, sign := boothW6(uint(wvalue)) 463 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) 464 p256NegCond(p.xyz[4:8], sign) 465 466 // (This is one, in the Montgomery domain.) 467 p.xyz[8] = 0x0000000000000001 468 p.xyz[9] = 0xffffffff00000000 469 p.xyz[10] = 0xffffffffffffffff 470 p.xyz[11] = 0x00000000fffffffe 471 472 var t0 p256Point 473 // (This is one, in the Montgomery domain.) 474 t0.xyz[8] = 0x0000000000000001 475 t0.xyz[9] = 0xffffffff00000000 476 t0.xyz[10] = 0xffffffffffffffff 477 t0.xyz[11] = 0x00000000fffffffe 478 479 index := uint(5) 480 zero := sel 481 482 for i := 1; i < 43; i++ { 483 if index < 192 { 484 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 485 } else { 486 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 487 } 488 index += 6 489 sel, sign = boothW6(uint(wvalue)) 490 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) 491 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 492 zero |= sel 493 } 494 } 495 496 func (p *p256Point) p256ScalarMult(scalar []uint64) { 497 // precomp is a table of precomputed points that stores powers of p 498 // from p^1 to p^16. 499 var precomp [16 * 4 * 3]uint64 500 var t0, t1, t2, t3 p256Point 501 502 // Prepare the table 503 p.p256StorePoint(&precomp, 0) // 1 504 505 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 506 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 507 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 508 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 509 t0.p256StorePoint(&precomp, 1) // 2 510 t1.p256StorePoint(&precomp, 3) // 4 511 t2.p256StorePoint(&precomp, 7) // 8 512 t3.p256StorePoint(&precomp, 15) // 16 513 514 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 515 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 516 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 517 t0.p256StorePoint(&precomp, 2) // 3 518 t1.p256StorePoint(&precomp, 4) // 5 519 t2.p256StorePoint(&precomp, 8) // 9 520 521 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 522 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 523 t0.p256StorePoint(&precomp, 5) // 6 524 t1.p256StorePoint(&precomp, 9) // 10 525 526 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 527 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 528 t2.p256StorePoint(&precomp, 6) // 7 529 t1.p256StorePoint(&precomp, 10) // 11 530 531 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 532 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 533 t0.p256StorePoint(&precomp, 11) // 12 534 t2.p256StorePoint(&precomp, 13) // 14 535 536 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 537 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 538 t0.p256StorePoint(&precomp, 12) // 13 539 t2.p256StorePoint(&precomp, 14) // 15 540 541 // Start scanning the window from top bit 542 index := uint(254) 543 var sel, sign int 544 545 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 546 sel, _ = boothW5(uint(wvalue)) 547 548 p256Select(p.xyz[0:12], precomp[0:], sel) 549 zero := sel 550 551 for index > 4 { 552 index -= 5 553 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 554 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 555 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 556 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 557 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 558 559 if index < 192 { 560 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 561 } else { 562 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 563 } 564 565 sel, sign = boothW5(uint(wvalue)) 566 567 p256Select(t0.xyz[0:], precomp[0:], sel) 568 p256NegCond(t0.xyz[4:8], sign) 569 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 570 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 571 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 572 zero |= sel 573 } 574 575 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 576 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 577 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 578 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 579 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 580 581 wvalue = (scalar[0] << 1) & 0x3f 582 sel, sign = boothW5(uint(wvalue)) 583 584 p256Select(t0.xyz[0:], precomp[0:], sel) 585 p256NegCond(t0.xyz[4:8], sign) 586 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 587 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 588 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 589 }