github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/crypto/elliptic/p256_amd64.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file contains the Go wrapper for the constant-time, 64-bit assembly 6 // implementation of P256. The optimizations performed here are described in 7 // detail in: 8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9 // 256-bit primes" 10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11 // https://eprint.iacr.org/2013/816.pdf 12 13 // +build amd64 14 15 package elliptic 16 17 import ( 18 "math/big" 19 "sync" 20 ) 21 22 type ( 23 p256Curve struct { 24 *CurveParams 25 } 26 27 p256Point struct { 28 xyz [12]uint64 29 } 30 ) 31 32 var ( 33 p256 p256Curve 34 p256Precomputed *[37][64 * 8]uint64 35 precomputeOnce sync.Once 36 ) 37 38 func initP256() { 39 // See FIPS 186-3, section D.2.3 40 p256.CurveParams = &CurveParams{Name: "P-256"} 41 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10) 42 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10) 43 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16) 44 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16) 45 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16) 46 p256.BitSize = 256 47 } 48 49 func (curve p256Curve) Params() *CurveParams { 50 return curve.CurveParams 51 } 52 53 // Functions implemented in p256_asm_amd64.s 54 // Montgomery multiplication modulo P256 55 //go:noescape 56 func p256Mul(res, in1, in2 []uint64) 57 58 // Montgomery square modulo P256 59 //go:noescape 60 func p256Sqr(res, in []uint64) 61 62 // Montgomery multiplication by 1 63 //go:noescape 64 func p256FromMont(res, in []uint64) 65 66 // iff cond == 1 val <- -val 67 //go:noescape 68 func p256NegCond(val []uint64, cond int) 69 70 // if cond == 0 res <- b; else res <- a 71 //go:noescape 72 func p256MovCond(res, a, b []uint64, cond int) 73 74 // Endianness swap 75 //go:noescape 76 func p256BigToLittle(res []uint64, in []byte) 77 78 //go:noescape 79 func p256LittleToBig(res []byte, in []uint64) 80 81 // Constant time table access 82 //go:noescape 83 func p256Select(point, table []uint64, idx int) 84 85 //go:noescape 86 func p256SelectBase(point, table []uint64, idx int) 87 88 // Montgomery multiplication modulo Ord(G) 89 //go:noescape 90 func p256OrdMul(res, in1, in2 []uint64) 91 92 // Montgomery square modulo Ord(G), repeated n times 93 //go:noescape 94 func p256OrdSqr(res, in []uint64, n int) 95 96 // Point add with in2 being affine point 97 // If sign == 1 -> in2 = -in2 98 // If sel == 0 -> res = in1 99 // if zero == 0 -> res = in2 100 //go:noescape 101 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 102 103 // Point add. Returns one if the two input points were equal and zero 104 // otherwise. (Note that, due to the way that the equations work out, some 105 // representations of ∞ are considered equal to everything by this function.) 106 //go:noescape 107 func p256PointAddAsm(res, in1, in2 []uint64) int 108 109 // Point double 110 //go:noescape 111 func p256PointDoubleAsm(res, in []uint64) 112 113 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 114 if k.Sign() < 0 { 115 // This should never happen. 116 k = new(big.Int).Neg(k) 117 } 118 119 if k.Cmp(p256.N) >= 0 { 120 // This should never happen. 121 k = new(big.Int).Mod(k, p256.N) 122 } 123 124 // table will store precomputed powers of x. The four words at index 125 // 4×i store x^(i+1). 126 var table [4 * 15]uint64 127 128 x := make([]uint64, 4) 129 fromBig(x[:], k) 130 // This code operates in the Montgomery domain where R = 2^256 mod n 131 // and n is the order of the scalar field. (See initP256 for the 132 // value.) Elements in the Montgomery domain take the form a×R and 133 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 134 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 135 // i.e. converts x into the Montgomery domain. 136 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620} 137 p256OrdMul(table[:4], x, RR) 138 139 // Prepare the table, no need in constant time access, because the 140 // power is not a secret. (Entry 0 is never used.) 141 for i := 2; i < 16; i += 2 { 142 p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1) 143 p256OrdMul(table[4*i:], table[4*(i-1):], table[:4]) 144 } 145 146 x[0] = table[4*14+0] // f 147 x[1] = table[4*14+1] 148 x[2] = table[4*14+2] 149 x[3] = table[4*14+3] 150 151 p256OrdSqr(x, x, 4) 152 p256OrdMul(x, x, table[4*14:4*14+4]) // ff 153 t := make([]uint64, 4, 4) 154 t[0] = x[0] 155 t[1] = x[1] 156 t[2] = x[2] 157 t[3] = x[3] 158 159 p256OrdSqr(x, x, 8) 160 p256OrdMul(x, x, t) // ffff 161 t[0] = x[0] 162 t[1] = x[1] 163 t[2] = x[2] 164 t[3] = x[3] 165 166 p256OrdSqr(x, x, 16) 167 p256OrdMul(x, x, t) // ffffffff 168 t[0] = x[0] 169 t[1] = x[1] 170 t[2] = x[2] 171 t[3] = x[3] 172 173 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 174 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 175 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 176 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 177 178 // Remaining 32 windows 179 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 180 for i := 0; i < 32; i++ { 181 p256OrdSqr(x, x, 4) 182 p256OrdMul(x, x, table[4*(expLo[i]-1):]) 183 } 184 185 // Multiplying by one in the Montgomery domain converts a Montgomery 186 // value out of the domain. 187 one := []uint64{1, 0, 0, 0} 188 p256OrdMul(x, x, one) 189 190 xOut := make([]byte, 32) 191 p256LittleToBig(xOut, x) 192 return new(big.Int).SetBytes(xOut) 193 } 194 195 // fromBig converts a *big.Int into a format used by this code. 196 func fromBig(out []uint64, big *big.Int) { 197 for i := range out { 198 out[i] = 0 199 } 200 201 for i, v := range big.Bits() { 202 out[i] = uint64(v) 203 } 204 } 205 206 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 207 // to out. If the scalar is equal or greater than the order of the group, it's 208 // reduced modulo that order. 209 func p256GetScalar(out []uint64, in []byte) { 210 n := new(big.Int).SetBytes(in) 211 212 if n.Cmp(p256.N) >= 0 { 213 n.Mod(n, p256.N) 214 } 215 fromBig(out, n) 216 } 217 218 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 219 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 220 // R×R mod p. See comment in Inverse about how this is used. 221 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd} 222 223 func maybeReduceModP(in *big.Int) *big.Int { 224 if in.Cmp(p256.P) < 0 { 225 return in 226 } 227 return new(big.Int).Mod(in, p256.P) 228 } 229 230 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 231 scalarReversed := make([]uint64, 4) 232 var r1, r2 p256Point 233 p256GetScalar(scalarReversed, baseScalar) 234 r1IsInfinity := scalarIsZero(scalarReversed) 235 r1.p256BaseMult(scalarReversed) 236 237 p256GetScalar(scalarReversed, scalar) 238 r2IsInfinity := scalarIsZero(scalarReversed) 239 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 240 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 241 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 242 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 243 244 // This sets r2's Z value to 1, in the Montgomery domain. 245 r2.xyz[8] = 0x0000000000000001 246 r2.xyz[9] = 0xffffffff00000000 247 r2.xyz[10] = 0xffffffffffffffff 248 r2.xyz[11] = 0x00000000fffffffe 249 250 r2.p256ScalarMult(scalarReversed) 251 252 var sum, double p256Point 253 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 254 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 255 sum.CopyConditional(&double, pointsEqual) 256 sum.CopyConditional(&r1, r2IsInfinity) 257 sum.CopyConditional(&r2, r1IsInfinity) 258 259 return sum.p256PointToAffine() 260 } 261 262 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 263 scalarReversed := make([]uint64, 4) 264 p256GetScalar(scalarReversed, scalar) 265 266 var r p256Point 267 r.p256BaseMult(scalarReversed) 268 return r.p256PointToAffine() 269 } 270 271 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 272 scalarReversed := make([]uint64, 4) 273 p256GetScalar(scalarReversed, scalar) 274 275 var r p256Point 276 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 277 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 278 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 279 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 280 // This sets r2's Z value to 1, in the Montgomery domain. 281 r.xyz[8] = 0x0000000000000001 282 r.xyz[9] = 0xffffffff00000000 283 r.xyz[10] = 0xffffffffffffffff 284 r.xyz[11] = 0x00000000fffffffe 285 286 r.p256ScalarMult(scalarReversed) 287 return r.p256PointToAffine() 288 } 289 290 // uint64IsZero returns 1 if x is zero and zero otherwise. 291 func uint64IsZero(x uint64) int { 292 x = ^x 293 x &= x >> 32 294 x &= x >> 16 295 x &= x >> 8 296 x &= x >> 4 297 x &= x >> 2 298 x &= x >> 1 299 return int(x & 1) 300 } 301 302 // scalarIsZero returns 1 if scalar represents the zero value, and zero 303 // otherwise. 304 func scalarIsZero(scalar []uint64) int { 305 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 306 } 307 308 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 309 zInv := make([]uint64, 4) 310 zInvSq := make([]uint64, 4) 311 p256Inverse(zInv, p.xyz[8:12]) 312 p256Sqr(zInvSq, zInv) 313 p256Mul(zInv, zInv, zInvSq) 314 315 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 316 p256Mul(zInv, p.xyz[4:8], zInv) 317 318 p256FromMont(zInvSq, zInvSq) 319 p256FromMont(zInv, zInv) 320 321 xOut := make([]byte, 32) 322 yOut := make([]byte, 32) 323 p256LittleToBig(xOut, zInvSq) 324 p256LittleToBig(yOut, zInv) 325 326 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 327 } 328 329 // CopyConditional copies overwrites p with src if v == 1, and leaves p 330 // unchanged if v == 0. 331 func (p *p256Point) CopyConditional(src *p256Point, v int) { 332 pMask := uint64(v) - 1 333 srcMask := ^pMask 334 335 for i, n := range p.xyz { 336 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 337 } 338 } 339 340 // p256Inverse sets out to in^-1 mod p. 341 func p256Inverse(out, in []uint64) { 342 var stack [6 * 4]uint64 343 p2 := stack[4*0 : 4*0+4] 344 p4 := stack[4*1 : 4*1+4] 345 p8 := stack[4*2 : 4*2+4] 346 p16 := stack[4*3 : 4*3+4] 347 p32 := stack[4*4 : 4*4+4] 348 349 p256Sqr(out, in) 350 p256Mul(p2, out, in) // 3*p 351 352 p256Sqr(out, p2) 353 p256Sqr(out, out) 354 p256Mul(p4, out, p2) // f*p 355 356 p256Sqr(out, p4) 357 p256Sqr(out, out) 358 p256Sqr(out, out) 359 p256Sqr(out, out) 360 p256Mul(p8, out, p4) // ff*p 361 362 p256Sqr(out, p8) 363 364 for i := 0; i < 7; i++ { 365 p256Sqr(out, out) 366 } 367 p256Mul(p16, out, p8) // ffff*p 368 369 p256Sqr(out, p16) 370 for i := 0; i < 15; i++ { 371 p256Sqr(out, out) 372 } 373 p256Mul(p32, out, p16) // ffffffff*p 374 375 p256Sqr(out, p32) 376 377 for i := 0; i < 31; i++ { 378 p256Sqr(out, out) 379 } 380 p256Mul(out, out, in) 381 382 for i := 0; i < 32*4; i++ { 383 p256Sqr(out, out) 384 } 385 p256Mul(out, out, p32) 386 387 for i := 0; i < 32; i++ { 388 p256Sqr(out, out) 389 } 390 p256Mul(out, out, p32) 391 392 for i := 0; i < 16; i++ { 393 p256Sqr(out, out) 394 } 395 p256Mul(out, out, p16) 396 397 for i := 0; i < 8; i++ { 398 p256Sqr(out, out) 399 } 400 p256Mul(out, out, p8) 401 402 p256Sqr(out, out) 403 p256Sqr(out, out) 404 p256Sqr(out, out) 405 p256Sqr(out, out) 406 p256Mul(out, out, p4) 407 408 p256Sqr(out, out) 409 p256Sqr(out, out) 410 p256Mul(out, out, p2) 411 412 p256Sqr(out, out) 413 p256Sqr(out, out) 414 p256Mul(out, out, in) 415 } 416 417 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 418 copy(r[index*12:], p.xyz[:]) 419 } 420 421 func boothW5(in uint) (int, int) { 422 var s uint = ^((in >> 5) - 1) 423 var d uint = (1 << 6) - in - 1 424 d = (d & s) | (in & (^s)) 425 d = (d >> 1) + (d & 1) 426 return int(d), int(s & 1) 427 } 428 429 func boothW7(in uint) (int, int) { 430 var s uint = ^((in >> 7) - 1) 431 var d uint = (1 << 8) - in - 1 432 d = (d & s) | (in & (^s)) 433 d = (d >> 1) + (d & 1) 434 return int(d), int(s & 1) 435 } 436 437 func initTable() { 438 p256Precomputed = new([37][64 * 8]uint64) 439 440 basePoint := []uint64{ 441 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6, 442 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85, 443 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe, 444 } 445 t1 := make([]uint64, 12) 446 t2 := make([]uint64, 12) 447 copy(t2, basePoint) 448 449 zInv := make([]uint64, 4) 450 zInvSq := make([]uint64, 4) 451 for j := 0; j < 64; j++ { 452 copy(t1, t2) 453 for i := 0; i < 37; i++ { 454 // The window size is 7 so we need to double 7 times. 455 if i != 0 { 456 for k := 0; k < 7; k++ { 457 p256PointDoubleAsm(t1, t1) 458 } 459 } 460 // Convert the point to affine form. (Its values are 461 // still in Montgomery form however.) 462 p256Inverse(zInv, t1[8:12]) 463 p256Sqr(zInvSq, zInv) 464 p256Mul(zInv, zInv, zInvSq) 465 466 p256Mul(t1[:4], t1[:4], zInvSq) 467 p256Mul(t1[4:8], t1[4:8], zInv) 468 469 copy(t1[8:12], basePoint[8:12]) 470 // Update the table entry 471 copy(p256Precomputed[i][j*8:], t1[:8]) 472 } 473 if j == 0 { 474 p256PointDoubleAsm(t2, basePoint) 475 } else { 476 p256PointAddAsm(t2, t2, basePoint) 477 } 478 } 479 } 480 481 func (p *p256Point) p256BaseMult(scalar []uint64) { 482 precomputeOnce.Do(initTable) 483 484 wvalue := (scalar[0] << 1) & 0xff 485 sel, sign := boothW7(uint(wvalue)) 486 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel) 487 p256NegCond(p.xyz[4:8], sign) 488 489 // (This is one, in the Montgomery domain.) 490 p.xyz[8] = 0x0000000000000001 491 p.xyz[9] = 0xffffffff00000000 492 p.xyz[10] = 0xffffffffffffffff 493 p.xyz[11] = 0x00000000fffffffe 494 495 var t0 p256Point 496 // (This is one, in the Montgomery domain.) 497 t0.xyz[8] = 0x0000000000000001 498 t0.xyz[9] = 0xffffffff00000000 499 t0.xyz[10] = 0xffffffffffffffff 500 t0.xyz[11] = 0x00000000fffffffe 501 502 index := uint(6) 503 zero := sel 504 505 for i := 1; i < 37; i++ { 506 if index < 192 { 507 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff 508 } else { 509 wvalue = (scalar[index/64] >> (index % 64)) & 0xff 510 } 511 index += 7 512 sel, sign = boothW7(uint(wvalue)) 513 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel) 514 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 515 zero |= sel 516 } 517 } 518 519 func (p *p256Point) p256ScalarMult(scalar []uint64) { 520 // precomp is a table of precomputed points that stores powers of p 521 // from p^1 to p^16. 522 var precomp [16 * 4 * 3]uint64 523 var t0, t1, t2, t3 p256Point 524 525 // Prepare the table 526 p.p256StorePoint(&precomp, 0) // 1 527 528 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 529 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 530 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 531 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 532 t0.p256StorePoint(&precomp, 1) // 2 533 t1.p256StorePoint(&precomp, 3) // 4 534 t2.p256StorePoint(&precomp, 7) // 8 535 t3.p256StorePoint(&precomp, 15) // 16 536 537 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 538 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 539 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 540 t0.p256StorePoint(&precomp, 2) // 3 541 t1.p256StorePoint(&precomp, 4) // 5 542 t2.p256StorePoint(&precomp, 8) // 9 543 544 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 545 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 546 t0.p256StorePoint(&precomp, 5) // 6 547 t1.p256StorePoint(&precomp, 9) // 10 548 549 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 550 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 551 t2.p256StorePoint(&precomp, 6) // 7 552 t1.p256StorePoint(&precomp, 10) // 11 553 554 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 555 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 556 t0.p256StorePoint(&precomp, 11) // 12 557 t2.p256StorePoint(&precomp, 13) // 14 558 559 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 560 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 561 t0.p256StorePoint(&precomp, 12) // 13 562 t2.p256StorePoint(&precomp, 14) // 15 563 564 // Start scanning the window from top bit 565 index := uint(254) 566 var sel, sign int 567 568 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 569 sel, _ = boothW5(uint(wvalue)) 570 571 p256Select(p.xyz[0:12], precomp[0:], sel) 572 zero := sel 573 574 for index > 4 { 575 index -= 5 576 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 577 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 578 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 579 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 580 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 581 582 if index < 192 { 583 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 584 } else { 585 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 586 } 587 588 sel, sign = boothW5(uint(wvalue)) 589 590 p256Select(t0.xyz[0:], precomp[0:], sel) 591 p256NegCond(t0.xyz[4:8], sign) 592 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 593 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 594 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 595 zero |= sel 596 } 597 598 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 599 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 600 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 601 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 602 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 603 604 wvalue = (scalar[0] << 1) & 0x3f 605 sel, sign = boothW5(uint(wvalue)) 606 607 p256Select(t0.xyz[0:], precomp[0:], sel) 608 p256NegCond(t0.xyz[4:8], sign) 609 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 610 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 611 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 612 }