// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains the Go wrapper for the constant-time, 64-bit assembly
// implementation of P256. The optimizations performed here are described in
// detail in:
// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
//                          256-bit primes"
// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
// https://eprint.iacr.org/2013/816.pdf
//
// Retrieved from:
// github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/crypto/internal/nistec/p256_asm.go

//go:build amd64 || arm64 || ppc64le || s390x

package nistec

import (
	_ "embed"
	"encoding/binary"
	"errors"
	"math/bits"
	"runtime"
	"unsafe"
)

// p256Element is a P-256 base field element in [0, P-1] in the Montgomery
// domain (with R = 2²⁵⁶), stored as four 64-bit limbs with the least
// significant limb first.
type p256Element [4]uint64

// p256One is one in the Montgomery domain, i.e. R mod p = 2²⁵⁶ - p.
var p256One = p256Element{0x0000000000000001, 0xffffffff00000000,
	0xffffffffffffffff, 0x00000000fffffffe}

// p256Zero is the zero field element. Zero has the same limbs inside and
// outside the Montgomery domain.
var p256Zero = p256Element{}

// p256P holds the limbs of the field prime
// p = 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1, least significant limb first. The limbs
// below are p itself (not p multiplied by R); it is used as the subtrahend in
// p256LessThanP and p256Add.
var p256P = p256Element{0xffffffffffffffff, 0x00000000ffffffff,
	0x0000000000000000, 0xffffffff00000001}

// P256Point is a P-256 point. The zero value should not be assumed to be valid
// (although it is in this implementation).
type P256Point struct {
	// (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
	// at infinity can be represented by any set of coordinates with Z = 0.
	x, y, z p256Element
}

// NewP256Point returns a new P256Point representing the point at infinity.
//
// The returned point uses (1 : 1 : 0) with the ones in the Montgomery domain;
// only Z = 0 is significant for the point at infinity.
func NewP256Point() *P256Point {
	return &P256Point{
		x: p256One, y: p256One, z: p256Zero,
	}
}

// SetGenerator sets p to the canonical generator and returns p.
56 func (p *P256Point) SetGenerator() *P256Point { 57 p.x = p256Element{0x79e730d418a9143c, 0x75ba95fc5fedb601, 58 0x79fb732b77622510, 0x18905f76a53755c6} 59 p.y = p256Element{0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 60 0xd2e88688dd21f325, 0x8571ff1825885d85} 61 p.z = p256One 62 return p 63 } 64 65 // Set sets p = q and returns p. 66 func (p *P256Point) Set(q *P256Point) *P256Point { 67 p.x, p.y, p.z = q.x, q.y, q.z 68 return p 69 } 70 71 const p256ElementLength = 32 72 const p256UncompressedLength = 1 + 2*p256ElementLength 73 const p256CompressedLength = 1 + p256ElementLength 74 75 // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in 76 // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on 77 // the curve, it returns nil and an error, and the receiver is unchanged. 78 // Otherwise, it returns p. 79 func (p *P256Point) SetBytes(b []byte) (*P256Point, error) { 80 // p256Mul operates in the Montgomery domain with R = 2²⁵⁶ mod p. Thus rr 81 // here is R in the Montgomery domain, or R×R mod p. See comment in 82 // P256OrdInverse about how this is used. 83 rr := p256Element{0x0000000000000003, 0xfffffffbffffffff, 84 0xfffffffffffffffe, 0x00000004fffffffd} 85 86 switch { 87 // Point at infinity. 88 case len(b) == 1 && b[0] == 0: 89 return p.Set(NewP256Point()), nil 90 91 // Uncompressed form. 92 case len(b) == p256UncompressedLength && b[0] == 4: 93 var r P256Point 94 p256BigToLittle(&r.x, (*[32]byte)(b[1:33])) 95 p256BigToLittle(&r.y, (*[32]byte)(b[33:65])) 96 if p256LessThanP(&r.x) == 0 || p256LessThanP(&r.y) == 0 { 97 return nil, errors.New("invalid P256 element encoding") 98 } 99 p256Mul(&r.x, &r.x, &rr) 100 p256Mul(&r.y, &r.y, &rr) 101 if err := p256CheckOnCurve(&r.x, &r.y); err != nil { 102 return nil, err 103 } 104 r.z = p256One 105 return p.Set(&r), nil 106 107 // Compressed form. 
108 case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3): 109 var r P256Point 110 p256BigToLittle(&r.x, (*[32]byte)(b[1:33])) 111 if p256LessThanP(&r.x) == 0 { 112 return nil, errors.New("invalid P256 element encoding") 113 } 114 p256Mul(&r.x, &r.x, &rr) 115 116 // y² = x³ - 3x + b 117 p256Polynomial(&r.y, &r.x) 118 if !p256Sqrt(&r.y, &r.y) { 119 return nil, errors.New("invalid P256 compressed point encoding") 120 } 121 122 // Select the positive or negative root, as indicated by the least 123 // significant bit, based on the encoding type byte. 124 yy := new(p256Element) 125 p256FromMont(yy, &r.y) 126 cond := int(yy[0]&1) ^ int(b[0]&1) 127 p256NegCond(&r.y, cond) 128 129 r.z = p256One 130 return p.Set(&r), nil 131 132 default: 133 return nil, errors.New("invalid P256 point encoding") 134 } 135 } 136 137 // p256Polynomial sets y2 to x³ - 3x + b, and returns y2. 138 func p256Polynomial(y2, x *p256Element) *p256Element { 139 x3 := new(p256Element) 140 p256Sqr(x3, x, 1) 141 p256Mul(x3, x3, x) 142 143 threeX := new(p256Element) 144 p256Add(threeX, x, x) 145 p256Add(threeX, threeX, x) 146 p256NegCond(threeX, 1) 147 148 p256B := &p256Element{0xd89cdf6229c4bddf, 0xacf005cd78843090, 149 0xe5a220abf7212ed6, 0xdc30061d04874834} 150 151 p256Add(x3, x3, threeX) 152 p256Add(x3, x3, p256B) 153 154 *y2 = *x3 155 return y2 156 } 157 158 func p256CheckOnCurve(x, y *p256Element) error { 159 // y² = x³ - 3x + b 160 rhs := p256Polynomial(new(p256Element), x) 161 lhs := new(p256Element) 162 p256Sqr(lhs, y, 1) 163 if p256Equal(lhs, rhs) != 1 { 164 return errors.New("P256 point not on curve") 165 } 166 return nil 167 } 168 169 // p256LessThanP returns 1 if x < p, and 0 otherwise. Note that a p256Element is 170 // not allowed to be equal to or greater than p, so if this function returns 0 171 // then x is invalid. 
172 func p256LessThanP(x *p256Element) int { 173 var b uint64 174 _, b = bits.Sub64(x[0], p256P[0], b) 175 _, b = bits.Sub64(x[1], p256P[1], b) 176 _, b = bits.Sub64(x[2], p256P[2], b) 177 _, b = bits.Sub64(x[3], p256P[3], b) 178 return int(b) 179 } 180 181 // p256Add sets res = x + y. 182 func p256Add(res, x, y *p256Element) { 183 var c, b uint64 184 t1 := make([]uint64, 4) 185 t1[0], c = bits.Add64(x[0], y[0], 0) 186 t1[1], c = bits.Add64(x[1], y[1], c) 187 t1[2], c = bits.Add64(x[2], y[2], c) 188 t1[3], c = bits.Add64(x[3], y[3], c) 189 t2 := make([]uint64, 4) 190 t2[0], b = bits.Sub64(t1[0], p256P[0], 0) 191 t2[1], b = bits.Sub64(t1[1], p256P[1], b) 192 t2[2], b = bits.Sub64(t1[2], p256P[2], b) 193 t2[3], b = bits.Sub64(t1[3], p256P[3], b) 194 // Three options: 195 // - a+b < p 196 // then c is 0, b is 1, and t1 is correct 197 // - p <= a+b < 2^256 198 // then c is 0, b is 0, and t2 is correct 199 // - 2^256 <= a+b 200 // then c is 1, b is 1, and t2 is correct 201 t2Mask := (c ^ b) - 1 202 res[0] = (t1[0] & ^t2Mask) | (t2[0] & t2Mask) 203 res[1] = (t1[1] & ^t2Mask) | (t2[1] & t2Mask) 204 res[2] = (t1[2] & ^t2Mask) | (t2[2] & t2Mask) 205 res[3] = (t1[3] & ^t2Mask) | (t2[3] & t2Mask) 206 } 207 208 // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns 209 // false and e is unchanged. e and x can overlap. 210 func p256Sqrt(e, x *p256Element) (isSquare bool) { 211 t0, t1 := new(p256Element), new(p256Element) 212 213 // Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate. 214 // 215 // The sequence of 7 multiplications and 253 squarings is derived from the 216 // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. 
217 // 218 // _10 = 2*1 219 // _11 = 1 + _10 220 // _1100 = _11 << 2 221 // _1111 = _11 + _1100 222 // _11110000 = _1111 << 4 223 // _11111111 = _1111 + _11110000 224 // x16 = _11111111 << 8 + _11111111 225 // x32 = x16 << 16 + x16 226 // return ((x32 << 32 + 1) << 96 + 1) << 94 227 // 228 p256Sqr(t0, x, 1) 229 p256Mul(t0, x, t0) 230 p256Sqr(t1, t0, 2) 231 p256Mul(t0, t0, t1) 232 p256Sqr(t1, t0, 4) 233 p256Mul(t0, t0, t1) 234 p256Sqr(t1, t0, 8) 235 p256Mul(t0, t0, t1) 236 p256Sqr(t1, t0, 16) 237 p256Mul(t0, t0, t1) 238 p256Sqr(t0, t0, 32) 239 p256Mul(t0, x, t0) 240 p256Sqr(t0, t0, 96) 241 p256Mul(t0, x, t0) 242 p256Sqr(t0, t0, 94) 243 244 p256Sqr(t1, t0, 1) 245 if p256Equal(t1, x) != 1 { 246 return false 247 } 248 *e = *t0 249 return true 250 } 251 252 // The following assembly functions are implemented in p256_asm_*.s 253 254 // Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p. 255 // 256 //go:noescape 257 func p256Mul(res, in1, in2 *p256Element) 258 259 // Montgomery square, repeated n times (n >= 1). 260 // 261 //go:noescape 262 func p256Sqr(res, in *p256Element, n int) 263 264 // Montgomery multiplication by R⁻¹, or 1 outside the domain. 265 // Sets res = in * R⁻¹, bringing res out of the Montgomery domain. 266 // 267 //go:noescape 268 func p256FromMont(res, in *p256Element) 269 270 // If cond is not 0, sets val = -val mod p. 271 // 272 //go:noescape 273 func p256NegCond(val *p256Element, cond int) 274 275 // If cond is 0, sets res = b, otherwise sets res = a. 276 // 277 //go:noescape 278 func p256MovCond(res, a, b *P256Point, cond int) 279 280 //go:noescape 281 func p256BigToLittle(res *p256Element, in *[32]byte) 282 283 //go:noescape 284 func p256LittleToBig(res *[32]byte, in *p256Element) 285 286 //go:noescape 287 func p256OrdBigToLittle(res *p256OrdElement, in *[32]byte) 288 289 //go:noescape 290 func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement) 291 292 // p256Table is a table of the first 16 multiples of a point. 
Points are stored 293 // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15. 294 // [0]P is the point at infinity and it's not stored. 295 type p256Table [16]P256Point 296 297 // p256Select sets res to the point at index idx in the table. 298 // idx must be in [0, 15]. It executes in constant time. 299 // 300 //go:noescape 301 func p256Select(res *P256Point, table *p256Table, idx int) 302 303 // p256AffinePoint is a point in affine coordinates (x, y). x and y are still 304 // Montgomery domain elements. The point can't be the point at infinity. 305 type p256AffinePoint struct { 306 x, y p256Element 307 } 308 309 // p256AffineTable is a table of the first 32 multiples of a point. Points are 310 // stored at an index offset of -1 like in p256Table, and [0]P is not stored. 311 type p256AffineTable [32]p256AffinePoint 312 313 // p256Precomputed is a series of precomputed multiples of G, the canonical 314 // generator. The first p256AffineTable contains multiples of G. The second one 315 // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive 316 // table is the previous table doubled six times. Six is the width of the 317 // sliding window used in p256ScalarMult, and having each table already 318 // pre-doubled lets us avoid the doublings between windows entirely. This table 319 // MUST NOT be modified, as it aliases into p256PrecomputedEmbed below. 
var p256Precomputed *[43]p256AffineTable

// p256PrecomputedEmbed holds the raw bytes of the precomputed table file. It
// is declared as a string so the data can be aliased by p256Precomputed
// instead of being copied (see init).
//
//go:embed p256_asm_table.bin
var p256PrecomputedEmbed string

// init points p256Precomputed at the embedded table data.
//
// The address of the string variable is reinterpreted as the address of an
// unsafe.Pointer, which reads the first word of the string value as the
// pointer to its bytes. That pointer is then viewed as 43 tables of 32 affine
// points, each made of 2 coordinates of 4 uint64 limbs.
func init() {
	p256PrecomputedPtr := (*unsafe.Pointer)(unsafe.Pointer(&p256PrecomputedEmbed))
	if runtime.GOARCH == "s390x" {
		// On s390x the embedded bytes are not used in place: every 8-byte
		// group is decoded as a little-endian uint64 into a fresh table and
		// p256Precomputed is pointed at that copy instead.
		// NOTE(review): presumably because s390x is big-endian while the
		// file stores limbs little-endian — confirm against the table
		// generator.
		var newTable [43 * 32 * 2 * 4]uint64
		for i, x := range (*[43 * 32 * 2 * 4][8]byte)(*p256PrecomputedPtr) {
			newTable[i] = binary.LittleEndian.Uint64(x[:])
		}
		newTablePtr := unsafe.Pointer(&newTable)
		p256PrecomputedPtr = &newTablePtr
	}
	p256Precomputed = (*[43]p256AffineTable)(*p256PrecomputedPtr)
}

// p256SelectAffine sets res to the point at index idx in the table.
// idx must be in [0, 31]. It executes in constant time.
//
//go:noescape
func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)

// Point addition with an affine point and constant time conditions.
// If zero is 0, sets res = in2. If sel is 0, sets res = in1.
// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2
//
//go:noescape
func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int)

// Point addition. Sets res = in1 + in2. Returns one if the two input points
// were equal and zero otherwise. If in1 or in2 are the point at infinity, res
// and the return value are undefined.
//
//go:noescape
func p256PointAddAsm(res, in1, in2 *P256Point) int

// Point doubling. Sets res = in + in. in can be the point at infinity.
//
//go:noescape
func p256PointDoubleAsm(res, in *P256Point)

// p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
// Montgomery domain (with R = 2²⁵⁶) as four uint64 limbs in little-endian order.
type p256OrdElement [4]uint64

// Add sets q = r1 + r2, and returns q. The points may overlap.
368 func (q *P256Point) Add(r1, r2 *P256Point) *P256Point { 369 var sum, double P256Point 370 r1IsInfinity := r1.isInfinity() 371 r2IsInfinity := r2.isInfinity() 372 pointsEqual := p256PointAddAsm(&sum, r1, r2) 373 p256PointDoubleAsm(&double, r1) 374 p256MovCond(&sum, &double, &sum, pointsEqual) 375 p256MovCond(&sum, r1, &sum, r2IsInfinity) 376 p256MovCond(&sum, r2, &sum, r1IsInfinity) 377 return q.Set(&sum) 378 } 379 380 // Double sets q = p + p, and returns q. The points may overlap. 381 func (q *P256Point) Double(p *P256Point) *P256Point { 382 var double P256Point 383 p256PointDoubleAsm(&double, p) 384 return q.Set(&double) 385 } 386 387 // ScalarBaseMult sets r = scalar * generator, where scalar is a 32-byte big 388 // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult 389 // returns an error and the receiver is unchanged. 390 func (r *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) { 391 if len(scalar) != 32 { 392 return nil, errors.New("invalid scalar length") 393 } 394 scalarReversed := new(p256OrdElement) 395 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar)) 396 397 r.p256BaseMult(scalarReversed) 398 return r, nil 399 } 400 401 // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value, 402 // and returns r. If scalar is not 32 bytes long, ScalarBaseMult returns an 403 // error and the receiver is unchanged. 404 func (r *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) { 405 if len(scalar) != 32 { 406 return nil, errors.New("invalid scalar length") 407 } 408 scalarReversed := new(p256OrdElement) 409 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar)) 410 411 r.Set(q).p256ScalarMult(scalarReversed) 412 return r, nil 413 } 414 415 // uint64IsZero returns 1 if x is zero and zero otherwise. 
416 func uint64IsZero(x uint64) int { 417 x = ^x 418 x &= x >> 32 419 x &= x >> 16 420 x &= x >> 8 421 x &= x >> 4 422 x &= x >> 2 423 x &= x >> 1 424 return int(x & 1) 425 } 426 427 // p256Equal returns 1 if a and b are equal and 0 otherwise. 428 func p256Equal(a, b *p256Element) int { 429 var acc uint64 430 for i := range a { 431 acc |= a[i] ^ b[i] 432 } 433 return uint64IsZero(acc) 434 } 435 436 // isInfinity returns 1 if p is the point at infinity and 0 otherwise. 437 func (p *P256Point) isInfinity() int { 438 return p256Equal(&p.z, &p256Zero) 439 } 440 441 // Bytes returns the uncompressed or infinity encoding of p, as specified in 442 // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at 443 // infinity is shorter than all other encodings. 444 func (p *P256Point) Bytes() []byte { 445 // This function is outlined to make the allocations inline in the caller 446 // rather than happen on the heap. 447 var out [p256UncompressedLength]byte 448 return p.bytes(&out) 449 } 450 451 func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte { 452 // The proper representation of the point at infinity is a single zero byte. 453 if p.isInfinity() == 1 { 454 return append(out[:0], 0) 455 } 456 457 x, y := new(p256Element), new(p256Element) 458 p.affineFromMont(x, y) 459 460 out[0] = 4 // Uncompressed form. 461 p256LittleToBig((*[32]byte)(out[1:33]), x) 462 p256LittleToBig((*[32]byte)(out[33:65]), y) 463 464 return out[:] 465 } 466 467 // affineFromMont sets (x, y) to the affine coordinates of p, converted out of the 468 // Montgomery domain. 469 func (p *P256Point) affineFromMont(x, y *p256Element) { 470 p256Inverse(y, &p.z) 471 p256Sqr(x, y, 1) 472 p256Mul(y, y, x) 473 474 p256Mul(x, &p.x, x) 475 p256Mul(y, &p.y, y) 476 477 p256FromMont(x, x) 478 p256FromMont(y, y) 479 } 480 481 // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1, 482 // Version 2.0, Section 2.3.5, or an error if p is the point at infinity. 
func (p *P256Point) BytesX() ([]byte, error) {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap.
	var out [p256ElementLength]byte
	return p.bytesX(&out)
}

// bytesX writes the big-endian encoding of the affine x-coordinate of p into
// out and returns it, or returns an error if p is the point at infinity.
func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
	if p.isInfinity() == 1 {
		return nil, errors.New("P256 point is the point at infinity")
	}

	// x = X·Z⁻², then out of the Montgomery domain. Only the x-coordinate is
	// needed, so Z⁻³ is never computed.
	x := new(p256Element)
	p256Inverse(x, &p.z)
	p256Sqr(x, x, 1)
	p256Mul(x, &p.x, x)
	p256FromMont(x, x)
	p256LittleToBig((*[32]byte)(out[:]), x)

	return out[:], nil
}

// BytesCompressed returns the compressed or infinity encoding of p, as
// specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
// point at infinity is shorter than all other encodings.
func (p *P256Point) BytesCompressed() []byte {
	// This function is outlined to make the allocations inline in the caller
	// rather than happen on the heap.
	var out [p256CompressedLength]byte
	return p.bytesCompressed(&out)
}

// bytesCompressed writes the compressed encoding of p into out and returns
// the used prefix of out.
func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
	// The point at infinity is encoded as a single zero byte.
	if p.isInfinity() == 1 {
		return append(out[:0], 0)
	}

	x, y := new(p256Element), new(p256Element)
	p.affineFromMont(x, y)

	// The type byte is 2 or 3 depending on the low bit of the affine y.
	out[0] = 2 | byte(y[0]&1)
	p256LittleToBig((*[32]byte)(out[1:33]), x)

	return out[:]
}

// Select sets q to p1 if cond == 1, and to p2 if cond == 0.
func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
	p256MovCond(q, p1, p2, cond)
	return q
}

// p256Inverse sets out to in⁻¹ mod p. If in is zero, out will be zero.
func p256Inverse(out, in *p256Element) {
	// Inversion is calculated through exponentiation by p - 2, per Fermat's
	// little theorem.
	//
	// The sequence of 12 multiplications and 255 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain
	// v0.4.0.
	//
	//	_10     = 2*1
	//	_11     = 1 + _10
	//	_110    = 2*_11
	//	_111    = 1 + _110
	//	_111000 = _111 << 3
	//	_111111 = _111 + _111000
	//	x12     = _111111 << 6 + _111111
	//	x15     = x12 << 3 + _111
	//	x16     = 2*x15 + 1
	//	x32     = x16 << 16 + x16
	//	i53     = x32 << 15
	//	x47     = x15 + i53
	//	i263    = ((i53 << 17 + 1) << 143 + x47) << 47
	//	return    (x47 + i263) << 2 + 1
	//
	var z = new(p256Element)
	var t0 = new(p256Element)
	var t1 = new(p256Element)

	p256Sqr(z, in, 1)
	p256Mul(z, in, z)
	p256Sqr(z, z, 1)
	p256Mul(z, in, z)
	p256Sqr(t0, z, 3)
	p256Mul(t0, z, t0)
	p256Sqr(t1, t0, 6)
	p256Mul(t0, t0, t1)
	p256Sqr(t0, t0, 3)
	p256Mul(z, z, t0)
	p256Sqr(t0, z, 1)
	p256Mul(t0, in, t0)
	p256Sqr(t1, t0, 16)
	p256Mul(t0, t0, t1)
	p256Sqr(t0, t0, 15)
	p256Mul(z, z, t0)
	p256Sqr(t0, t0, 17)
	p256Mul(t0, in, t0)
	p256Sqr(t0, t0, 143)
	p256Mul(t0, z, t0)
	p256Sqr(t0, t0, 47)
	p256Mul(z, z, t0)
	p256Sqr(z, z, 2)
	// out is written only here, after the last read of in.
	p256Mul(out, in, z)
}

// boothW5 recodes a 6-bit window value (five scalar bits plus the top bit of
// the window below it) into a digit magnitude and a sign.
//
// If bit 5 of in is set, the magnitude is derived from 63 - in and the
// returned sign is 1; otherwise it is derived from in and the sign is 0. In
// both cases the magnitude is that value halved and rounded up.
func boothW5(in uint) (int, int) {
	var s uint = ^((in >> 5) - 1) // all ones if bit 5 is set, else zero
	var d uint = (1 << 6) - in - 1
	d = (d & s) | (in & (^s)) // branch-free choice between 63-in and in
	d = (d >> 1) + (d & 1)
	return int(d), int(s & 1)
}

// boothW6 is the 7-bit-window counterpart of boothW5: it keys on bit 6 of in
// and uses 127 - in for the negative case.
func boothW6(in uint) (int, int) {
	var s uint = ^((in >> 6) - 1) // all ones if bit 6 is set, else zero
	var d uint = (1 << 7) - in - 1
	d = (d & s) | (in & (^s)) // branch-free choice between 127-in and in
	d = (d >> 1) + (d & 1)
	return int(d), int(s & 1)
}

// p256BaseMult sets p = scalar * G using the precomputed tables in
// p256Precomputed. The scalar is consumed in 43 windows of six bits; window i
// is looked up in table i, so no doublings are performed.
func (p *P256Point) p256BaseMult(scalar *p256OrdElement) {
	var t0 p256AffinePoint

	// First window: the lowest six bits, shifted up by one so that the bit
	// below the window reads as zero.
	wvalue := (scalar[0] << 1) & 0x7f
	sel, sign := boothW6(uint(wvalue))
	p256SelectAffine(&t0, &p256Precomputed[0], sel)
	p.x, p.y, p.z = t0.x, t0.y, p256One
	p256NegCond(&p.y, sign)

	// index is the position of the lowest bit of the next 7-bit window value
	// (one bit below the window itself).
	index := uint(5)
	// zero stays 0 until a nonzero digit has been seen, i.e. while the
	// accumulator still stands for the point at infinity.
	zero := sel

	for i := 1; i < 43; i++ {
		if index < 192 {
			// The window may straddle two limbs; pull the high bits from the
			// next limb. When index%64 is 0 the left shift is by 64, which
			// yields 0 in Go, so nothing is added.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
		} else {
			// In the top limb there is no higher limb to read from.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
		}
		index += 6
		sel, sign = boothW6(uint(wvalue))
		p256SelectAffine(&t0, &p256Precomputed[i], sel)
		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
		zero |= sel
	}

	// If the whole scalar was zero, set to the point at infinity.
	p256MovCond(p, p, NewP256Point(), zero)
}

// p256ScalarMult sets p = scalar * p, in place, scanning the scalar from the
// top in windows of five bits against a table of the first 16 multiples of p.
func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
	// precomp is a table of precomputed points that stores the multiples of p
	// from [1]p to [16]p, with [k]p at index k-1.
	var precomp p256Table
	var t0, t1, t2, t3 P256Point

	// Prepare the table. The trailing comments give the multiple stored.
	precomp[0] = *p // 1

	p256PointDoubleAsm(&t0, p)
	p256PointDoubleAsm(&t1, &t0)
	p256PointDoubleAsm(&t2, &t1)
	p256PointDoubleAsm(&t3, &t2)
	precomp[1] = t0  // 2
	precomp[3] = t1  // 4
	precomp[7] = t2  // 8
	precomp[15] = t3 // 16

	p256PointAddAsm(&t0, &t0, p)
	p256PointAddAsm(&t1, &t1, p)
	p256PointAddAsm(&t2, &t2, p)
	precomp[2] = t0 // 3
	precomp[4] = t1 // 5
	precomp[8] = t2 // 9

	p256PointDoubleAsm(&t0, &t0)
	p256PointDoubleAsm(&t1, &t1)
	precomp[5] = t0 // 6
	precomp[9] = t1 // 10

	p256PointAddAsm(&t2, &t0, p)
	p256PointAddAsm(&t1, &t1, p)
	precomp[6] = t2  // 7
	precomp[10] = t1 // 11

	p256PointDoubleAsm(&t0, &t0)
	p256PointDoubleAsm(&t2, &t2)
	precomp[11] = t0 // 12
	precomp[13] = t2 // 14

	p256PointAddAsm(&t0, &t0, p)
	p256PointAddAsm(&t2, &t2, p)
	precomp[12] = t0 // 13
	precomp[14] = t2 // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// The top window holds only bits 254 and 255; its sign is discarded.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	p256Select(p, &precomp, sel)
	// zero stays 0 until a nonzero digit has been seen, i.e. while the
	// accumulator still stands for the point at infinity.
	zero := sel

	for index > 4 {
		index -= 5
		// Shift the accumulator up by one window: five doublings.
		p256PointDoubleAsm(p, p)
		p256PointDoubleAsm(p, p)
		p256PointDoubleAsm(p, p)
		p256PointDoubleAsm(p, p)
		p256PointDoubleAsm(p, p)

		if index < 192 {
			// The window may straddle two limbs; pull the high bits from the
			// next limb.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		p256Select(&t0, &precomp, sel)
		p256NegCond(&t0.y, sign)
		p256PointAddAsm(&t1, p, &t0)
		// p256PointAddAsm is undefined when an input is the point at
		// infinity, so patch up those cases afterwards:
		// digit is zero (sel == 0): keep p instead of the sum.
		p256MovCond(&t1, &t1, p, sel)
		// no nonzero digit seen before (zero == 0): take t0 itself.
		p256MovCond(p, &t1, &t0, zero)
		zero |= sel
	}

	// Last window: five more doublings, then the lowest five bits shifted up
	// by one so that the bit below the window reads as zero.
	p256PointDoubleAsm(p, p)
	p256PointDoubleAsm(p, p)
	p256PointDoubleAsm(p, p)
	p256PointDoubleAsm(p, p)
	p256PointDoubleAsm(p, p)

	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	p256Select(&t0, &precomp, sel)
	p256NegCond(&t0.y, sign)
	p256PointAddAsm(&t1, p, &t0)
	// Same infinity fix-ups as inside the loop.
	p256MovCond(&t1, &t1, p, sel)
	p256MovCond(p, &t1, &t0, zero)
}