gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/sm2/p256_asm.go (about) 1 // It is by standing on the shoulders of giants. 2 3 // This file contains the Go wrapper for the constant-time, 64-bit assembly 4 // implementation of P256. The optimizations performed here are described in 5 // detail in: 6 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 7 // 256-bit primes" 8 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 9 // https://eprint.iacr.org/2013/816.pdf 10 //go:build amd64 || arm64 11 // +build amd64 arm64 12 13 package sm2 14 15 /* 16 sm2/p256_asm.go sm2p256在amd64或arm64架构下的实现 17 */ 18 19 import ( 20 "crypto/elliptic" 21 "math/big" 22 ) 23 24 type ( 25 // sm2的P256椭圆曲线类型,内嵌 *elliptic.CurveParams 26 p256Curve struct { 27 *elliptic.CurveParams 28 } 29 // sm2p256曲线上的座标类型 30 p256Point struct { 31 xyz [12]uint64 32 } 33 ) 34 35 var ( 36 // 定义sm2椭圆曲线单例 : p256 37 p256 p256Curve 38 ) 39 40 // 初始化sm2的 p256 曲线单例 41 func initP256() { 42 p256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} 43 // SM2椭圆曲线公钥密码算法推荐曲线参数 44 // 2**256 - 2**224 - 2**96 + 2**64 - 1 45 p256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) 46 p256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16) 47 p256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16) 48 p256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) 49 p256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16) 50 p256.BitSize = 256 51 } 52 53 // Params 获取sm2p256曲线参数 54 func (curve p256Curve) Params() *elliptic.CurveParams { 55 return curve.CurveParams 56 } 57 58 // p256曲线的蒙哥马利模乘运算。 59 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 60 // Functions implemented in p256_asm_*64.s 61 // Montgomery multiplication modulo P256 62 //go:noescape 63 //goland:noinspection GoUnusedParameter 64 func p256Mul(res, in1, in2 []uint64) 65 66 // p256曲线的蒙哥马利幂方运算。 67 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 68 // Montgomery square modulo P256, repeated n times (n >= 1) 69 //go:noescape 70 //goland:noinspection GoUnusedParameter 71 func p256Sqr(res, in []uint64, n int) 72 73 // p256曲线的蒙哥马利乘1运算。 74 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 75 // Montgomery multiplication by 1 76 //go:noescape 77 //goland:noinspection GoUnusedParameter 78 func p256FromMont(res, in []uint64) 79 80 // p256曲线的按条件求补(取反)指令。 81 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 82 // iff cond == 1 val <- -val 83 //go:noescape 84 //goland:noinspection GoUnusedParameter 85 func p256NegCond(val []uint64, cond int) 86 87 // p256曲线的按条件传送指令。 88 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 89 // if cond == 0 res <- b; else res <- a 90 //go:noescape 91 //goland:noinspection GoUnusedParameter 92 func p256MovCond(res, a, b []uint64, cond int) 93 94 // p256曲线的字节序交换运算(大端序转小端序)。 95 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 96 // Endianness swap 97 //go:noescape 98 //goland:noinspection GoUnusedParameter 99 func p256BigToLittle(res []uint64, in []byte) 100 101 // p256曲线的字节序交换运算(小端序转大端序)。 102 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 103 //go:noescape 104 //goland:noinspection GoUnusedParameter 105 func p256LittleToBig(res []byte, in []uint64) 106 107 // Constant time table access 108 //go:noescape 109 //goland:noinspection GoUnusedParameter 110 func p256Select(point, table []uint64, idx int) 111 112 //go:noescape 113 //goland:noinspection GoUnusedParameter 114 func p256SelectBase(point *[12]uint64, table string, idx int) 115 116 // p256曲线的蒙哥马利Ord(G)模乘运算。 117 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 118 // Montgomery multiplication modulo Ord(G) 119 //go:noescape 120 //goland:noinspection GoUnusedParameter 121 func p256OrdMul(res, in1, in2 []uint64) 122 123 // p256曲线的蒙哥马利 Ord(G)幂方运算。 124 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 125 // Montgomery square modulo Ord(G), repeated n times 126 //go:noescape 127 //goland:noinspection GoUnusedParameter 128 func p256OrdSqr(res, in []uint64, n int) 129 130 // Point add with in2 being affine point 131 // If sign == 1 -> in2 = -in2 132 // If sel == 0 -> res = in1 133 // if zero == 0 -> res = in2 134 //go:noescape 135 //goland:noinspection GoUnusedParameter 136 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 137 138 // Point add. Returns one if the two input points were equal and zero 139 // otherwise. (Note that, due to the way that the equations work out, some 140 // representations of ∞ are considered equal to everything by this function.) 141 //go:noescape 142 //goland:noinspection GoUnusedParameter 143 func p256PointAddAsm(res, in1, in2 []uint64) int 144 145 // Point double 146 //go:noescape 147 //goland:noinspection GoUnusedParameter 148 func p256PointDoubleAsm(res, in []uint64) 149 150 var p256one = []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000} 151 152 // Inverse 利用amd64或arm64架构的CPU实现快速的 mod Params().N 的倒数运算 153 // Inverse, implements invertible interface, used by Sign() 154 // n-2 = 155 // 1111111111111111111111111111111011111111111111111111111111111111 156 // 1111111111111111111111111111111111111111111111111111111111111111 157 // 0111001000000011110111110110101100100001110001100000010100101011 158 // 0101001110111011111101000000100100111001110101010100000100100001 159 // 160 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 161 if k.Sign() < 0 { 162 // This should never happen. 163 k = new(big.Int).Neg(k) 164 } 165 166 if k.Cmp(p256.N) >= 0 { 167 // This should never happen. 168 k = new(big.Int).Mod(k, p256.N) 169 } 170 171 // table will store precomputed powers of x. 172 var table [4 * 10]uint64 173 var ( 174 _1 = table[4*0 : 4*1] 175 _11 = table[4*1 : 4*2] 176 _101 = table[4*2 : 4*3] 177 _111 = table[4*3 : 4*4] 178 _1111 = table[4*4 : 4*5] 179 _10101 = table[4*5 : 4*6] 180 _101111 = table[4*6 : 4*7] 181 x = table[4*7 : 4*8] 182 t = table[4*8 : 4*9] 183 m = table[4*9 : 4*10] 184 ) 185 186 fromBig(x[:], k) 187 // This code operates in the Montgomery domain where R = 2^256 mod n 188 // and n is the order of the scalar field. (See initP256 for the 189 // value.) Elements in the Montgomery domain take the form a×R and 190 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 191 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 192 // i.e. converts x into the Montgomery domain. 193 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 194 RR := []uint64{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b} 195 196 p256OrdMul(_1, x, RR) // _1 , 2^0 197 p256OrdSqr(m, _1, 1) // _10, 2^1 198 p256OrdMul(_11, m, _1) // _11, 2^1 + 2^0 199 p256OrdMul(_101, m, _11) // _101, 2^2 + 2^0 200 p256OrdMul(_111, m, _101) // _111, 2^2 + 2^1 + 2^0 201 p256OrdSqr(x, _101, 1) // _1010, 2^3 + 2^1 202 p256OrdMul(_1111, _101, x) // _1111, 2^3 + 2^2 + 2^1 + 2^0 203 204 p256OrdSqr(t, x, 1) // _10100, 2^4 + 2^2 205 p256OrdMul(_10101, t, _1) // _10101, 2^4 + 2^2 + 2^0 206 p256OrdSqr(x, _10101, 1) // _101010, 2^5 + 2^3 + 2^1 207 p256OrdMul(_101111, _101, x) // _101111, 2^5 + 2^3 + 2^2 + 2^1 + 2^0 208 p256OrdMul(x, _10101, x) // _111111 = x6, 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 209 p256OrdSqr(t, x, 2) // _11111100, 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 210 211 p256OrdMul(m, t, m) // _11111110 = x8, , 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 212 p256OrdMul(t, t, _11) // _11111111 = x8, , 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 213 p256OrdSqr(x, t, 8) // _ff00, 2^15 + 2^14 + 2^13 + 2^12 + 2^11 + 2^10 + 2^9 + 2^8 214 p256OrdMul(m, x, m) // _fffe 215 p256OrdMul(x, x, t) // _ffff = x16, 2^15 + 2^14 + 2^13 + 2^12 + 2^11 + 2^10 + 2^9 + 2^8 + 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 216 217 p256OrdSqr(t, x, 16) // _ffff0000, 2^31 + 2^30 + 2^29 + 2^28 + 2^27 + 2^26 + 2^25 + 2^24 + 2^23 + 2^22 + 2^21 + 2^20 + 2^19 + 2^18 + 2^17 + 2^16 218 p256OrdMul(m, t, m) // _fffffffe 219 p256OrdMul(t, t, x) // _ffffffff = x32 220 221 p256OrdSqr(x, m, 32) // _fffffffe00000000 222 p256OrdMul(x, x, t) // _fffffffeffffffff 223 p256OrdSqr(x, x, 32) // _fffffffeffffffff00000000 224 p256OrdMul(x, x, t) // _fffffffeffffffffffffffff 225 p256OrdSqr(x, x, 32) // _fffffffeffffffffffffffff00000000 226 p256OrdMul(x, x, t) // _fffffffeffffffffffffffffffffffff 227 228 sqrs := []uint8{ 229 4, 3, 11, 5, 3, 5, 1, 230 3, 7, 5, 9, 7, 5, 5, 231 4, 5, 2, 2, 7, 3, 5, 232 5, 6, 2, 6, 3, 5, 233 } 234 muls := [][]uint64{ 235 _111, _1, _1111, _1111, _101, _10101, _1, 236 _1, _111, _11, _101, _10101, _10101, _111, 237 _111, _1111, _11, _1, _1, _1, _111, 238 _111, _10101, _1, _1, _1, _1} 239 240 for i, s := range sqrs { 241 p256OrdSqr(x, x, int(s)) 242 p256OrdMul(x, x, muls[i]) 243 } 244 245 // Multiplying by one in the Montgomery domain converts a Montgomery 246 // value out of the domain. 247 one := []uint64{1, 0, 0, 0} 248 p256OrdMul(x, x, one) 249 250 xOut := make([]byte, 32) 251 p256LittleToBig(xOut, x) 252 return new(big.Int).SetBytes(xOut) 253 } 254 255 // fromBig converts a *big.Int into a format used by this code. 256 func fromBig(out []uint64, big *big.Int) { 257 for i := range out { 258 out[i] = 0 259 } 260 261 for i, v := range big.Bits() { 262 out[i] = uint64(v) 263 } 264 } 265 266 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 267 // to out. If the scalar is equal or greater than the order of the group, it's 268 // reduced modulo that order. 269 func p256GetScalar(out []uint64, in []byte) { 270 n := new(big.Int).SetBytes(in) 271 272 if n.Cmp(p256.N) >= 0 { 273 n.Mod(n, p256.N) 274 } 275 fromBig(out, n) 276 } 277 278 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 279 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 280 // R×R mod p. See comment in Inverse about how this is used. 281 var rr = []uint64{0x200000003, 0x2ffffffff, 0x100000001, 0x400000002} 282 283 func maybeReduceModP(in *big.Int) *big.Int { 284 if in.Cmp(p256.P) < 0 { 285 return in 286 } 287 return new(big.Int).Mod(in, p256.P) 288 } 289 290 // CombinedMult 利用amd64或arm64架构的cpu指令集功能加速曲线上的乘法运算 291 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 292 scalarReversed := make([]uint64, 4) 293 var r1, r2 p256Point 294 p256GetScalar(scalarReversed, baseScalar) 295 r1IsInfinity := scalarIsZero(scalarReversed) 296 r1.p256BaseMult(scalarReversed) 297 298 p256GetScalar(scalarReversed, scalar) 299 r2IsInfinity := scalarIsZero(scalarReversed) 300 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 301 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 302 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 303 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 304 305 // This sets r2's Z value to 1, in the Montgomery domain. 306 r2.xyz[8] = p256one[0] 307 r2.xyz[9] = p256one[1] 308 r2.xyz[10] = p256one[2] 309 r2.xyz[11] = p256one[3] 310 311 r2.p256ScalarMult(scalarReversed) 312 313 var sum, double p256Point 314 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 315 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 316 sum.CopyConditional(&double, pointsEqual) 317 sum.CopyConditional(&r1, r2IsInfinity) 318 sum.CopyConditional(&r2, r1IsInfinity) 319 320 return sum.p256PointToAffine() 321 } 322 323 // ScalarBaseMult sm2p256曲线基点乘法: k*G , k是随机数,G是曲线基点座标。 324 // 实现的是 elliptic.Curve 接口。 325 // ScalarBaseMult returns k*G, where G is the base point of the group 326 // and k is an integer in big-endian form. 327 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 328 scalarReversed := make([]uint64, 4) 329 p256GetScalar(scalarReversed, scalar) 330 331 var r p256Point 332 r.p256BaseMult(scalarReversed) 333 return r.p256PointToAffine() 334 } 335 336 // ScalarMult sm2p256曲线乘法: k*G , k是随机数,(Bx,By)是曲线上某点座标。 337 // 实现的是 elliptic.Curve 接口。 338 // ScalarMult returns k*(Bx,By) where k is a number in big-endian form. 339 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 340 scalarReversed := make([]uint64, 4) 341 p256GetScalar(scalarReversed, scalar) 342 343 var r p256Point 344 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 345 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 346 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 347 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 348 // This sets r2's Z value to 1, in the Montgomery domain. 349 r.xyz[8] = p256one[0] 350 r.xyz[9] = p256one[1] 351 r.xyz[10] = p256one[2] 352 r.xyz[11] = p256one[3] 353 354 r.p256ScalarMult(scalarReversed) 355 return r.p256PointToAffine() 356 } 357 358 // uint64IsZero returns 1 if x is zero and zero otherwise. 359 func uint64IsZero(x uint64) int { 360 x = ^x 361 x &= x >> 32 362 x &= x >> 16 363 x &= x >> 8 364 x &= x >> 4 365 x &= x >> 2 366 x &= x >> 1 367 return int(x & 1) 368 } 369 370 // scalarIsZero returns 1 if scalar represents the zero value, and zero 371 // otherwise. 372 func scalarIsZero(scalar []uint64) int { 373 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 374 } 375 376 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 377 zInv := make([]uint64, 4) 378 zInvSq := make([]uint64, 4) 379 p256Inverse(zInv, p.xyz[8:12]) 380 p256Sqr(zInvSq, zInv, 1) 381 p256Mul(zInv, zInv, zInvSq) 382 383 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 384 p256Mul(zInv, p.xyz[4:8], zInv) 385 386 p256FromMont(zInvSq, zInvSq) 387 p256FromMont(zInv, zInv) 388 389 xOut := make([]byte, 32) 390 yOut := make([]byte, 32) 391 p256LittleToBig(xOut, zInvSq) 392 p256LittleToBig(yOut, zInv) 393 394 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 395 } 396 397 // CopyConditional copies overwrites p with src if v == 1, and leaves p 398 // unchanged if v == 0. 399 func (p *p256Point) CopyConditional(src *p256Point, v int) { 400 pMask := uint64(v) - 1 401 srcMask := ^pMask 402 403 for i, n := range p.xyz { 404 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 405 } 406 } 407 408 // p256Inverse sets out to in^-1 mod p. 409 func p256Inverse(out, in []uint64) { 410 // Inversion is calculated through exponentiation by p - 2, per Fermat's 411 // little theorem. 412 // 413 // The sequence of 14 multiplications and 255 squarings is derived from the 414 // following addition chain generated with github.com/mmcloughlin/addchain 415 // v0.4.0. 416 // 417 // _10 = 2*1 418 // _11 = 1 + _10 419 // _110 = 2*_11 420 // _111 = 1 + _110 421 // _111000 = _111 << 3 422 // _111111 = _111 + _111000 423 // _1111110 = 2*_111111 424 // _1111111 = 1 + _1111110 425 // x12 = _1111110 << 5 + _111111 426 // x24 = x12 << 12 + x12 427 // x31 = x24 << 7 + _1111111 428 // i39 = x31 << 2 429 // i68 = i39 << 29 430 // x62 = x31 + i68 431 // i71 = i68 << 2 432 // x64 = i39 + i71 + _11 433 // i265 = ((i71 << 32 + x64) << 64 + x64) << 94 434 // return (x62 + i265) << 2 + 1 435 var stack [3 * 4]uint64 436 t0 := stack[4*0 : 4*0+4] 437 t1 := stack[4*1 : 4*1+4] 438 t2 := stack[4*2 : 4*2+4] 439 440 p256Sqr(out, in, 1) 441 p256Mul(t0, in, out) 442 p256Sqr(out, t0, 1) 443 p256Mul(out, in, out) 444 p256Sqr(t1, out, 3) 445 p256Mul(t1, out, t1) 446 p256Sqr(t2, t1, 1) 447 p256Mul(out, in, t2) 448 p256Sqr(t2, t2, 5) 449 p256Mul(t1, t1, t2) 450 p256Sqr(t2, t1, 12) 451 p256Mul(t1, t1, t2) 452 p256Sqr(t1, t1, 7) 453 p256Mul(out, out, t1) 454 p256Sqr(t2, out, 2) 455 p256Sqr(t1, t2, 29) 456 p256Mul(out, out, t1) 457 p256Sqr(t1, t1, 2) 458 p256Mul(t2, t2, t1) 459 p256Mul(t0, t0, t2) 460 p256Sqr(t1, t1, 32) 461 p256Mul(t1, t0, t1) 462 p256Sqr(t1, t1, 64) 463 p256Mul(t0, t0, t1) 464 p256Sqr(t0, t0, 94) 465 p256Mul(out, out, t0) 466 p256Sqr(out, out, 2) 467 p256Mul(out, in, out) 468 } 469 470 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 471 copy(r[index*12:], p.xyz[:]) 472 } 473 474 // This function takes those six bits as an integer (0 .. 63), writing the 475 // recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute 476 // value, in the range 0 .. 16). Note that this integer essentially provides 477 // the input bits "shifted to the left" by one position: for example, the input 478 // to compute the least significant recoded digit, given that there's no bit 479 // b_-1, has to be b_4 b_3 b_2 b_1 b_0 0. 480 // 481 // Reference: 482 // https://github.com/openssl/openssl/blob/master/crypto/ec/ecp_nistputil.c 483 // https://github.com/google/boringssl/blob/master/crypto/fipsmodule/ec/util.c 484 func boothW5(in uint) (int, int) { 485 var s = ^((in >> 5) - 1) // sets all bits to MSB(in), 'in' seen as 6-bit value 486 var d = (1 << 6) - in - 1 // d = 63 - in, or d = ^in & 0x3f 487 d = (d & s) | (in & (^s)) // d = in if in < 2^5; otherwise, d = 63 - in 488 d = (d >> 1) + (d & 1) // d = (d + 1) / 2 489 return int(d), int(s & 1) 490 } 491 492 func boothW6(in uint) (int, int) { 493 var s = ^((in >> 6) - 1) 494 var d = (1 << 7) - in - 1 495 d = (d & s) | (in & (^s)) 496 d = (d >> 1) + (d & 1) 497 return int(d), int(s & 1) 498 } 499 500 func (p *p256Point) p256BaseMult(scalar []uint64) { 501 wvalue := (scalar[0] << 1) & 0x7f 502 sel, sign := boothW6(uint(wvalue)) 503 p256SelectBase(&p.xyz, p256Precomputed, sel) 504 p256NegCond(p.xyz[4:8], sign) 505 506 // (This is one, in the Montgomery domain.) 507 p.xyz[8] = p256one[0] 508 p.xyz[9] = p256one[1] 509 p.xyz[10] = p256one[2] 510 p.xyz[11] = p256one[3] 511 512 var t0 p256Point 513 // (This is one, in the Montgomery domain.) 514 t0.xyz[8] = p256one[0] 515 t0.xyz[9] = p256one[1] 516 t0.xyz[10] = p256one[2] 517 t0.xyz[11] = p256one[3] 518 519 index := uint(5) 520 zero := sel 521 522 for i := 1; i < 43; i++ { 523 if index < 192 { 524 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 525 } else { 526 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 527 } 528 index += 6 529 sel, sign = boothW6(uint(wvalue)) 530 p256SelectBase(&t0.xyz, p256Precomputed[i*32*8*8:], sel) 531 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 532 zero |= sel 533 } 534 } 535 536 func (p *p256Point) p256ScalarMult(scalar []uint64) { 537 // precomp is a table of precomputed points that stores powers of p 538 // from p^1 to p^16. 539 var precomp [16 * 4 * 3]uint64 540 var t0, t1, t2, t3 p256Point 541 542 // Prepare the table 543 p.p256StorePoint(&precomp, 0) // 1 544 545 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 546 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 547 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 548 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 549 t0.p256StorePoint(&precomp, 1) // 2 550 t1.p256StorePoint(&precomp, 3) // 4 551 t2.p256StorePoint(&precomp, 7) // 8 552 t3.p256StorePoint(&precomp, 15) // 16 553 554 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 555 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 556 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 557 t0.p256StorePoint(&precomp, 2) // 3 558 t1.p256StorePoint(&precomp, 4) // 5 559 t2.p256StorePoint(&precomp, 8) // 9 560 561 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 562 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 563 t0.p256StorePoint(&precomp, 5) // 6 564 t1.p256StorePoint(&precomp, 9) // 10 565 566 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 567 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 568 t2.p256StorePoint(&precomp, 6) // 7 569 t1.p256StorePoint(&precomp, 10) // 11 570 571 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 572 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 573 t0.p256StorePoint(&precomp, 11) // 12 574 t2.p256StorePoint(&precomp, 13) // 14 575 576 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 577 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 578 t0.p256StorePoint(&precomp, 12) // 13 579 t2.p256StorePoint(&precomp, 14) // 15 580 581 // Start scanning the window from top bit 582 index := uint(254) 583 var sel, sign int 584 585 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 586 sel, _ = boothW5(uint(wvalue)) 587 588 p256Select(p.xyz[0:12], precomp[0:], sel) 589 zero := sel 590 591 for index > 4 { 592 index -= 5 593 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 594 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 595 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 596 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 597 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 598 599 if index < 192 { 600 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 601 } else { 602 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 603 } 604 605 sel, sign = boothW5(uint(wvalue)) 606 607 p256Select(t0.xyz[0:], precomp[0:], sel) 608 p256NegCond(t0.xyz[4:8], sign) 609 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 610 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 611 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 612 zero |= sel 613 } 614 615 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 616 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 617 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 618 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 619 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 620 621 wvalue = (scalar[0] << 1) & 0x3f 622 sel, sign = boothW5(uint(wvalue)) 623 624 p256Select(t0.xyz[0:], precomp[0:], sel) 625 p256NegCond(t0.xyz[4:8], sign) 626 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 627 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 628 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 629 }