github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/sm2/p256_asm.go (about) 1 // It is by standing on the shoulders of giants. 2 3 // This file contains the Go wrapper for the constant-time, 64-bit assembly 4 // implementation of P256. The optimizations performed here are described in 5 // detail in: 6 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 7 // 256-bit primes" 8 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 9 // https://eprint.iacr.org/2013/816.pdf 10 //go:build amd64 || arm64 11 // +build amd64 arm64 12 13 package sm2 14 15 /* 16 sm2/p256_asm.go sm2p256在amd64或arm64架构下的实现 17 */ 18 19 import ( 20 "crypto/elliptic" 21 "math/big" 22 ) 23 24 type ( 25 // sm2的P256椭圆曲线类型,内嵌 *elliptic.CurveParams 26 p256Curve struct { 27 *elliptic.CurveParams 28 } 29 // sm2p256曲线上的座标类型 30 p256Point struct { 31 xyz [12]uint64 32 } 33 ) 34 35 var ( 36 // 定义sm2椭圆曲线单例 : p256 37 p256 p256Curve 38 ) 39 40 // 初始化sm2的 p256 曲线单例 41 func initP256() { 42 p256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} 43 // SM2椭圆曲线公钥密码算法推荐曲线参数 44 // 2**256 - 2**224 - 2**96 + 2**64 - 1 45 p256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) 46 p256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16) 47 p256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16) 48 p256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) 49 p256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16) 50 p256.BitSize = 256 51 } 52 53 // Params 获取sm2p256曲线参数 54 func (curve p256Curve) Params() *elliptic.CurveParams { 55 return curve.CurveParams 56 } 57 58 // p256曲线的蒙哥马利模乘运算。 59 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 60 // Functions implemented in p256_asm_*64.s 61 // Montgomery multiplication modulo P256 62 // 63 //go:noescape 64 //goland:noinspection GoUnusedParameter 65 func p256Mul(res, in1, in2 []uint64) 66 67 // p256曲线的蒙哥马利幂方运算。 68 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 69 // Montgomery square modulo P256, repeated n times (n >= 1) 70 // 71 //go:noescape 72 //goland:noinspection GoUnusedParameter 73 func p256Sqr(res, in []uint64, n int) 74 75 // p256曲线的蒙哥马利乘1运算。 76 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 77 // Montgomery multiplication by 1 78 // 79 //go:noescape 80 //goland:noinspection GoUnusedParameter 81 func p256FromMont(res, in []uint64) 82 83 // p256曲线的按条件求补(取反)指令。 84 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 85 // iff cond == 1 val <- -val 86 // 87 //go:noescape 88 //goland:noinspection GoUnusedParameter 89 func p256NegCond(val []uint64, cond int) 90 91 // p256曲线的按条件传送指令。 92 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 93 // if cond == 0 res <- b; else res <- a 94 // 95 //go:noescape 96 //goland:noinspection GoUnusedParameter 97 func p256MovCond(res, a, b []uint64, cond int) 98 99 // p256曲线的字节序交换运算(大端序转小端序)。 100 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 101 // Endianness swap 102 // 103 //go:noescape 104 //goland:noinspection GoUnusedParameter 105 func p256BigToLittle(res []uint64, in []byte) 106 107 // p256曲线的字节序交换运算(小端序转大端序)。 108 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 109 // 110 //go:noescape 111 //goland:noinspection GoUnusedParameter 112 func p256LittleToBig(res []byte, in []uint64) 113 114 // Constant time table access 115 // 116 //go:noescape 117 //goland:noinspection GoUnusedParameter 118 func p256Select(point, table []uint64, idx int) 119 120 //go:noescape 121 //goland:noinspection GoUnusedParameter 122 func p256SelectBase(point *[12]uint64, table string, idx int) 123 124 // p256曲线的蒙哥马利Ord(G)模乘运算。 125 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 126 // Montgomery multiplication modulo Ord(G) 127 // 128 //go:noescape 129 func p256OrdMul(res, in1, in2 []uint64) 130 131 // p256曲线的蒙哥马利 Ord(G)幂方运算。 132 // 具体实现在对应平台的 p256_asm_*64.s , *匹配amd64或arm64。 133 // Montgomery square modulo Ord(G), repeated n times 134 // 135 //go:noescape 136 //goland:noinspection GoUnusedParameter 137 func p256OrdSqr(res, in []uint64, n int) 138 139 // Point add with in2 being affine point 140 // If sign == 1 -> in2 = -in2 141 // If sel == 0 -> res = in1 142 // if zero == 0 -> res = in2 143 // 144 //go:noescape 145 //goland:noinspection GoUnusedParameter 146 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int) 147 148 // Point add. Returns one if the two input points were equal and zero 149 // otherwise. (Note that, due to the way that the equations work out, some 150 // representations of ∞ are considered equal to everything by this function.) 151 // 152 //go:noescape 153 //goland:noinspection GoUnusedParameter 154 func p256PointAddAsm(res, in1, in2 []uint64) int 155 156 // Point double 157 // 158 //go:noescape 159 //goland:noinspection GoUnusedParameter 160 func p256PointDoubleAsm(res, in []uint64) 161 162 var p256one = []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000} 163 164 // Inverse 利用amd64或arm64架构的CPU实现快速的 mod Params().N 的倒数运算 165 // Inverse, implements invertible interface, used by Sign() 166 // n-2 = 167 // 1111111111111111111111111111111011111111111111111111111111111111 168 // 1111111111111111111111111111111111111111111111111111111111111111 169 // 0111001000000011110111110110101100100001110001100000010100101011 170 // 0101001110111011111101000000100100111001110101010100000100100001 171 func (curve p256Curve) Inverse(k *big.Int) *big.Int { 172 if k.Sign() < 0 { 173 // This should never happen. 174 k = new(big.Int).Neg(k) 175 } 176 177 if k.Cmp(p256.N) >= 0 { 178 // This should never happen. 179 k = new(big.Int).Mod(k, p256.N) 180 } 181 182 // table will store precomputed powers of x. 183 var table [4 * 10]uint64 184 var ( 185 _1 = table[4*0 : 4*1] 186 _11 = table[4*1 : 4*2] 187 _101 = table[4*2 : 4*3] 188 _111 = table[4*3 : 4*4] 189 _1111 = table[4*4 : 4*5] 190 _10101 = table[4*5 : 4*6] 191 _101111 = table[4*6 : 4*7] 192 x = table[4*7 : 4*8] 193 t = table[4*8 : 4*9] 194 m = table[4*9 : 4*10] 195 ) 196 197 fromBig(x[:], k) 198 // This code operates in the Montgomery domain where R = 2^256 mod n 199 // and n is the order of the scalar field. (See initP256 for the 200 // value.) Elements in the Montgomery domain take the form a×R and 201 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 202 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 203 // i.e. converts x into the Montgomery domain. 204 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 205 RR := []uint64{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b} 206 207 p256OrdMul(_1, x, RR) // _1 , 2^0 208 p256OrdSqr(m, _1, 1) // _10, 2^1 209 p256OrdMul(_11, m, _1) // _11, 2^1 + 2^0 210 p256OrdMul(_101, m, _11) // _101, 2^2 + 2^0 211 p256OrdMul(_111, m, _101) // _111, 2^2 + 2^1 + 2^0 212 p256OrdSqr(x, _101, 1) // _1010, 2^3 + 2^1 213 p256OrdMul(_1111, _101, x) // _1111, 2^3 + 2^2 + 2^1 + 2^0 214 215 p256OrdSqr(t, x, 1) // _10100, 2^4 + 2^2 216 p256OrdMul(_10101, t, _1) // _10101, 2^4 + 2^2 + 2^0 217 p256OrdSqr(x, _10101, 1) // _101010, 2^5 + 2^3 + 2^1 218 p256OrdMul(_101111, _101, x) // _101111, 2^5 + 2^3 + 2^2 + 2^1 + 2^0 219 p256OrdMul(x, _10101, x) // _111111 = x6, 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 220 p256OrdSqr(t, x, 2) // _11111100, 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 221 222 p256OrdMul(m, t, m) // _11111110 = x8, , 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 223 p256OrdMul(t, t, _11) // _11111111 = x8, , 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 224 p256OrdSqr(x, t, 8) // _ff00, 2^15 + 2^14 + 2^13 + 2^12 + 2^11 + 2^10 + 2^9 + 2^8 225 p256OrdMul(m, x, m) // _fffe 226 p256OrdMul(x, x, t) // _ffff = x16, 2^15 + 2^14 + 2^13 + 2^12 + 2^11 + 2^10 + 2^9 + 2^8 + 2^7 + 2^6 + 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 227 228 p256OrdSqr(t, x, 16) // _ffff0000, 2^31 + 2^30 + 2^29 + 2^28 + 2^27 + 2^26 + 2^25 + 2^24 + 2^23 + 2^22 + 2^21 + 2^20 + 2^19 + 2^18 + 2^17 + 2^16 229 p256OrdMul(m, t, m) // _fffffffe 230 p256OrdMul(t, t, x) // _ffffffff = x32 231 232 p256OrdSqr(x, m, 32) // _fffffffe00000000 233 p256OrdMul(x, x, t) // _fffffffeffffffff 234 p256OrdSqr(x, x, 32) // _fffffffeffffffff00000000 235 p256OrdMul(x, x, t) // _fffffffeffffffffffffffff 236 p256OrdSqr(x, x, 32) // _fffffffeffffffffffffffff00000000 237 p256OrdMul(x, x, t) // _fffffffeffffffffffffffffffffffff 238 239 sqrs := []uint8{ 240 4, 3, 11, 5, 3, 5, 1, 241 3, 7, 5, 9, 7, 5, 5, 242 4, 5, 2, 2, 7, 3, 5, 243 5, 6, 2, 6, 3, 5, 244 } 245 muls := [][]uint64{ 246 _111, _1, _1111, _1111, _101, _10101, _1, 247 _1, _111, _11, _101, _10101, _10101, _111, 248 _111, _1111, _11, _1, _1, _1, _111, 249 _111, _10101, _1, _1, _1, _1} 250 251 for i, s := range sqrs { 252 p256OrdSqr(x, x, int(s)) 253 p256OrdMul(x, x, muls[i]) 254 } 255 256 // Multiplying by one in the Montgomery domain converts a Montgomery 257 // value out of the domain. 258 one := []uint64{1, 0, 0, 0} 259 p256OrdMul(x, x, one) 260 261 xOut := make([]byte, 32) 262 p256LittleToBig(xOut, x) 263 return new(big.Int).SetBytes(xOut) 264 } 265 266 // fromBig converts a *big.Int into a format used by this code. 267 func fromBig(out []uint64, big *big.Int) { 268 for i := range out { 269 out[i] = 0 270 } 271 272 for i, v := range big.Bits() { 273 out[i] = uint64(v) 274 } 275 } 276 277 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 278 // to out. If the scalar is equal or greater than the order of the group, it's 279 // reduced modulo that order. 280 func p256GetScalar(out []uint64, in []byte) { 281 n := new(big.Int).SetBytes(in) 282 283 if n.Cmp(p256.N) >= 0 { 284 n.Mod(n, p256.N) 285 } 286 fromBig(out, n) 287 } 288 289 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the 290 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 291 // R×R mod p. See comment in Inverse about how this is used. 292 var rr = []uint64{0x200000003, 0x2ffffffff, 0x100000001, 0x400000002} 293 294 func maybeReduceModP(in *big.Int) *big.Int { 295 if in.Cmp(p256.P) < 0 { 296 return in 297 } 298 return new(big.Int).Mod(in, p256.P) 299 } 300 301 // CombinedMult 利用amd64或arm64架构的cpu指令集功能加速曲线上的乘法运算 302 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 303 scalarReversed := make([]uint64, 4) 304 var r1, r2 p256Point 305 p256GetScalar(scalarReversed, baseScalar) 306 r1IsInfinity := scalarIsZero(scalarReversed) 307 r1.p256BaseMult(scalarReversed) 308 309 p256GetScalar(scalarReversed, scalar) 310 r2IsInfinity := scalarIsZero(scalarReversed) 311 fromBig(r2.xyz[0:4], maybeReduceModP(bigX)) 312 fromBig(r2.xyz[4:8], maybeReduceModP(bigY)) 313 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:]) 314 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) 315 316 // This sets r2's Z value to 1, in the Montgomery domain. 317 r2.xyz[8] = p256one[0] 318 r2.xyz[9] = p256one[1] 319 r2.xyz[10] = p256one[2] 320 r2.xyz[11] = p256one[3] 321 322 r2.p256ScalarMult(scalarReversed) 323 324 var sum, double p256Point 325 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:]) 326 p256PointDoubleAsm(double.xyz[:], r1.xyz[:]) 327 sum.CopyConditional(&double, pointsEqual) 328 sum.CopyConditional(&r1, r2IsInfinity) 329 sum.CopyConditional(&r2, r1IsInfinity) 330 331 return sum.p256PointToAffine() 332 } 333 334 // ScalarBaseMult sm2p256曲线基点乘法: k*G , k是随机数,G是曲线基点座标。 335 // 实现的是 elliptic.Curve 接口。 336 // ScalarBaseMult returns k*G, where G is the base point of the group 337 // and k is an integer in big-endian form. 338 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 339 scalarReversed := make([]uint64, 4) 340 p256GetScalar(scalarReversed, scalar) 341 342 var r p256Point 343 r.p256BaseMult(scalarReversed) 344 return r.p256PointToAffine() 345 } 346 347 // ScalarMult sm2p256曲线乘法: k*G , k是随机数,(Bx,By)是曲线上某点座标。 348 // 实现的是 elliptic.Curve 接口。 349 // ScalarMult returns k*(Bx,By) where k is a number in big-endian form. 350 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 351 scalarReversed := make([]uint64, 4) 352 p256GetScalar(scalarReversed, scalar) 353 354 var r p256Point 355 fromBig(r.xyz[0:4], maybeReduceModP(bigX)) 356 fromBig(r.xyz[4:8], maybeReduceModP(bigY)) 357 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) 358 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) 359 // This sets r2's Z value to 1, in the Montgomery domain. 360 r.xyz[8] = p256one[0] 361 r.xyz[9] = p256one[1] 362 r.xyz[10] = p256one[2] 363 r.xyz[11] = p256one[3] 364 365 r.p256ScalarMult(scalarReversed) 366 return r.p256PointToAffine() 367 } 368 369 // uint64IsZero returns 1 if x is zero and zero otherwise. 370 func uint64IsZero(x uint64) int { 371 x = ^x 372 x &= x >> 32 373 x &= x >> 16 374 x &= x >> 8 375 x &= x >> 4 376 x &= x >> 2 377 x &= x >> 1 378 return int(x & 1) 379 } 380 381 // scalarIsZero returns 1 if scalar represents the zero value, and zero 382 // otherwise. 383 func scalarIsZero(scalar []uint64) int { 384 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3]) 385 } 386 387 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 388 zInv := make([]uint64, 4) 389 zInvSq := make([]uint64, 4) 390 p256Inverse(zInv, p.xyz[8:12]) 391 p256Sqr(zInvSq, zInv, 1) 392 p256Mul(zInv, zInv, zInvSq) 393 394 p256Mul(zInvSq, p.xyz[0:4], zInvSq) 395 p256Mul(zInv, p.xyz[4:8], zInv) 396 397 p256FromMont(zInvSq, zInvSq) 398 p256FromMont(zInv, zInv) 399 400 xOut := make([]byte, 32) 401 yOut := make([]byte, 32) 402 p256LittleToBig(xOut, zInvSq) 403 p256LittleToBig(yOut, zInv) 404 405 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut) 406 } 407 408 // CopyConditional copies overwrites p with src if v == 1, and leaves p 409 // unchanged if v == 0. 410 func (p *p256Point) CopyConditional(src *p256Point, v int) { 411 pMask := uint64(v) - 1 412 srcMask := ^pMask 413 414 for i, n := range p.xyz { 415 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask) 416 } 417 } 418 419 // p256Inverse sets out to in^-1 mod p. 420 func p256Inverse(out, in []uint64) { 421 // Inversion is calculated through exponentiation by p - 2, per Fermat's 422 // little theorem. 423 // 424 // The sequence of 14 multiplications and 255 squarings is derived from the 425 // following addition chain generated with github.com/mmcloughlin/addchain 426 // v0.4.0. 427 // 428 // _10 = 2*1 429 // _11 = 1 + _10 430 // _110 = 2*_11 431 // _111 = 1 + _110 432 // _111000 = _111 << 3 433 // _111111 = _111 + _111000 434 // _1111110 = 2*_111111 435 // _1111111 = 1 + _1111110 436 // x12 = _1111110 << 5 + _111111 437 // x24 = x12 << 12 + x12 438 // x31 = x24 << 7 + _1111111 439 // i39 = x31 << 2 440 // i68 = i39 << 29 441 // x62 = x31 + i68 442 // i71 = i68 << 2 443 // x64 = i39 + i71 + _11 444 // i265 = ((i71 << 32 + x64) << 64 + x64) << 94 445 // return (x62 + i265) << 2 + 1 446 var stack [3 * 4]uint64 447 t0 := stack[4*0 : 4*0+4] 448 t1 := stack[4*1 : 4*1+4] 449 t2 := stack[4*2 : 4*2+4] 450 451 p256Sqr(out, in, 1) 452 p256Mul(t0, in, out) 453 p256Sqr(out, t0, 1) 454 p256Mul(out, in, out) 455 p256Sqr(t1, out, 3) 456 p256Mul(t1, out, t1) 457 p256Sqr(t2, t1, 1) 458 p256Mul(out, in, t2) 459 p256Sqr(t2, t2, 5) 460 p256Mul(t1, t1, t2) 461 p256Sqr(t2, t1, 12) 462 p256Mul(t1, t1, t2) 463 p256Sqr(t1, t1, 7) 464 p256Mul(out, out, t1) 465 p256Sqr(t2, out, 2) 466 p256Sqr(t1, t2, 29) 467 p256Mul(out, out, t1) 468 p256Sqr(t1, t1, 2) 469 p256Mul(t2, t2, t1) 470 p256Mul(t0, t0, t2) 471 p256Sqr(t1, t1, 32) 472 p256Mul(t1, t0, t1) 473 p256Sqr(t1, t1, 64) 474 p256Mul(t0, t0, t1) 475 p256Sqr(t0, t0, 94) 476 p256Mul(out, out, t0) 477 p256Sqr(out, out, 2) 478 p256Mul(out, in, out) 479 } 480 481 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) { 482 copy(r[index*12:], p.xyz[:]) 483 } 484 485 // This function takes those six bits as an integer (0 .. 63), writing the 486 // recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute 487 // value, in the range 0 .. 16). Note that this integer essentially provides 488 // the input bits "shifted to the left" by one position: for example, the input 489 // to compute the least significant recoded digit, given that there's no bit 490 // b_-1, has to be b_4 b_3 b_2 b_1 b_0 0. 491 // 492 // Reference: 493 // https://github.com/openssl/openssl/blob/master/crypto/ec/ecp_nistputil.c 494 // https://github.com/google/boringssl/blob/master/crypto/fipsmodule/ec/util.c 495 func boothW5(in uint) (int, int) { 496 var s = ^((in >> 5) - 1) // sets all bits to MSB(in), 'in' seen as 6-bit value 497 var d = (1 << 6) - in - 1 // d = 63 - in, or d = ^in & 0x3f 498 d = (d & s) | (in & (^s)) // d = in if in < 2^5; otherwise, d = 63 - in 499 d = (d >> 1) + (d & 1) // d = (d + 1) / 2 500 return int(d), int(s & 1) 501 } 502 503 func boothW6(in uint) (int, int) { 504 var s = ^((in >> 6) - 1) 505 var d = (1 << 7) - in - 1 506 d = (d & s) | (in & (^s)) 507 d = (d >> 1) + (d & 1) 508 return int(d), int(s & 1) 509 } 510 511 func (p *p256Point) p256BaseMult(scalar []uint64) { 512 wvalue := (scalar[0] << 1) & 0x7f 513 sel, sign := boothW6(uint(wvalue)) 514 p256SelectBase(&p.xyz, p256Precomputed, sel) 515 p256NegCond(p.xyz[4:8], sign) 516 517 // (This is one, in the Montgomery domain.) 518 p.xyz[8] = p256one[0] 519 p.xyz[9] = p256one[1] 520 p.xyz[10] = p256one[2] 521 p.xyz[11] = p256one[3] 522 523 var t0 p256Point 524 // (This is one, in the Montgomery domain.) 525 t0.xyz[8] = p256one[0] 526 t0.xyz[9] = p256one[1] 527 t0.xyz[10] = p256one[2] 528 t0.xyz[11] = p256one[3] 529 530 index := uint(5) 531 zero := sel 532 533 for i := 1; i < 43; i++ { 534 if index < 192 { 535 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 536 } else { 537 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 538 } 539 index += 6 540 sel, sign = boothW6(uint(wvalue)) 541 p256SelectBase(&t0.xyz, p256Precomputed[i*32*8*8:], sel) 542 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero) 543 zero |= sel 544 } 545 } 546 547 func (p *p256Point) p256ScalarMult(scalar []uint64) { 548 // precomp is a table of precomputed points that stores powers of p 549 // from p^1 to p^16. 550 var precomp [16 * 4 * 3]uint64 551 var t0, t1, t2, t3 p256Point 552 553 // Prepare the table 554 p.p256StorePoint(&precomp, 0) // 1 555 556 p256PointDoubleAsm(t0.xyz[:], p.xyz[:]) 557 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:]) 558 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:]) 559 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:]) 560 t0.p256StorePoint(&precomp, 1) // 2 561 t1.p256StorePoint(&precomp, 3) // 4 562 t2.p256StorePoint(&precomp, 7) // 8 563 t3.p256StorePoint(&precomp, 15) // 16 564 565 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 566 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 567 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 568 t0.p256StorePoint(&precomp, 2) // 3 569 t1.p256StorePoint(&precomp, 4) // 5 570 t2.p256StorePoint(&precomp, 8) // 9 571 572 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 573 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:]) 574 t0.p256StorePoint(&precomp, 5) // 6 575 t1.p256StorePoint(&precomp, 9) // 10 576 577 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:]) 578 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:]) 579 t2.p256StorePoint(&precomp, 6) // 7 580 t1.p256StorePoint(&precomp, 10) // 11 581 582 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:]) 583 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:]) 584 t0.p256StorePoint(&precomp, 11) // 12 585 t2.p256StorePoint(&precomp, 13) // 14 586 587 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:]) 588 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:]) 589 t0.p256StorePoint(&precomp, 12) // 13 590 t2.p256StorePoint(&precomp, 14) // 15 591 592 // Start scanning the window from top bit 593 index := uint(254) 594 var sel, sign int 595 596 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 597 sel, _ = boothW5(uint(wvalue)) 598 599 p256Select(p.xyz[0:12], precomp[0:], sel) 600 zero := sel 601 602 for index > 4 { 603 index -= 5 604 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 605 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 606 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 607 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 608 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 609 610 if index < 192 { 611 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 612 } else { 613 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 614 } 615 616 sel, sign = boothW5(uint(wvalue)) 617 618 p256Select(t0.xyz[0:], precomp[0:], sel) 619 p256NegCond(t0.xyz[4:8], sign) 620 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 621 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 622 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 623 zero |= sel 624 } 625 626 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 627 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 628 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 629 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 630 p256PointDoubleAsm(p.xyz[:], p.xyz[:]) 631 632 wvalue = (scalar[0] << 1) & 0x3f 633 sel, sign = boothW5(uint(wvalue)) 634 635 p256Select(t0.xyz[0:], precomp[0:], sel) 636 p256NegCond(t0.xyz[4:8], sign) 637 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:]) 638 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel) 639 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero) 640 }