github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/crypto/elliptic/p256_ppc64le.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ppc64le 6 7 package elliptic 8 9 import ( 10 "crypto/subtle" 11 "encoding/binary" 12 "math/big" 13 ) 14 15 // This was ported from the s390x implementation for ppc64le. 16 // Some hints are included here for changes that should be 17 // in the big endian ppc64 implementation, however more 18 // investigation and testing is needed for the ppc64 big 19 // endian version to work. 20 type p256CurveFast struct { 21 *CurveParams 22 } 23 24 type p256Point struct { 25 x [32]byte 26 y [32]byte 27 z [32]byte 28 } 29 30 var ( 31 p256 Curve 32 p256PreFast *[37][64]p256Point 33 ) 34 35 func initP256Arch() { 36 p256 = p256CurveFast{p256Params} 37 initTable() 38 return 39 } 40 41 func (curve p256CurveFast) Params() *CurveParams { 42 return curve.CurveParams 43 } 44 45 // Functions implemented in p256_asm_ppc64le.s 46 // Montgomery multiplication modulo P256 47 // 48 //go:noescape 49 func p256MulAsm(res, in1, in2 []byte) 50 51 // Montgomery square modulo P256 52 // 53 func p256Sqr(res, in []byte) { 54 p256MulAsm(res, in, in) 55 } 56 57 // Montgomery multiplication by 1 58 // 59 //go:noescape 60 func p256FromMont(res, in []byte) 61 62 // iff cond == 1 val <- -val 63 // 64 //go:noescape 65 func p256NegCond(val *p256Point, cond int) 66 67 // if cond == 0 res <- b; else res <- a 68 // 69 //go:noescape 70 func p256MovCond(res, a, b *p256Point, cond int) 71 72 // Constant time table access 73 // 74 //go:noescape 75 func p256Select(point *p256Point, table []p256Point, idx int) 76 77 // 78 //go:noescape 79 func p256SelectBase(point *p256Point, table []p256Point, idx int) 80 81 // Point add with P2 being affine point 82 // If sign == 1 -> P2 = -P2 83 // If sel == 0 -> P3 = P1 84 // if zero == 0 -> P3 = P2 85 // 86 //go:noescape 87 func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int) 88 89 // Point add 90 // 91 //go:noescape 92 func p256PointAddAsm(res, in1, in2 *p256Point) int 93 94 // 95 //go:noescape 96 func p256PointDoubleAsm(res, in *p256Point) 97 98 // The result should be a slice in LE order, but the slice 99 // from big.Bytes is in BE order. 100 // TODO: For big endian implementation, do not reverse bytes. 101 // 102 func fromBig(big *big.Int) []byte { 103 // This could be done a lot more efficiently... 104 res := big.Bytes() 105 t := make([]byte, 32) 106 if len(res) < 32 { 107 copy(t[32-len(res):], res) 108 } else if len(res) == 32 { 109 copy(t, res) 110 } else { 111 copy(t, res[len(res)-32:]) 112 } 113 p256ReverseBytes(t, t) 114 return t 115 } 116 117 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 118 // is equal or greater than the order of the group, it's reduced modulo that order. 119 func p256GetMultiplier(in []byte) []byte { 120 n := new(big.Int).SetBytes(in) 121 122 if n.Cmp(p256Params.N) >= 0 { 123 n.Mod(n, p256Params.N) 124 } 125 return fromBig(n) 126 } 127 128 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 129 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 130 // R×R mod p. See comment in Inverse about how this is used. 131 // TODO: For big endian implementation, the bytes in these slices should be in reverse order, 132 // as found in the s390x implementation. 133 var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00} 134 135 // (This is one, in the Montgomery domain.) 136 var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00} 137 138 func maybeReduceModP(in *big.Int) *big.Int { 139 if in.Cmp(p256Params.P) < 0 { 140 return in 141 } 142 return new(big.Int).Mod(in, p256Params.P) 143 } 144 145 // p256ReverseBytes copies the first 32 bytes from in to res in reverse order. 146 func p256ReverseBytes(res, in []byte) { 147 // remove bounds check 148 in = in[:32] 149 res = res[:32] 150 151 // Load in reverse order 152 a := binary.BigEndian.Uint64(in[0:]) 153 b := binary.BigEndian.Uint64(in[8:]) 154 c := binary.BigEndian.Uint64(in[16:]) 155 d := binary.BigEndian.Uint64(in[24:]) 156 157 // Store in normal order 158 binary.LittleEndian.PutUint64(res[0:], d) 159 binary.LittleEndian.PutUint64(res[8:], c) 160 binary.LittleEndian.PutUint64(res[16:], b) 161 binary.LittleEndian.PutUint64(res[24:], a) 162 } 163 164 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 165 var r1, r2 p256Point 166 167 scalarReduced := p256GetMultiplier(baseScalar) 168 r1IsInfinity := scalarIsZero(scalarReduced) 169 r1.p256BaseMult(scalarReduced) 170 171 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 172 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 173 copy(r2.z[:], one) 174 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 175 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 176 177 scalarReduced = p256GetMultiplier(scalar) 178 r2IsInfinity := scalarIsZero(scalarReduced) 179 r2.p256ScalarMult(scalarReduced) 180 181 var sum, double p256Point 182 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 183 p256PointDoubleAsm(&double, &r1) 184 p256MovCond(&sum, &double, &sum, pointsEqual) 185 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 186 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 187 return sum.p256PointToAffine() 188 } 189 190 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 191 var r p256Point 192 reducedScalar := p256GetMultiplier(scalar) 193 r.p256BaseMult(reducedScalar) 194 return r.p256PointToAffine() 195 } 196 197 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 198 scalarReduced := p256GetMultiplier(scalar) 199 var r p256Point 200 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 201 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 202 copy(r.z[:], one) 203 p256MulAsm(r.x[:], r.x[:], rr[:]) 204 p256MulAsm(r.y[:], r.y[:], rr[:]) 205 r.p256ScalarMult(scalarReduced) 206 return r.p256PointToAffine() 207 } 208 209 func scalarIsZero(scalar []byte) int { 210 // If any byte is not zero, return 0. 211 // Check for -0.... since that appears to compare to 0. 212 b := byte(0) 213 for _, s := range scalar { 214 b |= s 215 } 216 return subtle.ConstantTimeByteEq(b, 0) 217 } 218 219 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 220 zInv := make([]byte, 32) 221 zInvSq := make([]byte, 32) 222 223 p256Inverse(zInv, p.z[:]) 224 p256Sqr(zInvSq, zInv) 225 p256MulAsm(zInv, zInv, zInvSq) 226 227 p256MulAsm(zInvSq, p.x[:], zInvSq) 228 p256MulAsm(zInv, p.y[:], zInv) 229 230 p256FromMont(zInvSq, zInvSq) 231 p256FromMont(zInv, zInv) 232 233 // SetBytes expects a slice in big endian order, 234 // since ppc64le is little endian, reverse the bytes. 235 // TODO: For big endian, bytes don't need to be reversed. 236 p256ReverseBytes(zInvSq, zInvSq) 237 p256ReverseBytes(zInv, zInv) 238 rx := new(big.Int).SetBytes(zInvSq) 239 ry := new(big.Int).SetBytes(zInv) 240 return rx, ry 241 } 242 243 // p256Inverse sets out to in^-1 mod p. 244 func p256Inverse(out, in []byte) { 245 var stack [6 * 32]byte 246 p2 := stack[32*0 : 32*0+32] 247 p4 := stack[32*1 : 32*1+32] 248 p8 := stack[32*2 : 32*2+32] 249 p16 := stack[32*3 : 32*3+32] 250 p32 := stack[32*4 : 32*4+32] 251 252 p256Sqr(out, in) 253 p256MulAsm(p2, out, in) // 3*p 254 255 p256Sqr(out, p2) 256 p256Sqr(out, out) 257 p256MulAsm(p4, out, p2) // f*p 258 259 p256Sqr(out, p4) 260 p256Sqr(out, out) 261 p256Sqr(out, out) 262 p256Sqr(out, out) 263 p256MulAsm(p8, out, p4) // ff*p 264 265 p256Sqr(out, p8) 266 267 for i := 0; i < 7; i++ { 268 p256Sqr(out, out) 269 } 270 p256MulAsm(p16, out, p8) // ffff*p 271 272 p256Sqr(out, p16) 273 for i := 0; i < 15; i++ { 274 p256Sqr(out, out) 275 } 276 p256MulAsm(p32, out, p16) // ffffffff*p 277 278 p256Sqr(out, p32) 279 280 for i := 0; i < 31; i++ { 281 p256Sqr(out, out) 282 } 283 p256MulAsm(out, out, in) 284 285 for i := 0; i < 32*4; i++ { 286 p256Sqr(out, out) 287 } 288 p256MulAsm(out, out, p32) 289 290 for i := 0; i < 32; i++ { 291 p256Sqr(out, out) 292 } 293 p256MulAsm(out, out, p32) 294 295 for i := 0; i < 16; i++ { 296 p256Sqr(out, out) 297 } 298 p256MulAsm(out, out, p16) 299 300 for i := 0; i < 8; i++ { 301 p256Sqr(out, out) 302 } 303 p256MulAsm(out, out, p8) 304 305 p256Sqr(out, out) 306 p256Sqr(out, out) 307 p256Sqr(out, out) 308 p256Sqr(out, out) 309 p256MulAsm(out, out, p4) 310 311 p256Sqr(out, out) 312 p256Sqr(out, out) 313 p256MulAsm(out, out, p2) 314 315 p256Sqr(out, out) 316 p256Sqr(out, out) 317 p256MulAsm(out, out, in) 318 } 319 320 func boothW5(in uint) (int, int) { 321 var s uint = ^((in >> 5) - 1) 322 var d uint = (1 << 6) - in - 1 323 d = (d & s) | (in & (^s)) 324 d = (d >> 1) + (d & 1) 325 return int(d), int(s & 1) 326 } 327 328 func boothW6(in uint) (int, int) { 329 var s uint = ^((in >> 6) - 1) 330 var d uint = (1 << 7) - in - 1 331 d = (d & s) | (in & (^s)) 332 d = (d >> 1) + (d & 1) 333 return int(d), int(s & 1) 334 } 335 336 func boothW7(in uint) (int, int) { 337 var s uint = ^((in >> 7) - 1) 338 var d uint = (1 << 8) - in - 1 339 d = (d & s) | (in & (^s)) 340 d = (d >> 1) + (d & 1) 341 return int(d), int(s & 1) 342 } 343 344 func initTable() { 345 346 p256PreFast = new([37][64]p256Point) 347 348 // TODO: For big endian, these slices should be in reverse byte order, 349 // as found in the s390x implementation. 350 basePoint := p256Point{ 351 x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75, 352 0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p 353 y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b, 354 0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p 355 z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 356 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p 357 358 } 359 360 t1 := new(p256Point) 361 t2 := new(p256Point) 362 *t2 = basePoint 363 364 zInv := make([]byte, 32) 365 zInvSq := make([]byte, 32) 366 for j := 0; j < 64; j++ { 367 *t1 = *t2 368 for i := 0; i < 37; i++ { 369 // The window size is 7 so we need to double 7 times. 370 if i != 0 { 371 for k := 0; k < 7; k++ { 372 p256PointDoubleAsm(t1, t1) 373 } 374 } 375 // Convert the point to affine form. (Its values are 376 // still in Montgomery form however.) 377 p256Inverse(zInv, t1.z[:]) 378 p256Sqr(zInvSq, zInv) 379 p256MulAsm(zInv, zInv, zInvSq) 380 381 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 382 p256MulAsm(t1.y[:], t1.y[:], zInv) 383 384 copy(t1.z[:], basePoint.z[:]) 385 // Update the table entry 386 copy(p256PreFast[i][j].x[:], t1.x[:]) 387 copy(p256PreFast[i][j].y[:], t1.y[:]) 388 } 389 if j == 0 { 390 p256PointDoubleAsm(t2, &basePoint) 391 } else { 392 p256PointAddAsm(t2, t2, &basePoint) 393 } 394 } 395 } 396 397 func (p *p256Point) p256BaseMult(scalar []byte) { 398 // TODO: For big endian, the index should be 31 not 0. 399 wvalue := (uint(scalar[0]) << 1) & 0xff 400 sel, sign := boothW7(uint(wvalue)) 401 p256SelectBase(p, p256PreFast[0][:], sel) 402 p256NegCond(p, sign) 403 404 copy(p.z[:], one[:]) 405 var t0 p256Point 406 407 copy(t0.z[:], one[:]) 408 409 index := uint(6) 410 zero := sel 411 for i := 1; i < 37; i++ { 412 // TODO: For big endian, use the same index values as found 413 // in the s390x implementation. 414 if index < 247 { 415 wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff 416 } else { 417 wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff 418 } 419 index += 7 420 sel, sign = boothW7(uint(wvalue)) 421 p256SelectBase(&t0, p256PreFast[i][:], sel) 422 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 423 zero |= sel 424 } 425 } 426 427 func (p *p256Point) p256ScalarMult(scalar []byte) { 428 // precomp is a table of precomputed points that stores powers of p 429 // from p^1 to p^16. 430 var precomp [16]p256Point 431 var t0, t1, t2, t3 p256Point 432 433 *&precomp[0] = *p 434 p256PointDoubleAsm(&t0, p) 435 p256PointDoubleAsm(&t1, &t0) 436 p256PointDoubleAsm(&t2, &t1) 437 p256PointDoubleAsm(&t3, &t2) 438 *&precomp[1] = t0 439 *&precomp[3] = t1 440 *&precomp[7] = t2 441 *&precomp[15] = t3 442 443 p256PointAddAsm(&t0, &t0, p) 444 p256PointAddAsm(&t1, &t1, p) 445 p256PointAddAsm(&t2, &t2, p) 446 447 *&precomp[2] = t0 448 *&precomp[4] = t1 449 *&precomp[8] = t2 450 451 p256PointDoubleAsm(&t0, &t0) 452 p256PointDoubleAsm(&t1, &t1) 453 *&precomp[5] = t0 454 *&precomp[9] = t1 455 456 p256PointAddAsm(&t2, &t0, p) 457 p256PointAddAsm(&t1, &t1, p) 458 *&precomp[6] = t2 459 *&precomp[10] = t1 460 461 p256PointDoubleAsm(&t0, &t0) 462 p256PointDoubleAsm(&t2, &t2) 463 *&precomp[11] = t0 464 *&precomp[13] = t2 465 466 p256PointAddAsm(&t0, &t0, p) 467 p256PointAddAsm(&t2, &t2, p) 468 *&precomp[12] = t0 469 *&precomp[14] = t2 470 471 // Start scanning the window from top bit 472 index := uint(254) 473 var sel, sign int 474 475 // TODO: For big endian, use index found in s390x implementation. 476 wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f 477 sel, _ = boothW5(uint(wvalue)) 478 p256Select(p, precomp[:], sel) 479 zero := sel 480 481 for index > 4 { 482 index -= 5 483 p256PointDoubleAsm(p, p) 484 p256PointDoubleAsm(p, p) 485 p256PointDoubleAsm(p, p) 486 p256PointDoubleAsm(p, p) 487 p256PointDoubleAsm(p, p) 488 489 // TODO: For big endian, use index values as found in s390x implementation. 490 if index < 247 { 491 wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f 492 } else { 493 wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f 494 } 495 496 sel, sign = boothW5(uint(wvalue)) 497 498 p256Select(&t0, precomp[:], sel) 499 p256NegCond(&t0, sign) 500 p256PointAddAsm(&t1, p, &t0) 501 p256MovCond(&t1, &t1, p, sel) 502 p256MovCond(p, &t1, &t0, zero) 503 zero |= sel 504 } 505 506 p256PointDoubleAsm(p, p) 507 p256PointDoubleAsm(p, p) 508 p256PointDoubleAsm(p, p) 509 p256PointDoubleAsm(p, p) 510 p256PointDoubleAsm(p, p) 511 512 // TODO: Use index for big endian as found in s390x implementation. 513 wvalue = (uint(scalar[0]) << 1) & 0x3f 514 sel, sign = boothW5(uint(wvalue)) 515 516 p256Select(&t0, precomp[:], sel) 517 p256NegCond(&t0, sign) 518 p256PointAddAsm(&t1, p, &t0) 519 p256MovCond(&t1, &t1, p, sel) 520 p256MovCond(p, &t1, &t0, zero) 521 }