github.com/guyezi/gofrontend@v0.0.0-20200228202240-7a62a49e62c0/libgo/go/crypto/elliptic/p256_ppc64le.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 // +build ppc64le 7 8 package elliptic 9 10 import ( 11 "crypto/subtle" 12 "encoding/binary" 13 "math/big" 14 ) 15 16 // This was ported from the s390x implementation for ppc64le. 17 // Some hints are included here for changes that should be 18 // in the big endian ppc64 implementation, however more 19 // investigation and testing is needed for the ppc64 big 20 // endian version to work. 21 type p256CurveFast struct { 22 *CurveParams 23 } 24 25 type p256Point struct { 26 x [32]byte 27 y [32]byte 28 z [32]byte 29 } 30 31 var ( 32 p256 Curve 33 p256PreFast *[37][64]p256Point 34 ) 35 36 func initP256Arch() { 37 p256 = p256CurveFast{p256Params} 38 initTable() 39 return 40 } 41 42 func (curve p256CurveFast) Params() *CurveParams { 43 return curve.CurveParams 44 } 45 46 // Functions implemented in p256_asm_ppc64le.s 47 // Montgomery multiplication modulo P256 48 // 49 //go:noescape 50 func p256MulAsm(res, in1, in2 []byte) 51 52 // Montgomery square modulo P256 53 // 54 func p256Sqr(res, in []byte) { 55 p256MulAsm(res, in, in) 56 } 57 58 // Montgomery multiplication by 1 59 // 60 //go:noescape 61 func p256FromMont(res, in []byte) 62 63 // iff cond == 1 val <- -val 64 // 65 //go:noescape 66 func p256NegCond(val *p256Point, cond int) 67 68 // if cond == 0 res <- b; else res <- a 69 // 70 //go:noescape 71 func p256MovCond(res, a, b *p256Point, cond int) 72 73 // Constant time table access 74 // 75 //go:noescape 76 func p256Select(point *p256Point, table []p256Point, idx int) 77 78 // 79 //go:noescape 80 func p256SelectBase(point *p256Point, table []p256Point, idx int) 81 82 // Point add with P2 being affine point 83 // If sign == 1 -> P2 = -P2 84 // If sel == 0 -> P3 = P1 85 // if zero == 0 -> P3 = P2 86 // 87 //go:noescape 88 func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int) 89 90 // Point add 91 // 92 //go:noescape 93 func p256PointAddAsm(res, in1, in2 *p256Point) int 94 95 // 96 //go:noescape 97 func p256PointDoubleAsm(res, in *p256Point) 98 99 // The result should be a slice in LE order, but the slice 100 // from big.Bytes is in BE order. 101 // TODO: For big endian implementation, do not reverse bytes. 102 // 103 func fromBig(big *big.Int) []byte { 104 // This could be done a lot more efficiently... 105 res := big.Bytes() 106 t := make([]byte, 32) 107 if len(res) < 32 { 108 copy(t[32-len(res):], res) 109 } else if len(res) == 32 { 110 copy(t, res) 111 } else { 112 copy(t, res[len(res)-32:]) 113 } 114 p256ReverseBytes(t, t) 115 return t 116 } 117 118 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 119 // is equal or greater than the order of the group, it's reduced modulo that order. 120 func p256GetMultiplier(in []byte) []byte { 121 n := new(big.Int).SetBytes(in) 122 123 if n.Cmp(p256Params.N) >= 0 { 124 n.Mod(n, p256Params.N) 125 } 126 return fromBig(n) 127 } 128 129 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 130 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 131 // R×R mod p. See comment in Inverse about how this is used. 132 // TODO: For big endian implementation, the bytes in these slices should be in reverse order, 133 // as found in the s390x implementation. 134 var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00} 135 136 // (This is one, in the Montgomery domain.) 137 var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00} 138 139 func maybeReduceModP(in *big.Int) *big.Int { 140 if in.Cmp(p256Params.P) < 0 { 141 return in 142 } 143 return new(big.Int).Mod(in, p256Params.P) 144 } 145 146 // p256ReverseBytes copies the first 32 bytes from in to res in reverse order. 147 func p256ReverseBytes(res, in []byte) { 148 // remove bounds check 149 in = in[:32] 150 res = res[:32] 151 152 // Load in reverse order 153 a := binary.BigEndian.Uint64(in[0:]) 154 b := binary.BigEndian.Uint64(in[8:]) 155 c := binary.BigEndian.Uint64(in[16:]) 156 d := binary.BigEndian.Uint64(in[24:]) 157 158 // Store in normal order 159 binary.LittleEndian.PutUint64(res[0:], d) 160 binary.LittleEndian.PutUint64(res[8:], c) 161 binary.LittleEndian.PutUint64(res[16:], b) 162 binary.LittleEndian.PutUint64(res[24:], a) 163 } 164 165 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 166 var r1, r2 p256Point 167 168 scalarReduced := p256GetMultiplier(baseScalar) 169 r1IsInfinity := scalarIsZero(scalarReduced) 170 r1.p256BaseMult(scalarReduced) 171 172 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 173 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 174 copy(r2.z[:], one) 175 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 176 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 177 178 scalarReduced = p256GetMultiplier(scalar) 179 r2IsInfinity := scalarIsZero(scalarReduced) 180 r2.p256ScalarMult(scalarReduced) 181 182 var sum, double p256Point 183 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 184 p256PointDoubleAsm(&double, &r1) 185 p256MovCond(&sum, &double, &sum, pointsEqual) 186 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 187 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 188 return sum.p256PointToAffine() 189 } 190 191 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 192 var r p256Point 193 reducedScalar := p256GetMultiplier(scalar) 194 r.p256BaseMult(reducedScalar) 195 return r.p256PointToAffine() 196 } 197 198 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 199 scalarReduced := p256GetMultiplier(scalar) 200 var r p256Point 201 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 202 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 203 copy(r.z[:], one) 204 p256MulAsm(r.x[:], r.x[:], rr[:]) 205 p256MulAsm(r.y[:], r.y[:], rr[:]) 206 r.p256ScalarMult(scalarReduced) 207 return r.p256PointToAffine() 208 } 209 210 func scalarIsZero(scalar []byte) int { 211 // If any byte is not zero, return 0. 212 // Check for -0.... since that appears to compare to 0. 213 b := byte(0) 214 for _, s := range scalar { 215 b |= s 216 } 217 return subtle.ConstantTimeByteEq(b, 0) 218 } 219 220 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 221 zInv := make([]byte, 32) 222 zInvSq := make([]byte, 32) 223 224 p256Inverse(zInv, p.z[:]) 225 p256Sqr(zInvSq, zInv) 226 p256MulAsm(zInv, zInv, zInvSq) 227 228 p256MulAsm(zInvSq, p.x[:], zInvSq) 229 p256MulAsm(zInv, p.y[:], zInv) 230 231 p256FromMont(zInvSq, zInvSq) 232 p256FromMont(zInv, zInv) 233 234 // SetBytes expects a slice in big endian order, 235 // since ppc64le is little endian, reverse the bytes. 236 // TODO: For big endian, bytes don't need to be reversed. 237 p256ReverseBytes(zInvSq, zInvSq) 238 p256ReverseBytes(zInv, zInv) 239 rx := new(big.Int).SetBytes(zInvSq) 240 ry := new(big.Int).SetBytes(zInv) 241 return rx, ry 242 } 243 244 // p256Inverse sets out to in^-1 mod p. 245 func p256Inverse(out, in []byte) { 246 var stack [6 * 32]byte 247 p2 := stack[32*0 : 32*0+32] 248 p4 := stack[32*1 : 32*1+32] 249 p8 := stack[32*2 : 32*2+32] 250 p16 := stack[32*3 : 32*3+32] 251 p32 := stack[32*4 : 32*4+32] 252 253 p256Sqr(out, in) 254 p256MulAsm(p2, out, in) // 3*p 255 256 p256Sqr(out, p2) 257 p256Sqr(out, out) 258 p256MulAsm(p4, out, p2) // f*p 259 260 p256Sqr(out, p4) 261 p256Sqr(out, out) 262 p256Sqr(out, out) 263 p256Sqr(out, out) 264 p256MulAsm(p8, out, p4) // ff*p 265 266 p256Sqr(out, p8) 267 268 for i := 0; i < 7; i++ { 269 p256Sqr(out, out) 270 } 271 p256MulAsm(p16, out, p8) // ffff*p 272 273 p256Sqr(out, p16) 274 for i := 0; i < 15; i++ { 275 p256Sqr(out, out) 276 } 277 p256MulAsm(p32, out, p16) // ffffffff*p 278 279 p256Sqr(out, p32) 280 281 for i := 0; i < 31; i++ { 282 p256Sqr(out, out) 283 } 284 p256MulAsm(out, out, in) 285 286 for i := 0; i < 32*4; i++ { 287 p256Sqr(out, out) 288 } 289 p256MulAsm(out, out, p32) 290 291 for i := 0; i < 32; i++ { 292 p256Sqr(out, out) 293 } 294 p256MulAsm(out, out, p32) 295 296 for i := 0; i < 16; i++ { 297 p256Sqr(out, out) 298 } 299 p256MulAsm(out, out, p16) 300 301 for i := 0; i < 8; i++ { 302 p256Sqr(out, out) 303 } 304 p256MulAsm(out, out, p8) 305 306 p256Sqr(out, out) 307 p256Sqr(out, out) 308 p256Sqr(out, out) 309 p256Sqr(out, out) 310 p256MulAsm(out, out, p4) 311 312 p256Sqr(out, out) 313 p256Sqr(out, out) 314 p256MulAsm(out, out, p2) 315 316 p256Sqr(out, out) 317 p256Sqr(out, out) 318 p256MulAsm(out, out, in) 319 } 320 321 func boothW5(in uint) (int, int) { 322 var s uint = ^((in >> 5) - 1) 323 var d uint = (1 << 6) - in - 1 324 d = (d & s) | (in & (^s)) 325 d = (d >> 1) + (d & 1) 326 return int(d), int(s & 1) 327 } 328 329 func boothW6(in uint) (int, int) { 330 var s uint = ^((in >> 6) - 1) 331 var d uint = (1 << 7) - in - 1 332 d = (d & s) | (in & (^s)) 333 d = (d >> 1) + (d & 1) 334 return int(d), int(s & 1) 335 } 336 337 func boothW7(in uint) (int, int) { 338 var s uint = ^((in >> 7) - 1) 339 var d uint = (1 << 8) - in - 1 340 d = (d & s) | (in & (^s)) 341 d = (d >> 1) + (d & 1) 342 return int(d), int(s & 1) 343 } 344 345 func initTable() { 346 347 p256PreFast = new([37][64]p256Point) 348 349 // TODO: For big endian, these slices should be in reverse byte order, 350 // as found in the s390x implementation. 351 basePoint := p256Point{ 352 x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75, 353 0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p 354 y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b, 355 0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p 356 z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 357 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p 358 359 } 360 361 t1 := new(p256Point) 362 t2 := new(p256Point) 363 *t2 = basePoint 364 365 zInv := make([]byte, 32) 366 zInvSq := make([]byte, 32) 367 for j := 0; j < 64; j++ { 368 *t1 = *t2 369 for i := 0; i < 37; i++ { 370 // The window size is 7 so we need to double 7 times. 371 if i != 0 { 372 for k := 0; k < 7; k++ { 373 p256PointDoubleAsm(t1, t1) 374 } 375 } 376 // Convert the point to affine form. (Its values are 377 // still in Montgomery form however.) 378 p256Inverse(zInv, t1.z[:]) 379 p256Sqr(zInvSq, zInv) 380 p256MulAsm(zInv, zInv, zInvSq) 381 382 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 383 p256MulAsm(t1.y[:], t1.y[:], zInv) 384 385 copy(t1.z[:], basePoint.z[:]) 386 // Update the table entry 387 copy(p256PreFast[i][j].x[:], t1.x[:]) 388 copy(p256PreFast[i][j].y[:], t1.y[:]) 389 } 390 if j == 0 { 391 p256PointDoubleAsm(t2, &basePoint) 392 } else { 393 p256PointAddAsm(t2, t2, &basePoint) 394 } 395 } 396 } 397 398 func (p *p256Point) p256BaseMult(scalar []byte) { 399 // TODO: For big endian, the index should be 31 not 0. 400 wvalue := (uint(scalar[0]) << 1) & 0xff 401 sel, sign := boothW7(uint(wvalue)) 402 p256SelectBase(p, p256PreFast[0][:], sel) 403 p256NegCond(p, sign) 404 405 copy(p.z[:], one[:]) 406 var t0 p256Point 407 408 copy(t0.z[:], one[:]) 409 410 index := uint(6) 411 zero := sel 412 for i := 1; i < 37; i++ { 413 // TODO: For big endian, use the same index values as found 414 // in the s390x implementation. 415 if index < 247 { 416 wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff 417 } else { 418 wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff 419 } 420 index += 7 421 sel, sign = boothW7(uint(wvalue)) 422 p256SelectBase(&t0, p256PreFast[i][:], sel) 423 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 424 zero |= sel 425 } 426 } 427 428 func (p *p256Point) p256ScalarMult(scalar []byte) { 429 // precomp is a table of precomputed points that stores powers of p 430 // from p^1 to p^16. 431 var precomp [16]p256Point 432 var t0, t1, t2, t3 p256Point 433 434 *&precomp[0] = *p 435 p256PointDoubleAsm(&t0, p) 436 p256PointDoubleAsm(&t1, &t0) 437 p256PointDoubleAsm(&t2, &t1) 438 p256PointDoubleAsm(&t3, &t2) 439 *&precomp[1] = t0 440 *&precomp[3] = t1 441 *&precomp[7] = t2 442 *&precomp[15] = t3 443 444 p256PointAddAsm(&t0, &t0, p) 445 p256PointAddAsm(&t1, &t1, p) 446 p256PointAddAsm(&t2, &t2, p) 447 448 *&precomp[2] = t0 449 *&precomp[4] = t1 450 *&precomp[8] = t2 451 452 p256PointDoubleAsm(&t0, &t0) 453 p256PointDoubleAsm(&t1, &t1) 454 *&precomp[5] = t0 455 *&precomp[9] = t1 456 457 p256PointAddAsm(&t2, &t0, p) 458 p256PointAddAsm(&t1, &t1, p) 459 *&precomp[6] = t2 460 *&precomp[10] = t1 461 462 p256PointDoubleAsm(&t0, &t0) 463 p256PointDoubleAsm(&t2, &t2) 464 *&precomp[11] = t0 465 *&precomp[13] = t2 466 467 p256PointAddAsm(&t0, &t0, p) 468 p256PointAddAsm(&t2, &t2, p) 469 *&precomp[12] = t0 470 *&precomp[14] = t2 471 472 // Start scanning the window from top bit 473 index := uint(254) 474 var sel, sign int 475 476 // TODO: For big endian, use index found in s390x implementation. 477 wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f 478 sel, _ = boothW5(uint(wvalue)) 479 p256Select(p, precomp[:], sel) 480 zero := sel 481 482 for index > 4 { 483 index -= 5 484 p256PointDoubleAsm(p, p) 485 p256PointDoubleAsm(p, p) 486 p256PointDoubleAsm(p, p) 487 p256PointDoubleAsm(p, p) 488 p256PointDoubleAsm(p, p) 489 490 // TODO: For big endian, use index values as found in s390x implementation. 491 if index < 247 { 492 wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f 493 } else { 494 wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f 495 } 496 497 sel, sign = boothW5(uint(wvalue)) 498 499 p256Select(&t0, precomp[:], sel) 500 p256NegCond(&t0, sign) 501 p256PointAddAsm(&t1, p, &t0) 502 p256MovCond(&t1, &t1, p, sel) 503 p256MovCond(p, &t1, &t0, zero) 504 zero |= sel 505 } 506 507 p256PointDoubleAsm(p, p) 508 p256PointDoubleAsm(p, p) 509 p256PointDoubleAsm(p, p) 510 p256PointDoubleAsm(p, p) 511 p256PointDoubleAsm(p, p) 512 513 // TODO: Use index for big endian as found in s390x implementation. 514 wvalue = (uint(scalar[0]) << 1) & 0x3f 515 sel, sign = boothW5(uint(wvalue)) 516 517 p256Select(&t0, precomp[:], sel) 518 p256NegCond(&t0, sign) 519 p256PointAddAsm(&t1, p, &t0) 520 p256MovCond(&t1, &t1, p, sel) 521 p256MovCond(p, &t1, &t0, zero) 522 }