github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/crypto/elliptic/p256_s390x.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build s390x 6 7 package elliptic 8 9 import ( 10 "crypto/subtle" 11 "internal/cpu" 12 "math/big" 13 "unsafe" 14 ) 15 16 const ( 17 offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) 18 offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE) 19 ) 20 21 type p256CurveFast struct { 22 *CurveParams 23 } 24 25 type p256Point struct { 26 x [32]byte 27 y [32]byte 28 z [32]byte 29 } 30 31 var ( 32 p256 Curve 33 p256PreFast *[37][64]p256Point 34 ) 35 36 //go:noescape 37 func p256MulInternalTrampolineSetup() 38 39 //go:noescape 40 func p256SqrInternalTrampolineSetup() 41 42 //go:noescape 43 func p256MulInternalVX() 44 45 //go:noescape 46 func p256MulInternalVMSL() 47 48 //go:noescape 49 func p256SqrInternalVX() 50 51 //go:noescape 52 func p256SqrInternalVMSL() 53 54 func initP256Arch() { 55 if cpu.S390X.HasVX { 56 p256 = p256CurveFast{p256Params} 57 initTable() 58 return 59 } 60 61 // No vector support, use pure Go implementation. 62 p256 = p256Curve{p256Params} 63 return 64 } 65 66 func (curve p256CurveFast) Params() *CurveParams { 67 return curve.CurveParams 68 } 69 70 // Functions implemented in p256_asm_s390x.s 71 // Montgomery multiplication modulo P256 72 // 73 //go:noescape 74 func p256SqrAsm(res, in1 []byte) 75 76 //go:noescape 77 func p256MulAsm(res, in1, in2 []byte) 78 79 // Montgomery square modulo P256 80 func p256Sqr(res, in []byte) { 81 p256SqrAsm(res, in) 82 } 83 84 // Montgomery multiplication by 1 85 // 86 //go:noescape 87 func p256FromMont(res, in []byte) 88 89 // iff cond == 1 val <- -val 90 // 91 //go:noescape 92 func p256NegCond(val *p256Point, cond int) 93 94 // if cond == 0 res <- b; else res <- a 95 // 96 //go:noescape 97 func p256MovCond(res, a, b *p256Point, cond int) 98 99 // Constant time table access 100 // 101 //go:noescape 102 func p256Select(point *p256Point, table []p256Point, idx int) 103 104 //go:noescape 105 func p256SelectBase(point *p256Point, table []p256Point, idx int) 106 107 // Montgomery multiplication modulo Ord(G) 108 // 109 //go:noescape 110 func p256OrdMul(res, in1, in2 []byte) 111 112 // Montgomery square modulo Ord(G), repeated n times 113 func p256OrdSqr(res, in []byte, n int) { 114 copy(res, in) 115 for i := 0; i < n; i += 1 { 116 p256OrdMul(res, res, res) 117 } 118 } 119 120 // Point add with P2 being affine point 121 // If sign == 1 -> P2 = -P2 122 // If sel == 0 -> P3 = P1 123 // if zero == 0 -> P3 = P2 124 // 125 //go:noescape 126 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) 127 128 // Point add 129 // 130 //go:noescape 131 func p256PointAddAsm(P3, P1, P2 *p256Point) int 132 133 //go:noescape 134 func p256PointDoubleAsm(P3, P1 *p256Point) 135 136 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { 137 if k.Cmp(p256Params.N) >= 0 { 138 // This should never happen. 139 reducedK := new(big.Int).Mod(k, p256Params.N) 140 k = reducedK 141 } 142 143 // table will store precomputed powers of x. The 32 bytes at index 144 // i store x^(i+1). 145 var table [15][32]byte 146 147 x := fromBig(k) 148 // This code operates in the Montgomery domain where R = 2^256 mod n 149 // and n is the order of the scalar field. (See initP256 for the 150 // value.) Elements in the Montgomery domain take the form a×R and 151 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 152 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 153 // i.e. converts x into the Montgomery domain. Stored in BigEndian form 154 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, 155 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} 156 157 p256OrdMul(table[0][:], x, RR) 158 159 // Prepare the table, no need in constant time access, because the 160 // power is not a secret. (Entry 0 is never used.) 161 for i := 2; i < 16; i += 2 { 162 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) 163 p256OrdMul(table[i][:], table[i-1][:], table[0][:]) 164 } 165 166 copy(x, table[14][:]) // f 167 168 p256OrdSqr(x[0:32], x[0:32], 4) 169 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff 170 t := make([]byte, 32) 171 copy(t, x) 172 173 p256OrdSqr(x, x, 8) 174 p256OrdMul(x, x, t) // ffff 175 copy(t, x) 176 177 p256OrdSqr(x, x, 16) 178 p256OrdMul(x, x, t) // ffffffff 179 copy(t, x) 180 181 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 182 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 183 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 184 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 185 186 // Remaining 32 windows 187 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 188 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 189 for i := 0; i < 32; i++ { 190 p256OrdSqr(x, x, 4) 191 p256OrdMul(x, x, table[expLo[i]-1][:]) 192 } 193 194 // Multiplying by one in the Montgomery domain converts a Montgomery 195 // value out of the domain. 196 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} 198 p256OrdMul(x, x, one) 199 200 return new(big.Int).SetBytes(x) 201 } 202 203 // fromBig converts a *big.Int into a format used by this code. 204 func fromBig(big *big.Int) []byte { 205 // This could be done a lot more efficiently... 206 res := big.Bytes() 207 if 32 == len(res) { 208 return res 209 } 210 t := make([]byte, 32) 211 offset := 32 - len(res) 212 for i := len(res) - 1; i >= 0; i-- { 213 t[i+offset] = res[i] 214 } 215 return t 216 } 217 218 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 219 // is equal or greater than the order of the group, it's reduced modulo that order. 220 func p256GetMultiplier(in []byte) []byte { 221 n := new(big.Int).SetBytes(in) 222 223 if n.Cmp(p256Params.N) >= 0 { 224 n.Mod(n, p256Params.N) 225 } 226 return fromBig(n) 227 } 228 229 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 230 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 231 // R×R mod p. See comment in Inverse about how this is used. 232 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 233 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} 234 235 // (This is one, in the Montgomery domain.) 236 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 237 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} 238 239 func maybeReduceModP(in *big.Int) *big.Int { 240 if in.Cmp(p256Params.P) < 0 { 241 return in 242 } 243 return new(big.Int).Mod(in, p256Params.P) 244 } 245 246 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 247 var r1, r2 p256Point 248 scalarReduced := p256GetMultiplier(baseScalar) 249 r1IsInfinity := scalarIsZero(scalarReduced) 250 r1.p256BaseMult(scalarReduced) 251 252 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 253 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 254 copy(r2.z[:], one) 255 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 256 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 257 258 scalarReduced = p256GetMultiplier(scalar) 259 r2IsInfinity := scalarIsZero(scalarReduced) 260 r2.p256ScalarMult(p256GetMultiplier(scalar)) 261 262 var sum, double p256Point 263 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 264 p256PointDoubleAsm(&double, &r1) 265 p256MovCond(&sum, &double, &sum, pointsEqual) 266 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 267 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 268 return sum.p256PointToAffine() 269 } 270 271 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 272 var r p256Point 273 r.p256BaseMult(p256GetMultiplier(scalar)) 274 return r.p256PointToAffine() 275 } 276 277 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 278 var r p256Point 279 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 280 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 281 copy(r.z[:], one) 282 p256MulAsm(r.x[:], r.x[:], rr[:]) 283 p256MulAsm(r.y[:], r.y[:], rr[:]) 284 r.p256ScalarMult(p256GetMultiplier(scalar)) 285 return r.p256PointToAffine() 286 } 287 288 // scalarIsZero returns 1 if scalar represents the zero value, and zero 289 // otherwise. 290 func scalarIsZero(scalar []byte) int { 291 b := byte(0) 292 for _, s := range scalar { 293 b |= s 294 } 295 return subtle.ConstantTimeByteEq(b, 0) 296 } 297 298 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 299 zInv := make([]byte, 32) 300 zInvSq := make([]byte, 32) 301 302 p256Inverse(zInv, p.z[:]) 303 p256Sqr(zInvSq, zInv) 304 p256MulAsm(zInv, zInv, zInvSq) 305 306 p256MulAsm(zInvSq, p.x[:], zInvSq) 307 p256MulAsm(zInv, p.y[:], zInv) 308 309 p256FromMont(zInvSq, zInvSq) 310 p256FromMont(zInv, zInv) 311 312 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) 313 } 314 315 // p256Inverse sets out to in^-1 mod p. 316 func p256Inverse(out, in []byte) { 317 var stack [6 * 32]byte 318 p2 := stack[32*0 : 32*0+32] 319 p4 := stack[32*1 : 32*1+32] 320 p8 := stack[32*2 : 32*2+32] 321 p16 := stack[32*3 : 32*3+32] 322 p32 := stack[32*4 : 32*4+32] 323 324 p256Sqr(out, in) 325 p256MulAsm(p2, out, in) // 3*p 326 327 p256Sqr(out, p2) 328 p256Sqr(out, out) 329 p256MulAsm(p4, out, p2) // f*p 330 331 p256Sqr(out, p4) 332 p256Sqr(out, out) 333 p256Sqr(out, out) 334 p256Sqr(out, out) 335 p256MulAsm(p8, out, p4) // ff*p 336 337 p256Sqr(out, p8) 338 339 for i := 0; i < 7; i++ { 340 p256Sqr(out, out) 341 } 342 p256MulAsm(p16, out, p8) // ffff*p 343 344 p256Sqr(out, p16) 345 for i := 0; i < 15; i++ { 346 p256Sqr(out, out) 347 } 348 p256MulAsm(p32, out, p16) // ffffffff*p 349 350 p256Sqr(out, p32) 351 352 for i := 0; i < 31; i++ { 353 p256Sqr(out, out) 354 } 355 p256MulAsm(out, out, in) 356 357 for i := 0; i < 32*4; i++ { 358 p256Sqr(out, out) 359 } 360 p256MulAsm(out, out, p32) 361 362 for i := 0; i < 32; i++ { 363 p256Sqr(out, out) 364 } 365 p256MulAsm(out, out, p32) 366 367 for i := 0; i < 16; i++ { 368 p256Sqr(out, out) 369 } 370 p256MulAsm(out, out, p16) 371 372 for i := 0; i < 8; i++ { 373 p256Sqr(out, out) 374 } 375 p256MulAsm(out, out, p8) 376 377 p256Sqr(out, out) 378 p256Sqr(out, out) 379 p256Sqr(out, out) 380 p256Sqr(out, out) 381 p256MulAsm(out, out, p4) 382 383 p256Sqr(out, out) 384 p256Sqr(out, out) 385 p256MulAsm(out, out, p2) 386 387 p256Sqr(out, out) 388 p256Sqr(out, out) 389 p256MulAsm(out, out, in) 390 } 391 392 func boothW5(in uint) (int, int) { 393 var s uint = ^((in >> 5) - 1) 394 var d uint = (1 << 6) - in - 1 395 d = (d & s) | (in & (^s)) 396 d = (d >> 1) + (d & 1) 397 return int(d), int(s & 1) 398 } 399 400 func boothW7(in uint) (int, int) { 401 var s uint = ^((in >> 7) - 1) 402 var d uint = (1 << 8) - in - 1 403 d = (d & s) | (in & (^s)) 404 d = (d >> 1) + (d & 1) 405 return int(d), int(s & 1) 406 } 407 408 func initTable() { 409 p256PreFast = new([37][64]p256Point) //z coordinate not used 410 basePoint := p256Point{ 411 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, 412 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p 413 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, 414 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p 415 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 416 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p 417 } 418 419 t1 := new(p256Point) 420 t2 := new(p256Point) 421 *t2 = basePoint 422 423 zInv := make([]byte, 32) 424 zInvSq := make([]byte, 32) 425 for j := 0; j < 64; j++ { 426 *t1 = *t2 427 for i := 0; i < 37; i++ { 428 // The window size is 7 so we need to double 7 times. 429 if i != 0 { 430 for k := 0; k < 7; k++ { 431 p256PointDoubleAsm(t1, t1) 432 } 433 } 434 // Convert the point to affine form. (Its values are 435 // still in Montgomery form however.) 436 p256Inverse(zInv, t1.z[:]) 437 p256Sqr(zInvSq, zInv) 438 p256MulAsm(zInv, zInv, zInvSq) 439 440 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 441 p256MulAsm(t1.y[:], t1.y[:], zInv) 442 443 copy(t1.z[:], basePoint.z[:]) 444 // Update the table entry 445 copy(p256PreFast[i][j].x[:], t1.x[:]) 446 copy(p256PreFast[i][j].y[:], t1.y[:]) 447 } 448 if j == 0 { 449 p256PointDoubleAsm(t2, &basePoint) 450 } else { 451 p256PointAddAsm(t2, t2, &basePoint) 452 } 453 } 454 } 455 456 func (p *p256Point) p256BaseMult(scalar []byte) { 457 wvalue := (uint(scalar[31]) << 1) & 0xff 458 sel, sign := boothW7(uint(wvalue)) 459 p256SelectBase(p, p256PreFast[0][:], sel) 460 p256NegCond(p, sign) 461 462 copy(p.z[:], one[:]) 463 var t0 p256Point 464 465 copy(t0.z[:], one[:]) 466 467 index := uint(6) 468 zero := sel 469 470 for i := 1; i < 37; i++ { 471 if index < 247 { 472 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff 473 } else { 474 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff 475 } 476 index += 7 477 sel, sign = boothW7(uint(wvalue)) 478 p256SelectBase(&t0, p256PreFast[i][:], sel) 479 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 480 zero |= sel 481 } 482 } 483 484 func (p *p256Point) p256ScalarMult(scalar []byte) { 485 // precomp is a table of precomputed points that stores powers of p 486 // from p^1 to p^16. 487 var precomp [16]p256Point 488 var t0, t1, t2, t3 p256Point 489 490 // Prepare the table 491 *&precomp[0] = *p 492 493 p256PointDoubleAsm(&t0, p) 494 p256PointDoubleAsm(&t1, &t0) 495 p256PointDoubleAsm(&t2, &t1) 496 p256PointDoubleAsm(&t3, &t2) 497 *&precomp[1] = t0 // 2 498 *&precomp[3] = t1 // 4 499 *&precomp[7] = t2 // 8 500 *&precomp[15] = t3 // 16 501 502 p256PointAddAsm(&t0, &t0, p) 503 p256PointAddAsm(&t1, &t1, p) 504 p256PointAddAsm(&t2, &t2, p) 505 *&precomp[2] = t0 // 3 506 *&precomp[4] = t1 // 5 507 *&precomp[8] = t2 // 9 508 509 p256PointDoubleAsm(&t0, &t0) 510 p256PointDoubleAsm(&t1, &t1) 511 *&precomp[5] = t0 // 6 512 *&precomp[9] = t1 // 10 513 514 p256PointAddAsm(&t2, &t0, p) 515 p256PointAddAsm(&t1, &t1, p) 516 *&precomp[6] = t2 // 7 517 *&precomp[10] = t1 // 11 518 519 p256PointDoubleAsm(&t0, &t0) 520 p256PointDoubleAsm(&t2, &t2) 521 *&precomp[11] = t0 // 12 522 *&precomp[13] = t2 // 14 523 524 p256PointAddAsm(&t0, &t0, p) 525 p256PointAddAsm(&t2, &t2, p) 526 *&precomp[12] = t0 // 13 527 *&precomp[14] = t2 // 15 528 529 // Start scanning the window from top bit 530 index := uint(254) 531 var sel, sign int 532 533 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 534 sel, _ = boothW5(uint(wvalue)) 535 p256Select(p, precomp[:], sel) 536 zero := sel 537 538 for index > 4 { 539 index -= 5 540 p256PointDoubleAsm(p, p) 541 p256PointDoubleAsm(p, p) 542 p256PointDoubleAsm(p, p) 543 p256PointDoubleAsm(p, p) 544 p256PointDoubleAsm(p, p) 545 546 if index < 247 { 547 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f 548 } else { 549 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 550 } 551 552 sel, sign = boothW5(uint(wvalue)) 553 554 p256Select(&t0, precomp[:], sel) 555 p256NegCond(&t0, sign) 556 p256PointAddAsm(&t1, p, &t0) 557 p256MovCond(&t1, &t1, p, sel) 558 p256MovCond(p, &t1, &t0, zero) 559 zero |= sel 560 } 561 562 p256PointDoubleAsm(p, p) 563 p256PointDoubleAsm(p, p) 564 p256PointDoubleAsm(p, p) 565 p256PointDoubleAsm(p, p) 566 p256PointDoubleAsm(p, p) 567 568 wvalue = (uint(scalar[31]) << 1) & 0x3f 569 sel, sign = boothW5(uint(wvalue)) 570 571 p256Select(&t0, precomp[:], sel) 572 p256NegCond(&t0, sign) 573 p256PointAddAsm(&t1, p, &t0) 574 p256MovCond(&t1, &t1, p, sel) 575 p256MovCond(p, &t1, &t0, zero) 576 }