github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/elliptic/p256_s390x.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build s390x 6 7 package elliptic 8 9 import ( 10 "crypto/subtle" 11 "math/big" 12 "unsafe" 13 14 "github.com/FISCO-BCOS/crypto/internal/cpu" 15 ) 16 17 const ( 18 offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) 19 offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE) 20 ) 21 22 type p256CurveFast struct { 23 *CurveParams 24 } 25 26 type p256Point struct { 27 x [32]byte 28 y [32]byte 29 z [32]byte 30 } 31 32 var ( 33 p256 Curve 34 p256PreFast *[37][64]p256Point 35 ) 36 37 //go:noescape 38 func p256MulInternalTrampolineSetup() 39 40 //go:noescape 41 func p256SqrInternalTrampolineSetup() 42 43 //go:noescape 44 func p256MulInternalVX() 45 46 //go:noescape 47 func p256MulInternalVMSL() 48 49 //go:noescape 50 func p256SqrInternalVX() 51 52 //go:noescape 53 func p256SqrInternalVMSL() 54 55 func initP256Arch() { 56 if cpu.S390X.HasVX { 57 p256 = p256CurveFast{p256Params} 58 initTable() 59 return 60 } 61 62 // No vector support, use pure Go implementation. 63 p256 = p256Curve{p256Params} 64 return 65 } 66 67 func (curve p256CurveFast) Params() *CurveParams { 68 return curve.CurveParams 69 } 70 71 // Functions implemented in p256_asm_s390x.s 72 // Montgomery multiplication modulo P256 73 // 74 //go:noescape 75 func p256SqrAsm(res, in1 []byte) 76 77 //go:noescape 78 func p256MulAsm(res, in1, in2 []byte) 79 80 // Montgomery square modulo P256 81 func p256Sqr(res, in []byte) { 82 p256SqrAsm(res, in) 83 } 84 85 // Montgomery multiplication by 1 86 // 87 //go:noescape 88 func p256FromMont(res, in []byte) 89 90 // iff cond == 1 val <- -val 91 // 92 //go:noescape 93 func p256NegCond(val *p256Point, cond int) 94 95 // if cond == 0 res <- b; else res <- a 96 // 97 //go:noescape 98 func p256MovCond(res, a, b *p256Point, cond int) 99 100 // Constant time table access 101 // 102 //go:noescape 103 func p256Select(point *p256Point, table []p256Point, idx int) 104 105 //go:noescape 106 func p256SelectBase(point *p256Point, table []p256Point, idx int) 107 108 // Montgomery multiplication modulo Ord(G) 109 // 110 //go:noescape 111 func p256OrdMul(res, in1, in2 []byte) 112 113 // Montgomery square modulo Ord(G), repeated n times 114 func p256OrdSqr(res, in []byte, n int) { 115 copy(res, in) 116 for i := 0; i < n; i += 1 { 117 p256OrdMul(res, res, res) 118 } 119 } 120 121 // Point add with P2 being affine point 122 // If sign == 1 -> P2 = -P2 123 // If sel == 0 -> P3 = P1 124 // if zero == 0 -> P3 = P2 125 // 126 //go:noescape 127 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) 128 129 // Point add 130 // 131 //go:noescape 132 func p256PointAddAsm(P3, P1, P2 *p256Point) int 133 134 //go:noescape 135 func p256PointDoubleAsm(P3, P1 *p256Point) 136 137 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { 138 if k.Cmp(p256Params.N) >= 0 { 139 // This should never happen. 140 reducedK := new(big.Int).Mod(k, p256Params.N) 141 k = reducedK 142 } 143 144 // table will store precomputed powers of x. The 32 bytes at index 145 // i store x^(i+1). 146 var table [15][32]byte 147 148 x := fromBig(k) 149 // This code operates in the Montgomery domain where R = 2^256 mod n 150 // and n is the order of the scalar field. (See initP256 for the 151 // value.) Elements in the Montgomery domain take the form a×R and 152 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 153 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 154 // i.e. converts x into the Montgomery domain. Stored in BigEndian form 155 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, 156 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} 157 158 p256OrdMul(table[0][:], x, RR) 159 160 // Prepare the table, no need in constant time access, because the 161 // power is not a secret. (Entry 0 is never used.) 162 for i := 2; i < 16; i += 2 { 163 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) 164 p256OrdMul(table[i][:], table[i-1][:], table[0][:]) 165 } 166 167 copy(x, table[14][:]) // f 168 169 p256OrdSqr(x[0:32], x[0:32], 4) 170 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff 171 t := make([]byte, 32) 172 copy(t, x) 173 174 p256OrdSqr(x, x, 8) 175 p256OrdMul(x, x, t) // ffff 176 copy(t, x) 177 178 p256OrdSqr(x, x, 16) 179 p256OrdMul(x, x, t) // ffffffff 180 copy(t, x) 181 182 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 183 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 184 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 185 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 186 187 // Remaining 32 windows 188 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 189 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 190 for i := 0; i < 32; i++ { 191 p256OrdSqr(x, x, 4) 192 p256OrdMul(x, x, table[expLo[i]-1][:]) 193 } 194 195 // Multiplying by one in the Montgomery domain converts a Montgomery 196 // value out of the domain. 197 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} 199 p256OrdMul(x, x, one) 200 201 return new(big.Int).SetBytes(x) 202 } 203 204 // fromBig converts a *big.Int into a format used by this code. 205 func fromBig(big *big.Int) []byte { 206 // This could be done a lot more efficiently... 207 res := big.Bytes() 208 if 32 == len(res) { 209 return res 210 } 211 t := make([]byte, 32) 212 offset := 32 - len(res) 213 for i := len(res) - 1; i >= 0; i-- { 214 t[i+offset] = res[i] 215 } 216 return t 217 } 218 219 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 220 // is equal or greater than the order of the group, it's reduced modulo that order. 221 func p256GetMultiplier(in []byte) []byte { 222 n := new(big.Int).SetBytes(in) 223 224 if n.Cmp(p256Params.N) >= 0 { 225 n.Mod(n, p256Params.N) 226 } 227 return fromBig(n) 228 } 229 230 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 231 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 232 // R×R mod p. See comment in Inverse about how this is used. 233 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 234 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} 235 236 // (This is one, in the Montgomery domain.) 237 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 238 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} 239 240 func maybeReduceModP(in *big.Int) *big.Int { 241 if in.Cmp(p256Params.P) < 0 { 242 return in 243 } 244 return new(big.Int).Mod(in, p256Params.P) 245 } 246 247 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 248 var r1, r2 p256Point 249 scalarReduced := p256GetMultiplier(baseScalar) 250 r1IsInfinity := scalarIsZero(scalarReduced) 251 r1.p256BaseMult(scalarReduced) 252 253 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 254 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 255 copy(r2.z[:], one) 256 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 257 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 258 259 scalarReduced = p256GetMultiplier(scalar) 260 r2IsInfinity := scalarIsZero(scalarReduced) 261 r2.p256ScalarMult(p256GetMultiplier(scalar)) 262 263 var sum, double p256Point 264 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 265 p256PointDoubleAsm(&double, &r1) 266 p256MovCond(&sum, &double, &sum, pointsEqual) 267 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 268 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 269 return sum.p256PointToAffine() 270 } 271 272 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 273 var r p256Point 274 r.p256BaseMult(p256GetMultiplier(scalar)) 275 return r.p256PointToAffine() 276 } 277 278 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 279 var r p256Point 280 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 281 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 282 copy(r.z[:], one) 283 p256MulAsm(r.x[:], r.x[:], rr[:]) 284 p256MulAsm(r.y[:], r.y[:], rr[:]) 285 r.p256ScalarMult(p256GetMultiplier(scalar)) 286 return r.p256PointToAffine() 287 } 288 289 // scalarIsZero returns 1 if scalar represents the zero value, and zero 290 // otherwise. 291 func scalarIsZero(scalar []byte) int { 292 b := byte(0) 293 for _, s := range scalar { 294 b |= s 295 } 296 return subtle.ConstantTimeByteEq(b, 0) 297 } 298 299 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 300 zInv := make([]byte, 32) 301 zInvSq := make([]byte, 32) 302 303 p256Inverse(zInv, p.z[:]) 304 p256Sqr(zInvSq, zInv) 305 p256MulAsm(zInv, zInv, zInvSq) 306 307 p256MulAsm(zInvSq, p.x[:], zInvSq) 308 p256MulAsm(zInv, p.y[:], zInv) 309 310 p256FromMont(zInvSq, zInvSq) 311 p256FromMont(zInv, zInv) 312 313 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) 314 } 315 316 // p256Inverse sets out to in^-1 mod p. 317 func p256Inverse(out, in []byte) { 318 var stack [6 * 32]byte 319 p2 := stack[32*0 : 32*0+32] 320 p4 := stack[32*1 : 32*1+32] 321 p8 := stack[32*2 : 32*2+32] 322 p16 := stack[32*3 : 32*3+32] 323 p32 := stack[32*4 : 32*4+32] 324 325 p256Sqr(out, in) 326 p256MulAsm(p2, out, in) // 3*p 327 328 p256Sqr(out, p2) 329 p256Sqr(out, out) 330 p256MulAsm(p4, out, p2) // f*p 331 332 p256Sqr(out, p4) 333 p256Sqr(out, out) 334 p256Sqr(out, out) 335 p256Sqr(out, out) 336 p256MulAsm(p8, out, p4) // ff*p 337 338 p256Sqr(out, p8) 339 340 for i := 0; i < 7; i++ { 341 p256Sqr(out, out) 342 } 343 p256MulAsm(p16, out, p8) // ffff*p 344 345 p256Sqr(out, p16) 346 for i := 0; i < 15; i++ { 347 p256Sqr(out, out) 348 } 349 p256MulAsm(p32, out, p16) // ffffffff*p 350 351 p256Sqr(out, p32) 352 353 for i := 0; i < 31; i++ { 354 p256Sqr(out, out) 355 } 356 p256MulAsm(out, out, in) 357 358 for i := 0; i < 32*4; i++ { 359 p256Sqr(out, out) 360 } 361 p256MulAsm(out, out, p32) 362 363 for i := 0; i < 32; i++ { 364 p256Sqr(out, out) 365 } 366 p256MulAsm(out, out, p32) 367 368 for i := 0; i < 16; i++ { 369 p256Sqr(out, out) 370 } 371 p256MulAsm(out, out, p16) 372 373 for i := 0; i < 8; i++ { 374 p256Sqr(out, out) 375 } 376 p256MulAsm(out, out, p8) 377 378 p256Sqr(out, out) 379 p256Sqr(out, out) 380 p256Sqr(out, out) 381 p256Sqr(out, out) 382 p256MulAsm(out, out, p4) 383 384 p256Sqr(out, out) 385 p256Sqr(out, out) 386 p256MulAsm(out, out, p2) 387 388 p256Sqr(out, out) 389 p256Sqr(out, out) 390 p256MulAsm(out, out, in) 391 } 392 393 func boothW5(in uint) (int, int) { 394 var s uint = ^((in >> 5) - 1) 395 var d uint = (1 << 6) - in - 1 396 d = (d & s) | (in & (^s)) 397 d = (d >> 1) + (d & 1) 398 return int(d), int(s & 1) 399 } 400 401 func boothW7(in uint) (int, int) { 402 var s uint = ^((in >> 7) - 1) 403 var d uint = (1 << 8) - in - 1 404 d = (d & s) | (in & (^s)) 405 d = (d >> 1) + (d & 1) 406 return int(d), int(s & 1) 407 } 408 409 func initTable() { 410 p256PreFast = new([37][64]p256Point) //z coordinate not used 411 basePoint := p256Point{ 412 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, 413 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p 414 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, 415 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p 416 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 417 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p 418 } 419 420 t1 := new(p256Point) 421 t2 := new(p256Point) 422 *t2 = basePoint 423 424 zInv := make([]byte, 32) 425 zInvSq := make([]byte, 32) 426 for j := 0; j < 64; j++ { 427 *t1 = *t2 428 for i := 0; i < 37; i++ { 429 // The window size is 7 so we need to double 7 times. 430 if i != 0 { 431 for k := 0; k < 7; k++ { 432 p256PointDoubleAsm(t1, t1) 433 } 434 } 435 // Convert the point to affine form. (Its values are 436 // still in Montgomery form however.) 437 p256Inverse(zInv, t1.z[:]) 438 p256Sqr(zInvSq, zInv) 439 p256MulAsm(zInv, zInv, zInvSq) 440 441 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 442 p256MulAsm(t1.y[:], t1.y[:], zInv) 443 444 copy(t1.z[:], basePoint.z[:]) 445 // Update the table entry 446 copy(p256PreFast[i][j].x[:], t1.x[:]) 447 copy(p256PreFast[i][j].y[:], t1.y[:]) 448 } 449 if j == 0 { 450 p256PointDoubleAsm(t2, &basePoint) 451 } else { 452 p256PointAddAsm(t2, t2, &basePoint) 453 } 454 } 455 } 456 457 func (p *p256Point) p256BaseMult(scalar []byte) { 458 wvalue := (uint(scalar[31]) << 1) & 0xff 459 sel, sign := boothW7(uint(wvalue)) 460 p256SelectBase(p, p256PreFast[0][:], sel) 461 p256NegCond(p, sign) 462 463 copy(p.z[:], one[:]) 464 var t0 p256Point 465 466 copy(t0.z[:], one[:]) 467 468 index := uint(6) 469 zero := sel 470 471 for i := 1; i < 37; i++ { 472 if index < 247 { 473 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff 474 } else { 475 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff 476 } 477 index += 7 478 sel, sign = boothW7(uint(wvalue)) 479 p256SelectBase(&t0, p256PreFast[i][:], sel) 480 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 481 zero |= sel 482 } 483 } 484 485 func (p *p256Point) p256ScalarMult(scalar []byte) { 486 // precomp is a table of precomputed points that stores powers of p 487 // from p^1 to p^16. 488 var precomp [16]p256Point 489 var t0, t1, t2, t3 p256Point 490 491 // Prepare the table 492 *&precomp[0] = *p 493 494 p256PointDoubleAsm(&t0, p) 495 p256PointDoubleAsm(&t1, &t0) 496 p256PointDoubleAsm(&t2, &t1) 497 p256PointDoubleAsm(&t3, &t2) 498 *&precomp[1] = t0 // 2 499 *&precomp[3] = t1 // 4 500 *&precomp[7] = t2 // 8 501 *&precomp[15] = t3 // 16 502 503 p256PointAddAsm(&t0, &t0, p) 504 p256PointAddAsm(&t1, &t1, p) 505 p256PointAddAsm(&t2, &t2, p) 506 *&precomp[2] = t0 // 3 507 *&precomp[4] = t1 // 5 508 *&precomp[8] = t2 // 9 509 510 p256PointDoubleAsm(&t0, &t0) 511 p256PointDoubleAsm(&t1, &t1) 512 *&precomp[5] = t0 // 6 513 *&precomp[9] = t1 // 10 514 515 p256PointAddAsm(&t2, &t0, p) 516 p256PointAddAsm(&t1, &t1, p) 517 *&precomp[6] = t2 // 7 518 *&precomp[10] = t1 // 11 519 520 p256PointDoubleAsm(&t0, &t0) 521 p256PointDoubleAsm(&t2, &t2) 522 *&precomp[11] = t0 // 12 523 *&precomp[13] = t2 // 14 524 525 p256PointAddAsm(&t0, &t0, p) 526 p256PointAddAsm(&t2, &t2, p) 527 *&precomp[12] = t0 // 13 528 *&precomp[14] = t2 // 15 529 530 // Start scanning the window from top bit 531 index := uint(254) 532 var sel, sign int 533 534 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 535 sel, _ = boothW5(uint(wvalue)) 536 p256Select(p, precomp[:], sel) 537 zero := sel 538 539 for index > 4 { 540 index -= 5 541 p256PointDoubleAsm(p, p) 542 p256PointDoubleAsm(p, p) 543 p256PointDoubleAsm(p, p) 544 p256PointDoubleAsm(p, p) 545 p256PointDoubleAsm(p, p) 546 547 if index < 247 { 548 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f 549 } else { 550 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 551 } 552 553 sel, sign = boothW5(uint(wvalue)) 554 555 p256Select(&t0, precomp[:], sel) 556 p256NegCond(&t0, sign) 557 p256PointAddAsm(&t1, p, &t0) 558 p256MovCond(&t1, &t1, p, sel) 559 p256MovCond(p, &t1, &t0, zero) 560 zero |= sel 561 } 562 563 p256PointDoubleAsm(p, p) 564 p256PointDoubleAsm(p, p) 565 p256PointDoubleAsm(p, p) 566 p256PointDoubleAsm(p, p) 567 p256PointDoubleAsm(p, p) 568 569 wvalue = (uint(scalar[31]) << 1) & 0x3f 570 sel, sign = boothW5(uint(wvalue)) 571 572 p256Select(&t0, precomp[:], sel) 573 p256NegCond(&t0, sign) 574 p256PointAddAsm(&t1, p, &t0) 575 p256MovCond(&t1, &t1, p, sel) 576 p256MovCond(p, &t1, &t0, zero) 577 }