github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/crypto/elliptic/p256_s390x.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build s390x 6 7 package elliptic 8 9 import ( 10 "crypto/subtle" 11 "math/big" 12 ) 13 14 type p256CurveFast struct { 15 *CurveParams 16 } 17 18 type p256Point struct { 19 x [32]byte 20 y [32]byte 21 z [32]byte 22 } 23 24 var ( 25 p256 Curve 26 p256PreFast *[37][64]p256Point 27 ) 28 29 // hasVectorFacility reports whether the machine has the z/Architecture 30 // vector facility installed and enabled. 31 func hasVectorFacility() bool 32 33 var hasVX = hasVectorFacility() 34 35 func initP256Arch() { 36 if hasVX { 37 p256 = p256CurveFast{p256Params} 38 initTable() 39 return 40 } 41 42 // No vector support, use pure Go implementation. 43 p256 = p256Curve{p256Params} 44 return 45 } 46 47 func (curve p256CurveFast) Params() *CurveParams { 48 return curve.CurveParams 49 } 50 51 // Functions implemented in p256_asm_s390x.s 52 // Montgomery multiplication modulo P256 53 // 54 //go:noescape 55 func p256MulAsm(res, in1, in2 []byte) 56 57 // Montgomery square modulo P256 58 func p256Sqr(res, in []byte) { 59 p256MulAsm(res, in, in) 60 } 61 62 // Montgomery multiplication by 1 63 // 64 //go:noescape 65 func p256FromMont(res, in []byte) 66 67 // iff cond == 1 val <- -val 68 // 69 //go:noescape 70 func p256NegCond(val *p256Point, cond int) 71 72 // if cond == 0 res <- b; else res <- a 73 // 74 //go:noescape 75 func p256MovCond(res, a, b *p256Point, cond int) 76 77 // Constant time table access 78 // 79 //go:noescape 80 func p256Select(point *p256Point, table []p256Point, idx int) 81 82 //go:noescape 83 func p256SelectBase(point *p256Point, table []p256Point, idx int) 84 85 // Montgomery multiplication modulo Ord(G) 86 // 87 //go:noescape 88 func p256OrdMul(res, in1, in2 []byte) 89 90 // Montgomery square modulo Ord(G), repeated n times 91 func p256OrdSqr(res, in []byte, n int) { 92 copy(res, in) 93 for i := 0; i < n; i += 1 { 94 p256OrdMul(res, res, res) 95 } 96 } 97 98 // Point add with P2 being affine point 99 // If sign == 1 -> P2 = -P2 100 // If sel == 0 -> P3 = P1 101 // if zero == 0 -> P3 = P2 102 // 103 //go:noescape 104 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) 105 106 // Point add 107 // 108 //go:noescape 109 func p256PointAddAsm(P3, P1, P2 *p256Point) int 110 111 //go:noescape 112 func p256PointDoubleAsm(P3, P1 *p256Point) 113 114 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { 115 if k.Cmp(p256Params.N) >= 0 { 116 // This should never happen. 117 reducedK := new(big.Int).Mod(k, p256Params.N) 118 k = reducedK 119 } 120 121 // table will store precomputed powers of x. The 32 bytes at index 122 // i store x^(i+1). 123 var table [15][32]byte 124 125 x := fromBig(k) 126 // This code operates in the Montgomery domain where R = 2^256 mod n 127 // and n is the order of the scalar field. (See initP256 for the 128 // value.) Elements in the Montgomery domain take the form a×R and 129 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 130 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 131 // i.e. converts x into the Montgomery domain. Stored in BigEndian form 132 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, 133 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} 134 135 p256OrdMul(table[0][:], x, RR) 136 137 // Prepare the table, no need in constant time access, because the 138 // power is not a secret. (Entry 0 is never used.) 139 for i := 2; i < 16; i += 2 { 140 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) 141 p256OrdMul(table[i][:], table[i-1][:], table[0][:]) 142 } 143 144 copy(x, table[14][:]) // f 145 146 p256OrdSqr(x[0:32], x[0:32], 4) 147 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff 148 t := make([]byte, 32) 149 copy(t, x) 150 151 p256OrdSqr(x, x, 8) 152 p256OrdMul(x, x, t) // ffff 153 copy(t, x) 154 155 p256OrdSqr(x, x, 16) 156 p256OrdMul(x, x, t) // ffffffff 157 copy(t, x) 158 159 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 160 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 161 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 162 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 163 164 // Remaining 32 windows 165 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 166 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 167 for i := 0; i < 32; i++ { 168 p256OrdSqr(x, x, 4) 169 p256OrdMul(x, x, table[expLo[i]-1][:]) 170 } 171 172 // Multiplying by one in the Montgomery domain converts a Montgomery 173 // value out of the domain. 174 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} 176 p256OrdMul(x, x, one) 177 178 return new(big.Int).SetBytes(x) 179 } 180 181 // fromBig converts a *big.Int into a format used by this code. 182 func fromBig(big *big.Int) []byte { 183 // This could be done a lot more efficiently... 184 res := big.Bytes() 185 if 32 == len(res) { 186 return res 187 } 188 t := make([]byte, 32) 189 offset := 32 - len(res) 190 for i := len(res) - 1; i >= 0; i-- { 191 t[i+offset] = res[i] 192 } 193 return t 194 } 195 196 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 197 // is equal or greater than the order of the group, it's reduced modulo that order. 198 func p256GetMultiplier(in []byte) []byte { 199 n := new(big.Int).SetBytes(in) 200 201 if n.Cmp(p256Params.N) >= 0 { 202 n.Mod(n, p256Params.N) 203 } 204 return fromBig(n) 205 } 206 207 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 208 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 209 // R×R mod p. See comment in Inverse about how this is used. 210 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 211 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} 212 213 // (This is one, in the Montgomery domain.) 214 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 215 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} 216 217 func maybeReduceModP(in *big.Int) *big.Int { 218 if in.Cmp(p256Params.P) < 0 { 219 return in 220 } 221 return new(big.Int).Mod(in, p256Params.P) 222 } 223 224 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 225 var r1, r2 p256Point 226 scalarReduced := p256GetMultiplier(baseScalar) 227 r1IsInfinity := scalarIsZero(scalarReduced) 228 r1.p256BaseMult(scalarReduced) 229 230 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 231 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 232 copy(r2.z[:], one) 233 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 234 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 235 236 scalarReduced = p256GetMultiplier(scalar) 237 r2IsInfinity := scalarIsZero(scalarReduced) 238 r2.p256ScalarMult(p256GetMultiplier(scalar)) 239 240 var sum, double p256Point 241 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 242 p256PointDoubleAsm(&double, &r1) 243 p256MovCond(&sum, &double, &sum, pointsEqual) 244 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 245 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 246 return sum.p256PointToAffine() 247 } 248 249 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 250 var r p256Point 251 r.p256BaseMult(p256GetMultiplier(scalar)) 252 return r.p256PointToAffine() 253 } 254 255 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 256 var r p256Point 257 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 258 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 259 copy(r.z[:], one) 260 p256MulAsm(r.x[:], r.x[:], rr[:]) 261 p256MulAsm(r.y[:], r.y[:], rr[:]) 262 r.p256ScalarMult(p256GetMultiplier(scalar)) 263 return r.p256PointToAffine() 264 } 265 266 // scalarIsZero returns 1 if scalar represents the zero value, and zero 267 // otherwise. 268 func scalarIsZero(scalar []byte) int { 269 b := byte(0) 270 for _, s := range scalar { 271 b |= s 272 } 273 return subtle.ConstantTimeByteEq(b, 0) 274 } 275 276 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 277 zInv := make([]byte, 32) 278 zInvSq := make([]byte, 32) 279 280 p256Inverse(zInv, p.z[:]) 281 p256Sqr(zInvSq, zInv) 282 p256MulAsm(zInv, zInv, zInvSq) 283 284 p256MulAsm(zInvSq, p.x[:], zInvSq) 285 p256MulAsm(zInv, p.y[:], zInv) 286 287 p256FromMont(zInvSq, zInvSq) 288 p256FromMont(zInv, zInv) 289 290 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) 291 } 292 293 // p256Inverse sets out to in^-1 mod p. 294 func p256Inverse(out, in []byte) { 295 var stack [6 * 32]byte 296 p2 := stack[32*0 : 32*0+32] 297 p4 := stack[32*1 : 32*1+32] 298 p8 := stack[32*2 : 32*2+32] 299 p16 := stack[32*3 : 32*3+32] 300 p32 := stack[32*4 : 32*4+32] 301 302 p256Sqr(out, in) 303 p256MulAsm(p2, out, in) // 3*p 304 305 p256Sqr(out, p2) 306 p256Sqr(out, out) 307 p256MulAsm(p4, out, p2) // f*p 308 309 p256Sqr(out, p4) 310 p256Sqr(out, out) 311 p256Sqr(out, out) 312 p256Sqr(out, out) 313 p256MulAsm(p8, out, p4) // ff*p 314 315 p256Sqr(out, p8) 316 317 for i := 0; i < 7; i++ { 318 p256Sqr(out, out) 319 } 320 p256MulAsm(p16, out, p8) // ffff*p 321 322 p256Sqr(out, p16) 323 for i := 0; i < 15; i++ { 324 p256Sqr(out, out) 325 } 326 p256MulAsm(p32, out, p16) // ffffffff*p 327 328 p256Sqr(out, p32) 329 330 for i := 0; i < 31; i++ { 331 p256Sqr(out, out) 332 } 333 p256MulAsm(out, out, in) 334 335 for i := 0; i < 32*4; i++ { 336 p256Sqr(out, out) 337 } 338 p256MulAsm(out, out, p32) 339 340 for i := 0; i < 32; i++ { 341 p256Sqr(out, out) 342 } 343 p256MulAsm(out, out, p32) 344 345 for i := 0; i < 16; i++ { 346 p256Sqr(out, out) 347 } 348 p256MulAsm(out, out, p16) 349 350 for i := 0; i < 8; i++ { 351 p256Sqr(out, out) 352 } 353 p256MulAsm(out, out, p8) 354 355 p256Sqr(out, out) 356 p256Sqr(out, out) 357 p256Sqr(out, out) 358 p256Sqr(out, out) 359 p256MulAsm(out, out, p4) 360 361 p256Sqr(out, out) 362 p256Sqr(out, out) 363 p256MulAsm(out, out, p2) 364 365 p256Sqr(out, out) 366 p256Sqr(out, out) 367 p256MulAsm(out, out, in) 368 } 369 370 func boothW5(in uint) (int, int) { 371 var s uint = ^((in >> 5) - 1) 372 var d uint = (1 << 6) - in - 1 373 d = (d & s) | (in & (^s)) 374 d = (d >> 1) + (d & 1) 375 return int(d), int(s & 1) 376 } 377 378 func boothW7(in uint) (int, int) { 379 var s uint = ^((in >> 7) - 1) 380 var d uint = (1 << 8) - in - 1 381 d = (d & s) | (in & (^s)) 382 d = (d >> 1) + (d & 1) 383 return int(d), int(s & 1) 384 } 385 386 func initTable() { 387 p256PreFast = new([37][64]p256Point) //z coordinate not used 388 basePoint := p256Point{ 389 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, 390 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p 391 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, 392 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p 393 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 394 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p 395 } 396 397 t1 := new(p256Point) 398 t2 := new(p256Point) 399 *t2 = basePoint 400 401 zInv := make([]byte, 32) 402 zInvSq := make([]byte, 32) 403 for j := 0; j < 64; j++ { 404 *t1 = *t2 405 for i := 0; i < 37; i++ { 406 // The window size is 7 so we need to double 7 times. 407 if i != 0 { 408 for k := 0; k < 7; k++ { 409 p256PointDoubleAsm(t1, t1) 410 } 411 } 412 // Convert the point to affine form. (Its values are 413 // still in Montgomery form however.) 414 p256Inverse(zInv, t1.z[:]) 415 p256Sqr(zInvSq, zInv) 416 p256MulAsm(zInv, zInv, zInvSq) 417 418 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 419 p256MulAsm(t1.y[:], t1.y[:], zInv) 420 421 copy(t1.z[:], basePoint.z[:]) 422 // Update the table entry 423 copy(p256PreFast[i][j].x[:], t1.x[:]) 424 copy(p256PreFast[i][j].y[:], t1.y[:]) 425 } 426 if j == 0 { 427 p256PointDoubleAsm(t2, &basePoint) 428 } else { 429 p256PointAddAsm(t2, t2, &basePoint) 430 } 431 } 432 } 433 434 func (p *p256Point) p256BaseMult(scalar []byte) { 435 wvalue := (uint(scalar[31]) << 1) & 0xff 436 sel, sign := boothW7(uint(wvalue)) 437 p256SelectBase(p, p256PreFast[0][:], sel) 438 p256NegCond(p, sign) 439 440 copy(p.z[:], one[:]) 441 var t0 p256Point 442 443 copy(t0.z[:], one[:]) 444 445 index := uint(6) 446 zero := sel 447 448 for i := 1; i < 37; i++ { 449 if index < 247 { 450 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff 451 } else { 452 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff 453 } 454 index += 7 455 sel, sign = boothW7(uint(wvalue)) 456 p256SelectBase(&t0, p256PreFast[i][:], sel) 457 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 458 zero |= sel 459 } 460 } 461 462 func (p *p256Point) p256ScalarMult(scalar []byte) { 463 // precomp is a table of precomputed points that stores powers of p 464 // from p^1 to p^16. 465 var precomp [16]p256Point 466 var t0, t1, t2, t3 p256Point 467 468 // Prepare the table 469 *&precomp[0] = *p 470 471 p256PointDoubleAsm(&t0, p) 472 p256PointDoubleAsm(&t1, &t0) 473 p256PointDoubleAsm(&t2, &t1) 474 p256PointDoubleAsm(&t3, &t2) 475 *&precomp[1] = t0 // 2 476 *&precomp[3] = t1 // 4 477 *&precomp[7] = t2 // 8 478 *&precomp[15] = t3 // 16 479 480 p256PointAddAsm(&t0, &t0, p) 481 p256PointAddAsm(&t1, &t1, p) 482 p256PointAddAsm(&t2, &t2, p) 483 *&precomp[2] = t0 // 3 484 *&precomp[4] = t1 // 5 485 *&precomp[8] = t2 // 9 486 487 p256PointDoubleAsm(&t0, &t0) 488 p256PointDoubleAsm(&t1, &t1) 489 *&precomp[5] = t0 // 6 490 *&precomp[9] = t1 // 10 491 492 p256PointAddAsm(&t2, &t0, p) 493 p256PointAddAsm(&t1, &t1, p) 494 *&precomp[6] = t2 // 7 495 *&precomp[10] = t1 // 11 496 497 p256PointDoubleAsm(&t0, &t0) 498 p256PointDoubleAsm(&t2, &t2) 499 *&precomp[11] = t0 // 12 500 *&precomp[13] = t2 // 14 501 502 p256PointAddAsm(&t0, &t0, p) 503 p256PointAddAsm(&t2, &t2, p) 504 *&precomp[12] = t0 // 13 505 *&precomp[14] = t2 // 15 506 507 // Start scanning the window from top bit 508 index := uint(254) 509 var sel, sign int 510 511 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 512 sel, _ = boothW5(uint(wvalue)) 513 p256Select(p, precomp[:], sel) 514 zero := sel 515 516 for index > 4 { 517 index -= 5 518 p256PointDoubleAsm(p, p) 519 p256PointDoubleAsm(p, p) 520 p256PointDoubleAsm(p, p) 521 p256PointDoubleAsm(p, p) 522 p256PointDoubleAsm(p, p) 523 524 if index < 247 { 525 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f 526 } else { 527 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 528 } 529 530 sel, sign = boothW5(uint(wvalue)) 531 532 p256Select(&t0, precomp[:], sel) 533 p256NegCond(&t0, sign) 534 p256PointAddAsm(&t1, p, &t0) 535 p256MovCond(&t1, &t1, p, sel) 536 p256MovCond(p, &t1, &t0, zero) 537 zero |= sel 538 } 539 540 p256PointDoubleAsm(p, p) 541 p256PointDoubleAsm(p, p) 542 p256PointDoubleAsm(p, p) 543 p256PointDoubleAsm(p, p) 544 p256PointDoubleAsm(p, p) 545 546 wvalue = (uint(scalar[31]) << 1) & 0x3f 547 sel, sign = boothW5(uint(wvalue)) 548 549 p256Select(&t0, precomp[:], sel) 550 p256NegCond(&t0, sign) 551 p256PointAddAsm(&t1, p, &t0) 552 p256MovCond(&t1, &t1, p, sel) 553 p256MovCond(p, &t1, &t0, zero) 554 }