github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/crypto/elliptic/p256_s390x.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build s390x 6 7 package elliptic 8 9 import ( 10 "crypto/subtle" 11 "math/big" 12 ) 13 14 type p256CurveFast struct { 15 *CurveParams 16 } 17 18 type p256Point struct { 19 x [32]byte 20 y [32]byte 21 z [32]byte 22 } 23 24 var ( 25 p256 Curve 26 p256PreFast *[37][64]p256Point 27 ) 28 29 // hasVectorFacility reports whether the machine has the z/Architecture 30 // vector facility installed and enabled. 31 func hasVectorFacility() bool 32 33 var hasVX = hasVectorFacility() 34 35 func initP256Arch() { 36 if hasVX { 37 p256 = p256CurveFast{p256Params} 38 initTable() 39 return 40 } 41 42 // No vector support, use pure Go implementation. 43 p256 = p256Curve{p256Params} 44 return 45 } 46 47 func (curve p256CurveFast) Params() *CurveParams { 48 return curve.CurveParams 49 } 50 51 // Functions implemented in p256_asm_s390x.s 52 // Montgomery multiplication modulo P256 53 func p256MulAsm(res, in1, in2 []byte) 54 55 // Montgomery square modulo P256 56 func p256Sqr(res, in []byte) { 57 p256MulAsm(res, in, in) 58 } 59 60 // Montgomery multiplication by 1 61 func p256FromMont(res, in []byte) 62 63 // iff cond == 1 val <- -val 64 func p256NegCond(val *p256Point, cond int) 65 66 // if cond == 0 res <- b; else res <- a 67 func p256MovCond(res, a, b *p256Point, cond int) 68 69 // Constant time table access 70 func p256Select(point *p256Point, table []p256Point, idx int) 71 func p256SelectBase(point *p256Point, table []p256Point, idx int) 72 73 // Montgomery multiplication modulo Ord(G) 74 func p256OrdMul(res, in1, in2 []byte) 75 76 // Montgomery square modulo Ord(G), repeated n times 77 func p256OrdSqr(res, in []byte, n int) { 78 copy(res, in) 79 for i := 0; i < n; i += 1 { 80 p256OrdMul(res, res, res) 81 } 82 } 83 84 // Point add with P2 being affine point 85 // If sign == 1 -> P2 = -P2 86 // If sel == 0 -> P3 = P1 87 // if zero == 0 -> P3 = P2 88 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) 89 90 // Point add 91 func p256PointAddAsm(P3, P1, P2 *p256Point) int 92 func p256PointDoubleAsm(P3, P1 *p256Point) 93 94 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { 95 if k.Cmp(p256Params.N) >= 0 { 96 // This should never happen. 97 reducedK := new(big.Int).Mod(k, p256Params.N) 98 k = reducedK 99 } 100 101 // table will store precomputed powers of x. The 32 bytes at index 102 // i store x^(i+1). 103 var table [15][32]byte 104 105 x := fromBig(k) 106 // This code operates in the Montgomery domain where R = 2^256 mod n 107 // and n is the order of the scalar field. (See initP256 for the 108 // value.) Elements in the Montgomery domain take the form a×R and 109 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 110 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 111 // i.e. converts x into the Montgomery domain. Stored in BigEndian form 112 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, 113 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} 114 115 p256OrdMul(table[0][:], x, RR) 116 117 // Prepare the table, no need in constant time access, because the 118 // power is not a secret. (Entry 0 is never used.) 119 for i := 2; i < 16; i += 2 { 120 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) 121 p256OrdMul(table[i][:], table[i-1][:], table[0][:]) 122 } 123 124 copy(x, table[14][:]) // f 125 126 p256OrdSqr(x[0:32], x[0:32], 4) 127 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff 128 t := make([]byte, 32) 129 copy(t, x) 130 131 p256OrdSqr(x, x, 8) 132 p256OrdMul(x, x, t) // ffff 133 copy(t, x) 134 135 p256OrdSqr(x, x, 16) 136 p256OrdMul(x, x, t) // ffffffff 137 copy(t, x) 138 139 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 140 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 141 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 142 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 143 144 // Remaining 32 windows 145 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 146 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 147 for i := 0; i < 32; i++ { 148 p256OrdSqr(x, x, 4) 149 p256OrdMul(x, x, table[expLo[i]-1][:]) 150 } 151 152 // Multiplying by one in the Montgomery domain converts a Montgomery 153 // value out of the domain. 154 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} 156 p256OrdMul(x, x, one) 157 158 return new(big.Int).SetBytes(x) 159 } 160 161 // fromBig converts a *big.Int into a format used by this code. 162 func fromBig(big *big.Int) []byte { 163 // This could be done a lot more efficiently... 164 res := big.Bytes() 165 if 32 == len(res) { 166 return res 167 } 168 t := make([]byte, 32) 169 offset := 32 - len(res) 170 for i := len(res) - 1; i >= 0; i-- { 171 t[i+offset] = res[i] 172 } 173 return t 174 } 175 176 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 177 // is equal or greater than the order of the group, it's reduced modulo that order. 178 func p256GetMultiplier(in []byte) []byte { 179 n := new(big.Int).SetBytes(in) 180 181 if n.Cmp(p256Params.N) >= 0 { 182 n.Mod(n, p256Params.N) 183 } 184 return fromBig(n) 185 } 186 187 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 188 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 189 // R×R mod p. See comment in Inverse about how this is used. 190 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 191 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} 192 193 // (This is one, in the Montgomery domain.) 194 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 195 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} 196 197 func maybeReduceModP(in *big.Int) *big.Int { 198 if in.Cmp(p256Params.P) < 0 { 199 return in 200 } 201 return new(big.Int).Mod(in, p256Params.P) 202 } 203 204 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 205 var r1, r2 p256Point 206 scalarReduced := p256GetMultiplier(baseScalar) 207 r1IsInfinity := scalarIsZero(scalarReduced) 208 r1.p256BaseMult(scalarReduced) 209 210 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 211 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 212 copy(r2.z[:], one) 213 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 214 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 215 216 scalarReduced = p256GetMultiplier(scalar) 217 r2IsInfinity := scalarIsZero(scalarReduced) 218 r2.p256ScalarMult(p256GetMultiplier(scalar)) 219 220 var sum, double p256Point 221 pointsEqual := p256PointAddAsm(&sum, &r1, &r2) 222 p256PointDoubleAsm(&double, &r1) 223 p256MovCond(&sum, &double, &sum, pointsEqual) 224 p256MovCond(&sum, &r1, &sum, r2IsInfinity) 225 p256MovCond(&sum, &r2, &sum, r1IsInfinity) 226 return sum.p256PointToAffine() 227 } 228 229 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 230 var r p256Point 231 r.p256BaseMult(p256GetMultiplier(scalar)) 232 return r.p256PointToAffine() 233 } 234 235 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 236 var r p256Point 237 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 238 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 239 copy(r.z[:], one) 240 p256MulAsm(r.x[:], r.x[:], rr[:]) 241 p256MulAsm(r.y[:], r.y[:], rr[:]) 242 r.p256ScalarMult(p256GetMultiplier(scalar)) 243 return r.p256PointToAffine() 244 } 245 246 // scalarIsZero returns 1 if scalar represents the zero value, and zero 247 // otherwise. 248 func scalarIsZero(scalar []byte) int { 249 b := byte(0) 250 for _, s := range scalar { 251 b |= s 252 } 253 return subtle.ConstantTimeByteEq(b, 0) 254 } 255 256 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 257 zInv := make([]byte, 32) 258 zInvSq := make([]byte, 32) 259 260 p256Inverse(zInv, p.z[:]) 261 p256Sqr(zInvSq, zInv) 262 p256MulAsm(zInv, zInv, zInvSq) 263 264 p256MulAsm(zInvSq, p.x[:], zInvSq) 265 p256MulAsm(zInv, p.y[:], zInv) 266 267 p256FromMont(zInvSq, zInvSq) 268 p256FromMont(zInv, zInv) 269 270 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) 271 } 272 273 // p256Inverse sets out to in^-1 mod p. 274 func p256Inverse(out, in []byte) { 275 var stack [6 * 32]byte 276 p2 := stack[32*0 : 32*0+32] 277 p4 := stack[32*1 : 32*1+32] 278 p8 := stack[32*2 : 32*2+32] 279 p16 := stack[32*3 : 32*3+32] 280 p32 := stack[32*4 : 32*4+32] 281 282 p256Sqr(out, in) 283 p256MulAsm(p2, out, in) // 3*p 284 285 p256Sqr(out, p2) 286 p256Sqr(out, out) 287 p256MulAsm(p4, out, p2) // f*p 288 289 p256Sqr(out, p4) 290 p256Sqr(out, out) 291 p256Sqr(out, out) 292 p256Sqr(out, out) 293 p256MulAsm(p8, out, p4) // ff*p 294 295 p256Sqr(out, p8) 296 297 for i := 0; i < 7; i++ { 298 p256Sqr(out, out) 299 } 300 p256MulAsm(p16, out, p8) // ffff*p 301 302 p256Sqr(out, p16) 303 for i := 0; i < 15; i++ { 304 p256Sqr(out, out) 305 } 306 p256MulAsm(p32, out, p16) // ffffffff*p 307 308 p256Sqr(out, p32) 309 310 for i := 0; i < 31; i++ { 311 p256Sqr(out, out) 312 } 313 p256MulAsm(out, out, in) 314 315 for i := 0; i < 32*4; i++ { 316 p256Sqr(out, out) 317 } 318 p256MulAsm(out, out, p32) 319 320 for i := 0; i < 32; i++ { 321 p256Sqr(out, out) 322 } 323 p256MulAsm(out, out, p32) 324 325 for i := 0; i < 16; i++ { 326 p256Sqr(out, out) 327 } 328 p256MulAsm(out, out, p16) 329 330 for i := 0; i < 8; i++ { 331 p256Sqr(out, out) 332 } 333 p256MulAsm(out, out, p8) 334 335 p256Sqr(out, out) 336 p256Sqr(out, out) 337 p256Sqr(out, out) 338 p256Sqr(out, out) 339 p256MulAsm(out, out, p4) 340 341 p256Sqr(out, out) 342 p256Sqr(out, out) 343 p256MulAsm(out, out, p2) 344 345 p256Sqr(out, out) 346 p256Sqr(out, out) 347 p256MulAsm(out, out, in) 348 } 349 350 func boothW5(in uint) (int, int) { 351 var s uint = ^((in >> 5) - 1) 352 var d uint = (1 << 6) - in - 1 353 d = (d & s) | (in & (^s)) 354 d = (d >> 1) + (d & 1) 355 return int(d), int(s & 1) 356 } 357 358 func boothW7(in uint) (int, int) { 359 var s uint = ^((in >> 7) - 1) 360 var d uint = (1 << 8) - in - 1 361 d = (d & s) | (in & (^s)) 362 d = (d >> 1) + (d & 1) 363 return int(d), int(s & 1) 364 } 365 366 func initTable() { 367 p256PreFast = new([37][64]p256Point) //z coordinate not used 368 basePoint := p256Point{ 369 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, 370 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p 371 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, 372 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p 373 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 374 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p 375 } 376 377 t1 := new(p256Point) 378 t2 := new(p256Point) 379 *t2 = basePoint 380 381 zInv := make([]byte, 32) 382 zInvSq := make([]byte, 32) 383 for j := 0; j < 64; j++ { 384 *t1 = *t2 385 for i := 0; i < 37; i++ { 386 // The window size is 7 so we need to double 7 times. 387 if i != 0 { 388 for k := 0; k < 7; k++ { 389 p256PointDoubleAsm(t1, t1) 390 } 391 } 392 // Convert the point to affine form. (Its values are 393 // still in Montgomery form however.) 394 p256Inverse(zInv, t1.z[:]) 395 p256Sqr(zInvSq, zInv) 396 p256MulAsm(zInv, zInv, zInvSq) 397 398 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 399 p256MulAsm(t1.y[:], t1.y[:], zInv) 400 401 copy(t1.z[:], basePoint.z[:]) 402 // Update the table entry 403 copy(p256PreFast[i][j].x[:], t1.x[:]) 404 copy(p256PreFast[i][j].y[:], t1.y[:]) 405 } 406 if j == 0 { 407 p256PointDoubleAsm(t2, &basePoint) 408 } else { 409 p256PointAddAsm(t2, t2, &basePoint) 410 } 411 } 412 } 413 414 func (p *p256Point) p256BaseMult(scalar []byte) { 415 wvalue := (uint(scalar[31]) << 1) & 0xff 416 sel, sign := boothW7(uint(wvalue)) 417 p256SelectBase(p, p256PreFast[0][:], sel) 418 p256NegCond(p, sign) 419 420 copy(p.z[:], one[:]) 421 var t0 p256Point 422 423 copy(t0.z[:], one[:]) 424 425 index := uint(6) 426 zero := sel 427 428 for i := 1; i < 37; i++ { 429 if index < 247 { 430 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff 431 } else { 432 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff 433 } 434 index += 7 435 sel, sign = boothW7(uint(wvalue)) 436 p256SelectBase(&t0, p256PreFast[i][:], sel) 437 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 438 zero |= sel 439 } 440 } 441 442 func (p *p256Point) p256ScalarMult(scalar []byte) { 443 // precomp is a table of precomputed points that stores powers of p 444 // from p^1 to p^16. 445 var precomp [16]p256Point 446 var t0, t1, t2, t3 p256Point 447 448 // Prepare the table 449 *&precomp[0] = *p 450 451 p256PointDoubleAsm(&t0, p) 452 p256PointDoubleAsm(&t1, &t0) 453 p256PointDoubleAsm(&t2, &t1) 454 p256PointDoubleAsm(&t3, &t2) 455 *&precomp[1] = t0 // 2 456 *&precomp[3] = t1 // 4 457 *&precomp[7] = t2 // 8 458 *&precomp[15] = t3 // 16 459 460 p256PointAddAsm(&t0, &t0, p) 461 p256PointAddAsm(&t1, &t1, p) 462 p256PointAddAsm(&t2, &t2, p) 463 *&precomp[2] = t0 // 3 464 *&precomp[4] = t1 // 5 465 *&precomp[8] = t2 // 9 466 467 p256PointDoubleAsm(&t0, &t0) 468 p256PointDoubleAsm(&t1, &t1) 469 *&precomp[5] = t0 // 6 470 *&precomp[9] = t1 // 10 471 472 p256PointAddAsm(&t2, &t0, p) 473 p256PointAddAsm(&t1, &t1, p) 474 *&precomp[6] = t2 // 7 475 *&precomp[10] = t1 // 11 476 477 p256PointDoubleAsm(&t0, &t0) 478 p256PointDoubleAsm(&t2, &t2) 479 *&precomp[11] = t0 // 12 480 *&precomp[13] = t2 // 14 481 482 p256PointAddAsm(&t0, &t0, p) 483 p256PointAddAsm(&t2, &t2, p) 484 *&precomp[12] = t0 // 13 485 *&precomp[14] = t2 // 15 486 487 // Start scanning the window from top bit 488 index := uint(254) 489 var sel, sign int 490 491 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 492 sel, _ = boothW5(uint(wvalue)) 493 p256Select(p, precomp[:], sel) 494 zero := sel 495 496 for index > 4 { 497 index -= 5 498 p256PointDoubleAsm(p, p) 499 p256PointDoubleAsm(p, p) 500 p256PointDoubleAsm(p, p) 501 p256PointDoubleAsm(p, p) 502 p256PointDoubleAsm(p, p) 503 504 if index < 247 { 505 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f 506 } else { 507 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 508 } 509 510 sel, sign = boothW5(uint(wvalue)) 511 512 p256Select(&t0, precomp[:], sel) 513 p256NegCond(&t0, sign) 514 p256PointAddAsm(&t1, p, &t0) 515 p256MovCond(&t1, &t1, p, sel) 516 p256MovCond(p, &t1, &t0, zero) 517 zero |= sel 518 } 519 520 p256PointDoubleAsm(p, p) 521 p256PointDoubleAsm(p, p) 522 p256PointDoubleAsm(p, p) 523 p256PointDoubleAsm(p, p) 524 p256PointDoubleAsm(p, p) 525 526 wvalue = (uint(scalar[31]) << 1) & 0x3f 527 sel, sign = boothW5(uint(wvalue)) 528 529 p256Select(&t0, precomp[:], sel) 530 p256NegCond(&t0, sign) 531 p256PointAddAsm(&t1, p, &t0) 532 p256MovCond(&t1, &t1, p, sel) 533 p256MovCond(p, &t1, &t0, zero) 534 }