github.com/mangodowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/crypto/elliptic/p256_s390x.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build s390x 6 7 package elliptic 8 9 import ( 10 "math/big" 11 ) 12 13 type p256CurveFast struct { 14 *CurveParams 15 } 16 17 type p256Point struct { 18 x [32]byte 19 y [32]byte 20 z [32]byte 21 } 22 23 var ( 24 p256 Curve 25 p256PreFast *[37][64]p256Point 26 ) 27 28 // hasVectorFacility reports whether the machine has the z/Architecture 29 // vector facility installed and enabled. 30 func hasVectorFacility() bool 31 32 var hasVX = hasVectorFacility() 33 34 func initP256Arch() { 35 if hasVX { 36 p256 = p256CurveFast{p256Params} 37 initTable() 38 return 39 } 40 41 // No vector support, use pure Go implementation. 42 p256 = p256Curve{p256Params} 43 return 44 } 45 46 func (curve p256CurveFast) Params() *CurveParams { 47 return curve.CurveParams 48 } 49 50 // Functions implemented in p256_asm_s390x.s 51 // Montgomery multiplication modulo P256 52 func p256MulAsm(res, in1, in2 []byte) 53 54 // Montgomery square modulo P256 55 func p256Sqr(res, in []byte) { 56 p256MulAsm(res, in, in) 57 } 58 59 // Montgomery multiplication by 1 60 func p256FromMont(res, in []byte) 61 62 // iff cond == 1 val <- -val 63 func p256NegCond(val *p256Point, cond int) 64 65 // if cond == 0 res <- b; else res <- a 66 func p256MovCond(res, a, b *p256Point, cond int) 67 68 // Constant time table access 69 func p256Select(point *p256Point, table []p256Point, idx int) 70 func p256SelectBase(point *p256Point, table []p256Point, idx int) 71 72 // Montgomery multiplication modulo Ord(G) 73 func p256OrdMul(res, in1, in2 []byte) 74 75 // Montgomery square modulo Ord(G), repeated n times 76 func p256OrdSqr(res, in []byte, n int) { 77 copy(res, in) 78 for i := 0; i < n; i += 1 { 79 p256OrdMul(res, res, res) 80 } 81 } 82 83 // Point add with P2 being affine point 84 // If sign == 1 -> P2 = -P2 85 // If sel == 0 -> P3 = P1 86 // if zero == 0 -> P3 = P2 87 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int) 88 89 // Point add 90 func p256PointAddAsm(P3, P1, P2 *p256Point) 91 func p256PointDoubleAsm(P3, P1 *p256Point) 92 93 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int { 94 if k.Cmp(p256Params.N) >= 0 { 95 // This should never happen. 96 reducedK := new(big.Int).Mod(k, p256Params.N) 97 k = reducedK 98 } 99 100 // table will store precomputed powers of x. The 32 bytes at index 101 // i store x^(i+1). 102 var table [15][32]byte 103 104 x := fromBig(k) 105 // This code operates in the Montgomery domain where R = 2^256 mod n 106 // and n is the order of the scalar field. (See initP256 for the 107 // value.) Elements in the Montgomery domain take the form a×R and 108 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR 109 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R, 110 // i.e. converts x into the Montgomery domain. Stored in BigEndian form 111 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59, 112 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2} 113 114 p256OrdMul(table[0][:], x, RR) 115 116 // Prepare the table, no need in constant time access, because the 117 // power is not a secret. (Entry 0 is never used.) 118 for i := 2; i < 16; i += 2 { 119 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1) 120 p256OrdMul(table[i][:], table[i-1][:], table[0][:]) 121 } 122 123 copy(x, table[14][:]) // f 124 125 p256OrdSqr(x[0:32], x[0:32], 4) 126 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff 127 t := make([]byte, 32) 128 copy(t, x) 129 130 p256OrdSqr(x, x, 8) 131 p256OrdMul(x, x, t) // ffff 132 copy(t, x) 133 134 p256OrdSqr(x, x, 16) 135 p256OrdMul(x, x, t) // ffffffff 136 copy(t, x) 137 138 p256OrdSqr(x, x, 64) // ffffffff0000000000000000 139 p256OrdMul(x, x, t) // ffffffff00000000ffffffff 140 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000 141 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff 142 143 // Remaining 32 windows 144 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 145 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf} 146 for i := 0; i < 32; i++ { 147 p256OrdSqr(x, x, 4) 148 p256OrdMul(x, x, table[expLo[i]-1][:]) 149 } 150 151 // Multiplying by one in the Montgomery domain converts a Montgomery 152 // value out of the domain. 153 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 154 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} 155 p256OrdMul(x, x, one) 156 157 return new(big.Int).SetBytes(x) 158 } 159 160 // fromBig converts a *big.Int into a format used by this code. 161 func fromBig(big *big.Int) []byte { 162 // This could be done a lot more efficiently... 163 res := big.Bytes() 164 if 32 == len(res) { 165 return res 166 } 167 t := make([]byte, 32) 168 offset := 32 - len(res) 169 for i := len(res) - 1; i >= 0; i-- { 170 t[i+offset] = res[i] 171 } 172 return t 173 } 174 175 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar 176 // is equal or greater than the order of the group, it's reduced modulo that order. 177 func p256GetMultiplier(in []byte) []byte { 178 n := new(big.Int).SetBytes(in) 179 180 if n.Cmp(p256Params.N) >= 0 { 181 n.Mod(n, p256Params.N) 182 } 183 return fromBig(n) 184 } 185 186 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the 187 // underlying field of the curve. (See initP256 for the value.) Thus rr here is 188 // R×R mod p. See comment in Inverse about how this is used. 189 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 190 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} 191 192 // (This is one, in the Montgomery domain.) 193 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 194 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} 195 196 func maybeReduceModP(in *big.Int) *big.Int { 197 if in.Cmp(p256Params.P) < 0 { 198 return in 199 } 200 return new(big.Int).Mod(in, p256Params.P) 201 } 202 203 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { 204 var r1, r2 p256Point 205 r1.p256BaseMult(p256GetMultiplier(baseScalar)) 206 207 copy(r2.x[:], fromBig(maybeReduceModP(bigX))) 208 copy(r2.y[:], fromBig(maybeReduceModP(bigY))) 209 copy(r2.z[:], one) 210 p256MulAsm(r2.x[:], r2.x[:], rr[:]) 211 p256MulAsm(r2.y[:], r2.y[:], rr[:]) 212 213 r2.p256ScalarMult(p256GetMultiplier(scalar)) 214 p256PointAddAsm(&r1, &r1, &r2) 215 return r1.p256PointToAffine() 216 } 217 218 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 219 var r p256Point 220 r.p256BaseMult(p256GetMultiplier(scalar)) 221 return r.p256PointToAffine() 222 } 223 224 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 225 var r p256Point 226 copy(r.x[:], fromBig(maybeReduceModP(bigX))) 227 copy(r.y[:], fromBig(maybeReduceModP(bigY))) 228 copy(r.z[:], one) 229 p256MulAsm(r.x[:], r.x[:], rr[:]) 230 p256MulAsm(r.y[:], r.y[:], rr[:]) 231 r.p256ScalarMult(p256GetMultiplier(scalar)) 232 return r.p256PointToAffine() 233 } 234 235 func (p *p256Point) p256PointToAffine() (x, y *big.Int) { 236 zInv := make([]byte, 32) 237 zInvSq := make([]byte, 32) 238 239 p256Inverse(zInv, p.z[:]) 240 p256Sqr(zInvSq, zInv) 241 p256MulAsm(zInv, zInv, zInvSq) 242 243 p256MulAsm(zInvSq, p.x[:], zInvSq) 244 p256MulAsm(zInv, p.y[:], zInv) 245 246 p256FromMont(zInvSq, zInvSq) 247 p256FromMont(zInv, zInv) 248 249 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv) 250 } 251 252 // p256Inverse sets out to in^-1 mod p. 253 func p256Inverse(out, in []byte) { 254 var stack [6 * 32]byte 255 p2 := stack[32*0 : 32*0+32] 256 p4 := stack[32*1 : 32*1+32] 257 p8 := stack[32*2 : 32*2+32] 258 p16 := stack[32*3 : 32*3+32] 259 p32 := stack[32*4 : 32*4+32] 260 261 p256Sqr(out, in) 262 p256MulAsm(p2, out, in) // 3*p 263 264 p256Sqr(out, p2) 265 p256Sqr(out, out) 266 p256MulAsm(p4, out, p2) // f*p 267 268 p256Sqr(out, p4) 269 p256Sqr(out, out) 270 p256Sqr(out, out) 271 p256Sqr(out, out) 272 p256MulAsm(p8, out, p4) // ff*p 273 274 p256Sqr(out, p8) 275 276 for i := 0; i < 7; i++ { 277 p256Sqr(out, out) 278 } 279 p256MulAsm(p16, out, p8) // ffff*p 280 281 p256Sqr(out, p16) 282 for i := 0; i < 15; i++ { 283 p256Sqr(out, out) 284 } 285 p256MulAsm(p32, out, p16) // ffffffff*p 286 287 p256Sqr(out, p32) 288 289 for i := 0; i < 31; i++ { 290 p256Sqr(out, out) 291 } 292 p256MulAsm(out, out, in) 293 294 for i := 0; i < 32*4; i++ { 295 p256Sqr(out, out) 296 } 297 p256MulAsm(out, out, p32) 298 299 for i := 0; i < 32; i++ { 300 p256Sqr(out, out) 301 } 302 p256MulAsm(out, out, p32) 303 304 for i := 0; i < 16; i++ { 305 p256Sqr(out, out) 306 } 307 p256MulAsm(out, out, p16) 308 309 for i := 0; i < 8; i++ { 310 p256Sqr(out, out) 311 } 312 p256MulAsm(out, out, p8) 313 314 p256Sqr(out, out) 315 p256Sqr(out, out) 316 p256Sqr(out, out) 317 p256Sqr(out, out) 318 p256MulAsm(out, out, p4) 319 320 p256Sqr(out, out) 321 p256Sqr(out, out) 322 p256MulAsm(out, out, p2) 323 324 p256Sqr(out, out) 325 p256Sqr(out, out) 326 p256MulAsm(out, out, in) 327 } 328 329 func boothW5(in uint) (int, int) { 330 var s uint = ^((in >> 5) - 1) 331 var d uint = (1 << 6) - in - 1 332 d = (d & s) | (in & (^s)) 333 d = (d >> 1) + (d & 1) 334 return int(d), int(s & 1) 335 } 336 337 func boothW7(in uint) (int, int) { 338 var s uint = ^((in >> 7) - 1) 339 var d uint = (1 << 8) - in - 1 340 d = (d & s) | (in & (^s)) 341 d = (d >> 1) + (d & 1) 342 return int(d), int(s & 1) 343 } 344 345 func initTable() { 346 p256PreFast = new([37][64]p256Point) //z coordinate not used 347 basePoint := p256Point{ 348 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10, 349 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p 350 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25, 351 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p 352 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 353 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p 354 } 355 356 t1 := new(p256Point) 357 t2 := new(p256Point) 358 *t2 = basePoint 359 360 zInv := make([]byte, 32) 361 zInvSq := make([]byte, 32) 362 for j := 0; j < 64; j++ { 363 *t1 = *t2 364 for i := 0; i < 37; i++ { 365 // The window size is 7 so we need to double 7 times. 366 if i != 0 { 367 for k := 0; k < 7; k++ { 368 p256PointDoubleAsm(t1, t1) 369 } 370 } 371 // Convert the point to affine form. (Its values are 372 // still in Montgomery form however.) 373 p256Inverse(zInv, t1.z[:]) 374 p256Sqr(zInvSq, zInv) 375 p256MulAsm(zInv, zInv, zInvSq) 376 377 p256MulAsm(t1.x[:], t1.x[:], zInvSq) 378 p256MulAsm(t1.y[:], t1.y[:], zInv) 379 380 copy(t1.z[:], basePoint.z[:]) 381 // Update the table entry 382 copy(p256PreFast[i][j].x[:], t1.x[:]) 383 copy(p256PreFast[i][j].y[:], t1.y[:]) 384 } 385 if j == 0 { 386 p256PointDoubleAsm(t2, &basePoint) 387 } else { 388 p256PointAddAsm(t2, t2, &basePoint) 389 } 390 } 391 } 392 393 func (p *p256Point) p256BaseMult(scalar []byte) { 394 wvalue := (uint(scalar[31]) << 1) & 0xff 395 sel, sign := boothW7(uint(wvalue)) 396 p256SelectBase(p, p256PreFast[0][:], sel) 397 p256NegCond(p, sign) 398 399 copy(p.z[:], one[:]) 400 var t0 p256Point 401 402 copy(t0.z[:], one[:]) 403 404 index := uint(6) 405 zero := sel 406 407 for i := 1; i < 37; i++ { 408 if index < 247 { 409 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff 410 } else { 411 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff 412 } 413 index += 7 414 sel, sign = boothW7(uint(wvalue)) 415 p256SelectBase(&t0, p256PreFast[i][:], sel) 416 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 417 zero |= sel 418 } 419 } 420 421 func (p *p256Point) p256ScalarMult(scalar []byte) { 422 // precomp is a table of precomputed points that stores powers of p 423 // from p^1 to p^16. 424 var precomp [16]p256Point 425 var t0, t1, t2, t3 p256Point 426 427 // Prepare the table 428 *&precomp[0] = *p 429 430 p256PointDoubleAsm(&t0, p) 431 p256PointDoubleAsm(&t1, &t0) 432 p256PointDoubleAsm(&t2, &t1) 433 p256PointDoubleAsm(&t3, &t2) 434 *&precomp[1] = t0 // 2 435 *&precomp[3] = t1 // 4 436 *&precomp[7] = t2 // 8 437 *&precomp[15] = t3 // 16 438 439 p256PointAddAsm(&t0, &t0, p) 440 p256PointAddAsm(&t1, &t1, p) 441 p256PointAddAsm(&t2, &t2, p) 442 *&precomp[2] = t0 // 3 443 *&precomp[4] = t1 // 5 444 *&precomp[8] = t2 // 9 445 446 p256PointDoubleAsm(&t0, &t0) 447 p256PointDoubleAsm(&t1, &t1) 448 *&precomp[5] = t0 // 6 449 *&precomp[9] = t1 // 10 450 451 p256PointAddAsm(&t2, &t0, p) 452 p256PointAddAsm(&t1, &t1, p) 453 *&precomp[6] = t2 // 7 454 *&precomp[10] = t1 // 11 455 456 p256PointDoubleAsm(&t0, &t0) 457 p256PointDoubleAsm(&t2, &t2) 458 *&precomp[11] = t0 // 12 459 *&precomp[13] = t2 // 14 460 461 p256PointAddAsm(&t0, &t0, p) 462 p256PointAddAsm(&t2, &t2, p) 463 *&precomp[12] = t0 // 13 464 *&precomp[14] = t2 // 15 465 466 // Start scanning the window from top bit 467 index := uint(254) 468 var sel, sign int 469 470 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 471 sel, _ = boothW5(uint(wvalue)) 472 p256Select(p, precomp[:], sel) 473 zero := sel 474 475 for index > 4 { 476 index -= 5 477 p256PointDoubleAsm(p, p) 478 p256PointDoubleAsm(p, p) 479 p256PointDoubleAsm(p, p) 480 p256PointDoubleAsm(p, p) 481 p256PointDoubleAsm(p, p) 482 483 if index < 247 { 484 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f 485 } else { 486 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f 487 } 488 489 sel, sign = boothW5(uint(wvalue)) 490 491 p256Select(&t0, precomp[:], sel) 492 p256NegCond(&t0, sign) 493 p256PointAddAsm(&t1, p, &t0) 494 p256MovCond(&t1, &t1, p, sel) 495 p256MovCond(p, &t1, &t0, zero) 496 zero |= sel 497 } 498 499 p256PointDoubleAsm(p, p) 500 p256PointDoubleAsm(p, p) 501 p256PointDoubleAsm(p, p) 502 p256PointDoubleAsm(p, p) 503 p256PointDoubleAsm(p, p) 504 505 wvalue = (uint(scalar[31]) << 1) & 0x3f 506 sel, sign = boothW5(uint(wvalue)) 507 508 p256Select(&t0, precomp[:], sel) 509 p256NegCond(&t0, sign) 510 p256PointAddAsm(&t1, p, &t0) 511 p256MovCond(&t1, &t1, p, sel) 512 p256MovCond(p, &t1, &t0, zero) 513 }