
     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  // +build s390x
     7  package elliptic
     9  import (
    10  	"crypto/subtle"
    11  	"math/big"
    12  )
// p256CurveFast is the P-256 curve implementation backed by the assembly
// routines declared in this file. initP256Arch installs it as p256 when
// hasVX is true. It embeds *CurveParams, which Params returns.
type p256CurveFast struct {
	*CurveParams
}
// p256Point is a curve point stored as three 32-byte coordinates. The code
// in this file keeps the coordinates in the Montgomery domain (see rr and
// one) and recovers the affine point as x/z^2, y/z^3 in p256PointToAffine.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
var (
	// p256 is the P-256 Curve chosen by initP256Arch: p256CurveFast when
	// hasVX is true, the generic p256Curve otherwise.
	p256 Curve
	// p256PreFast is the base point table filled in by initTable and read
	// by p256BaseMult. Entry [i][j] holds the x and y coordinates of
	// (j+1) * 2^(7*i) * G; initTable does not store z.
	p256PreFast *[37][64]p256Point
)
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled.
// It is declared without a body; the implementation lives outside this file.
func hasVectorFacility() bool

// hasVX caches the result of hasVectorFacility, evaluated once at package
// initialization. initP256Arch uses it to pick the curve implementation.
var hasVX = hasVectorFacility()
    35  func initP256Arch() {
    36  	if hasVX {
    37  		p256 = p256CurveFast{p256Params}
    38  		initTable()
    39  		return
    40  	}
    42  	// No vector support, use pure Go implementation.
    43  	p256 = p256Curve{p256Params}
    44  	return
    45  }
// Params returns the embedded curve parameters.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
// Functions implemented in p256_asm_s390x.s
// Montgomery multiplication modulo P256
//
// p256MulAsm writes the Montgomery product of in1 and in2 to res. Callers in
// this file always pass 32-byte slices and frequently pass the same slice as
// both an input and the result.
//
//go:noescape
func p256MulAsm(res, in1, in2 []byte)
// Montgomery square modulo P256
//
// p256Sqr sets res to the Montgomery square of in by multiplying in with
// itself using p256MulAsm.
func p256Sqr(res, in []byte) {
	p256MulAsm(res, in, in)
}
// Montgomery multiplication by 1
//
// p256FromMont is used by p256PointToAffine to take field elements out of
// the Montgomery domain before they are turned into big.Int values.
// Implemented in assembly.
//
//go:noescape
func p256FromMont(res, in []byte)
// iff cond == 1  val <- -val
//
// p256NegCond conditionally negates the point val in place. Callers pass the
// sign returned by boothW5 or boothW7 as cond. Implemented in assembly.
//
//go:noescape
func p256NegCond(val *p256Point, cond int)
// if cond == 0 res <- b; else res <- a
//
// p256MovCond copies one of two points into res depending on cond. Callers
// in this file pass res aliased with a or b. Implemented in assembly.
//
//go:noescape
func p256MovCond(res, a, b *p256Point, cond int)
// Constant time table access
//
// p256Select loads an entry of table into point. p256ScalarMult calls it
// with a 16-entry table of multiples and the digit returned by boothW5.
// NOTE(review): presumably idx is 1-based and idx == 0 selects no entry;
// confirm against the assembly.
//
//go:noescape
func p256Select(point *p256Point, table []p256Point, idx int)
// p256SelectBase loads an entry of a p256PreFast row into point. p256BaseMult
// calls it with a 64-entry row and the digit returned by boothW7, and sets
// the z coordinate itself.
// NOTE(review): presumably the constant-time counterpart of p256Select for
// the base point table; confirm against the assembly.
//
//go:noescape
func p256SelectBase(point *p256Point, table []p256Point, idx int)
// Montgomery multiplication modulo Ord(G)
//
// p256OrdMul writes the Montgomery product of in1 and in2 to res, working
// modulo the order of the base point rather than the field prime. Inverse
// and p256OrdSqr call it with res aliasing an input. Implemented in assembly.
//
//go:noescape
func p256OrdMul(res, in1, in2 []byte)
    90  // Montgomery square modulo Ord(G), repeated n times
    91  func p256OrdSqr(res, in []byte, n int) {
    92  	copy(res, in)
    93  	for i := 0; i < n; i += 1 {
    94  		p256OrdMul(res, res, res)
    95  	}
    96  }
// Point add with P2 being affine point
// If sign == 1 -> P2 = -P2
// If sel == 0 -> P3 = P1
// if zero == 0 -> P3 = P2
//
// p256BaseMult passes the Booth sign and digit of the current window as sign
// and sel, and the OR of all earlier digits as zero. Implemented in assembly.
//
//go:noescape
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
// Point add
//
// p256PointAddAsm stores P1 + P2 in P3. CombinedMult treats the returned
// int as a "points are equal" flag and substitutes a doubling for the sum
// when it is set; other callers ignore it. Implemented in assembly.
//
//go:noescape
func p256PointAddAsm(P3, P1, P2 *p256Point) int
// p256PointDoubleAsm stores 2*P1 in P3. Callers in this file pass the same
// point as P3 and P1 to double in place. Implemented in assembly.
//
//go:noescape
func p256PointDoubleAsm(P3, P1 *p256Point)
   114  func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
   115  	if k.Cmp(p256Params.N) >= 0 {
   116  		// This should never happen.
   117  		reducedK := new(big.Int).Mod(k, p256Params.N)
   118  		k = reducedK
   119  	}
   121  	// table will store precomputed powers of x. The 32 bytes at index
   122  	// i store x^(i+1).
   123  	var table [15][32]byte
   125  	x := fromBig(k)
   126  	// This code operates in the Montgomery domain where R = 2^256 mod n
   127  	// and n is the order of the scalar field. (See initP256 for the
   128  	// value.) Elements in the Montgomery domain take the form a×R and
   129  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   130  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   131  	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
   132  	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
   133  		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
   135  	p256OrdMul(table[0][:], x, RR)
   137  	// Prepare the table, no need in constant time access, because the
   138  	// power is not a secret. (Entry 0 is never used.)
   139  	for i := 2; i < 16; i += 2 {
   140  		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
   141  		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
   142  	}
   144  	copy(x, table[14][:]) // f
   146  	p256OrdSqr(x[0:32], x[0:32], 4)
   147  	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
   148  	t := make([]byte, 32)
   149  	copy(t, x)
   151  	p256OrdSqr(x, x, 8)
   152  	p256OrdMul(x, x, t) // ffff
   153  	copy(t, x)
   155  	p256OrdSqr(x, x, 16)
   156  	p256OrdMul(x, x, t) // ffffffff
   157  	copy(t, x)
   159  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   160  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   161  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   162  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   164  	// Remaining 32 windows
   165  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
   166  		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   167  	for i := 0; i < 32; i++ {
   168  		p256OrdSqr(x, x, 4)
   169  		p256OrdMul(x, x, table[expLo[i]-1][:])
   170  	}
   172  	// Multiplying by one in the Montgomery domain converts a Montgomery
   173  	// value out of the domain.
   174  	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   175  		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
   176  	p256OrdMul(x, x, one)
   178  	return new(big.Int).SetBytes(x)
   179  }
   181  // fromBig converts a *big.Int into a format used by this code.
   182  func fromBig(big *big.Int) []byte {
   183  	// This could be done a lot more efficiently...
   184  	res := big.Bytes()
   185  	if 32 == len(res) {
   186  		return res
   187  	}
   188  	t := make([]byte, 32)
   189  	offset := 32 - len(res)
   190  	for i := len(res) - 1; i >= 0; i-- {
   191  		t[i+offset] = res[i]
   192  	}
   193  	return t
   194  }
   196  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   197  // is equal or greater than the order of the group, it's reduced modulo that order.
   198  func p256GetMultiplier(in []byte) []byte {
   199  	n := new(big.Int).SetBytes(in)
   201  	if n.Cmp(p256Params.N) >= 0 {
   202  		n.Mod(n, p256Params.N)
   203  	}
   204  	return fromBig(n)
   205  }
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p. See comment in Inverse about how this is used.
// ScalarMult and CombinedMult multiply affine coordinates by rr with
// p256MulAsm to bring them into the Montgomery domain.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}

// (This is one, in the Montgomery domain.)
// It is copied into the z coordinate of points that start out affine.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   217  func maybeReduceModP(in *big.Int) *big.Int {
   218  	if in.Cmp(p256Params.P) < 0 {
   219  		return in
   220  	}
   221  	return new(big.Int).Mod(in, p256Params.P)
   222  }
   224  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   225  	var r1, r2 p256Point
   226  	scalarReduced := p256GetMultiplier(baseScalar)
   227  	r1IsInfinity := scalarIsZero(scalarReduced)
   228  	r1.p256BaseMult(scalarReduced)
   230  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   231  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   232  	copy(r2.z[:], one)
   233  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   234  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   236  	scalarReduced = p256GetMultiplier(scalar)
   237  	r2IsInfinity := scalarIsZero(scalarReduced)
   238  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   240  	var sum, double p256Point
   241  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   242  	p256PointDoubleAsm(&double, &r1)
   243  	p256MovCond(&sum, &double, &sum, pointsEqual)
   244  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   245  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   246  	return sum.p256PointToAffine()
   247  }
// ScalarBaseMult returns the affine coordinates of scalar*G, where G is the
// base point. The scalar is normalized by p256GetMultiplier (32 bytes,
// reduced modulo the group order) before the multiplication.
func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
	var r p256Point
	r.p256BaseMult(p256GetMultiplier(scalar))
	return r.p256PointToAffine()
}
   255  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   256  	var r p256Point
   257  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   258  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   259  	copy(r.z[:], one)
   260  	p256MulAsm(r.x[:], r.x[:], rr[:])
   261  	p256MulAsm(r.y[:], r.y[:], rr[:])
   262  	r.p256ScalarMult(p256GetMultiplier(scalar))
   263  	return r.p256PointToAffine()
   264  }
   266  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   267  // otherwise.
   268  func scalarIsZero(scalar []byte) int {
   269  	b := byte(0)
   270  	for _, s := range scalar {
   271  		b |= s
   272  	}
   273  	return subtle.ConstantTimeByteEq(b, 0)
   274  }
   276  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   277  	zInv := make([]byte, 32)
   278  	zInvSq := make([]byte, 32)
   280  	p256Inverse(zInv, p.z[:])
   281  	p256Sqr(zInvSq, zInv)
   282  	p256MulAsm(zInv, zInv, zInvSq)
   284  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   285  	p256MulAsm(zInv, p.y[:], zInv)
   287  	p256FromMont(zInvSq, zInvSq)
   288  	p256FromMont(zInv, zInv)
   290  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   291  }
   293  // p256Inverse sets out to in^-1 mod p.
   294  func p256Inverse(out, in []byte) {
   295  	var stack [6 * 32]byte
   296  	p2 := stack[32*0 : 32*0+32]
   297  	p4 := stack[32*1 : 32*1+32]
   298  	p8 := stack[32*2 : 32*2+32]
   299  	p16 := stack[32*3 : 32*3+32]
   300  	p32 := stack[32*4 : 32*4+32]
   302  	p256Sqr(out, in)
   303  	p256MulAsm(p2, out, in) // 3*p
   305  	p256Sqr(out, p2)
   306  	p256Sqr(out, out)
   307  	p256MulAsm(p4, out, p2) // f*p
   309  	p256Sqr(out, p4)
   310  	p256Sqr(out, out)
   311  	p256Sqr(out, out)
   312  	p256Sqr(out, out)
   313  	p256MulAsm(p8, out, p4) // ff*p
   315  	p256Sqr(out, p8)
   317  	for i := 0; i < 7; i++ {
   318  		p256Sqr(out, out)
   319  	}
   320  	p256MulAsm(p16, out, p8) // ffff*p
   322  	p256Sqr(out, p16)
   323  	for i := 0; i < 15; i++ {
   324  		p256Sqr(out, out)
   325  	}
   326  	p256MulAsm(p32, out, p16) // ffffffff*p
   328  	p256Sqr(out, p32)
   330  	for i := 0; i < 31; i++ {
   331  		p256Sqr(out, out)
   332  	}
   333  	p256MulAsm(out, out, in)
   335  	for i := 0; i < 32*4; i++ {
   336  		p256Sqr(out, out)
   337  	}
   338  	p256MulAsm(out, out, p32)
   340  	for i := 0; i < 32; i++ {
   341  		p256Sqr(out, out)
   342  	}
   343  	p256MulAsm(out, out, p32)
   345  	for i := 0; i < 16; i++ {
   346  		p256Sqr(out, out)
   347  	}
   348  	p256MulAsm(out, out, p16)
   350  	for i := 0; i < 8; i++ {
   351  		p256Sqr(out, out)
   352  	}
   353  	p256MulAsm(out, out, p8)
   355  	p256Sqr(out, out)
   356  	p256Sqr(out, out)
   357  	p256Sqr(out, out)
   358  	p256Sqr(out, out)
   359  	p256MulAsm(out, out, p4)
   361  	p256Sqr(out, out)
   362  	p256Sqr(out, out)
   363  	p256MulAsm(out, out, p2)
   365  	p256Sqr(out, out)
   366  	p256Sqr(out, out)
   367  	p256MulAsm(out, out, in)
   368  }
   370  func boothW5(in uint) (int, int) {
   371  	var s uint = ^((in >> 5) - 1)
   372  	var d uint = (1 << 6) - in - 1
   373  	d = (d & s) | (in & (^s))
   374  	d = (d >> 1) + (d & 1)
   375  	return int(d), int(s & 1)
   376  }
   378  func boothW7(in uint) (int, int) {
   379  	var s uint = ^((in >> 7) - 1)
   380  	var d uint = (1 << 8) - in - 1
   381  	d = (d & s) | (in & (^s))
   382  	d = (d >> 1) + (d & 1)
   383  	return int(d), int(s & 1)
   384  }
   386  func initTable() {
   387  	p256PreFast = new([37][64]p256Point) //z coordinate not used
   388  	basePoint := p256Point{
   389  		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
   390  			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
   391  		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
   392  			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
   393  		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   394  			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
   395  	}
   397  	t1 := new(p256Point)
   398  	t2 := new(p256Point)
   399  	*t2 = basePoint
   401  	zInv := make([]byte, 32)
   402  	zInvSq := make([]byte, 32)
   403  	for j := 0; j < 64; j++ {
   404  		*t1 = *t2
   405  		for i := 0; i < 37; i++ {
   406  			// The window size is 7 so we need to double 7 times.
   407  			if i != 0 {
   408  				for k := 0; k < 7; k++ {
   409  					p256PointDoubleAsm(t1, t1)
   410  				}
   411  			}
   412  			// Convert the point to affine form. (Its values are
   413  			// still in Montgomery form however.)
   414  			p256Inverse(zInv, t1.z[:])
   415  			p256Sqr(zInvSq, zInv)
   416  			p256MulAsm(zInv, zInv, zInvSq)
   418  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   419  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   421  			copy(t1.z[:], basePoint.z[:])
   422  			// Update the table entry
   423  			copy(p256PreFast[i][j].x[:], t1.x[:])
   424  			copy(p256PreFast[i][j].y[:], t1.y[:])
   425  		}
   426  		if j == 0 {
   427  			p256PointDoubleAsm(t2, &basePoint)
   428  		} else {
   429  			p256PointAddAsm(t2, t2, &basePoint)
   430  		}
   431  	}
   432  }
   434  func (p *p256Point) p256BaseMult(scalar []byte) {
   435  	wvalue := (uint(scalar[31]) << 1) & 0xff
   436  	sel, sign := boothW7(uint(wvalue))
   437  	p256SelectBase(p, p256PreFast[0][:], sel)
   438  	p256NegCond(p, sign)
   440  	copy(p.z[:], one[:])
   441  	var t0 p256Point
   443  	copy(t0.z[:], one[:])
   445  	index := uint(6)
   446  	zero := sel
   448  	for i := 1; i < 37; i++ {
   449  		if index < 247 {
   450  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
   451  		} else {
   452  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
   453  		}
   454  		index += 7
   455  		sel, sign = boothW7(uint(wvalue))
   456  		p256SelectBase(&t0, p256PreFast[i][:], sel)
   457  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   458  		zero |= sel
   459  	}
   460  }
   462  func (p *p256Point) p256ScalarMult(scalar []byte) {
   463  	// precomp is a table of precomputed points that stores powers of p
   464  	// from p^1 to p^16.
   465  	var precomp [16]p256Point
   466  	var t0, t1, t2, t3 p256Point
   468  	// Prepare the table
   469  	*&precomp[0] = *p
   471  	p256PointDoubleAsm(&t0, p)
   472  	p256PointDoubleAsm(&t1, &t0)
   473  	p256PointDoubleAsm(&t2, &t1)
   474  	p256PointDoubleAsm(&t3, &t2)
   475  	*&precomp[1] = t0  // 2
   476  	*&precomp[3] = t1  // 4
   477  	*&precomp[7] = t2  // 8
   478  	*&precomp[15] = t3 // 16
   480  	p256PointAddAsm(&t0, &t0, p)
   481  	p256PointAddAsm(&t1, &t1, p)
   482  	p256PointAddAsm(&t2, &t2, p)
   483  	*&precomp[2] = t0 // 3
   484  	*&precomp[4] = t1 // 5
   485  	*&precomp[8] = t2 // 9
   487  	p256PointDoubleAsm(&t0, &t0)
   488  	p256PointDoubleAsm(&t1, &t1)
   489  	*&precomp[5] = t0 // 6
   490  	*&precomp[9] = t1 // 10
   492  	p256PointAddAsm(&t2, &t0, p)
   493  	p256PointAddAsm(&t1, &t1, p)
   494  	*&precomp[6] = t2  // 7
   495  	*&precomp[10] = t1 // 11
   497  	p256PointDoubleAsm(&t0, &t0)
   498  	p256PointDoubleAsm(&t2, &t2)
   499  	*&precomp[11] = t0 // 12
   500  	*&precomp[13] = t2 // 14
   502  	p256PointAddAsm(&t0, &t0, p)
   503  	p256PointAddAsm(&t2, &t2, p)
   504  	*&precomp[12] = t0 // 13
   505  	*&precomp[14] = t2 // 15
   507  	// Start scanning the window from top bit
   508  	index := uint(254)
   509  	var sel, sign int
   511  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   512  	sel, _ = boothW5(uint(wvalue))
   513  	p256Select(p, precomp[:], sel)
   514  	zero := sel
   516  	for index > 4 {
   517  		index -= 5
   518  		p256PointDoubleAsm(p, p)
   519  		p256PointDoubleAsm(p, p)
   520  		p256PointDoubleAsm(p, p)
   521  		p256PointDoubleAsm(p, p)
   522  		p256PointDoubleAsm(p, p)
   524  		if index < 247 {
   525  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   526  		} else {
   527  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   528  		}
   530  		sel, sign = boothW5(uint(wvalue))
   532  		p256Select(&t0, precomp[:], sel)
   533  		p256NegCond(&t0, sign)
   534  		p256PointAddAsm(&t1, p, &t0)
   535  		p256MovCond(&t1, &t1, p, sel)
   536  		p256MovCond(p, &t1, &t0, zero)
   537  		zero |= sel
   538  	}
   540  	p256PointDoubleAsm(p, p)
   541  	p256PointDoubleAsm(p, p)
   542  	p256PointDoubleAsm(p, p)
   543  	p256PointDoubleAsm(p, p)
   544  	p256PointDoubleAsm(p, p)
   546  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   547  	sel, sign = boothW5(uint(wvalue))
   549  	p256Select(&t0, precomp[:], sel)
   550  	p256NegCond(&t0, sign)
   551  	p256PointAddAsm(&t1, p, &t0)
   552  	p256MovCond(&t1, &t1, p, sel)
   553  	p256MovCond(p, &t1, &t0, zero)
   554  }