github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/crypto/elliptic/p256_s390x.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build s390x
     6  
     7  package elliptic
     8  
     9  import (
    10  	"crypto/subtle"
    11  	"math/big"
    12  )
    13  
// p256CurveFast is the P-256 Curve implementation that initP256Arch installs
// when the vector facility is available (hasVX). It embeds the shared curve
// parameters.
type p256CurveFast struct {
	*CurveParams
}
    17  
// p256Point holds the three 32-byte coordinates of a curve point as they are
// passed to the assembly routines. p256PointToAffine recovers the affine
// coordinates as x/z^2 and y/z^3 and then converts them out of the
// Montgomery domain, so the fields hold projective coordinates in
// Montgomery form.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
    23  
// p256 is the Curve implementation chosen by initP256Arch.
//
// p256PreFast is the precomputed base-point table. It is allocated and filled
// by initTable (only called when hasVX is true) and read by p256BaseMult,
// which indexes it as p256PreFast[window] and selects one of the 64 entries
// of that row. Only the x and y coordinates of each entry are written.
var (
	p256        Curve
	p256PreFast *[37][64]p256Point
)
    28  
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled.
//
// The function is declared without a body; its implementation lives outside
// this file.
func hasVectorFacility() bool

// hasVX caches the result of hasVectorFacility at package initialization.
// initP256Arch uses it to choose between the vector and pure Go code paths.
var hasVX = hasVectorFacility()
    34  
    35  func initP256Arch() {
    36  	if hasVX {
    37  		p256 = p256CurveFast{p256Params}
    38  		initTable()
    39  		return
    40  	}
    41  
    42  	// No vector support, use pure Go implementation.
    43  	p256 = p256Curve{p256Params}
    44  	return
    45  }
    46  
    47  func (curve p256CurveFast) Params() *CurveParams {
    48  	return curve.CurveParams
    49  }
    50  
// Functions implemented in p256_asm_s390x.s

// p256MulAsm performs a Montgomery multiplication modulo P256: res receives
// the Montgomery product of in1 and in2. Callers in this file pass 32-byte
// slices and frequently let res alias one of the inputs.
func p256MulAsm(res, in1, in2 []byte)
    54  
    55  // Montgomery square modulo P256
    56  func p256Sqr(res, in []byte) {
    57  	p256MulAsm(res, in, in)
    58  }
    59  
// p256FromMont performs a Montgomery multiplication of in by 1 and stores the
// result in res. p256PointToAffine uses it as the last step before turning
// the coordinates into big.Int values, i.e. to leave the Montgomery domain.
func p256FromMont(res, in []byte)
    62  
// p256NegCond conditionally negates a point in place:
// iff cond == 1  val <- -val
// Callers pass the sign returned by boothW5/boothW7 as cond.
func p256NegCond(val *p256Point, cond int)
    65  
// p256MovCond is a conditional move between points:
// if cond == 0 res <- b; else res <- a
// res may alias a or b, as the callers in this file do.
func p256MovCond(res, a, b *p256Point, cond int)
    68  
// Constant time table access
//
// p256Select is called by p256ScalarMult with its 16-entry table of
// multiples, and p256SelectBase by p256BaseMult with one 64-entry row of
// p256PreFast. In both cases idx is the digit returned by the Booth recoding.
// NOTE(review): presumably idx == 0 selects no entry and idx == k selects
// table[k-1] — confirm against the assembly.
func p256Select(point *p256Point, table []p256Point, idx int)
func p256SelectBase(point *p256Point, table []p256Point, idx int)
    72  
// p256OrdMul performs a Montgomery multiplication modulo Ord(G), the order of
// the base point: res receives the Montgomery product of in1 and in2. Inverse
// calls it with 32-byte slices, often with res aliasing an input.
func p256OrdMul(res, in1, in2 []byte)
    75  
    76  // Montgomery square modulo Ord(G), repeated n times
    77  func p256OrdSqr(res, in []byte, n int) {
    78  	copy(res, in)
    79  	for i := 0; i < n; i += 1 {
    80  		p256OrdMul(res, res, res)
    81  	}
    82  }
    83  
// p256PointAddAffineAsm adds P1 and P2 into P3, with P2 being an affine point.
// The three integer arguments steer the result:
// If sign == 1 -> P2 = -P2
// If sel == 0 -> P3 = P1
// if zero == 0 -> P3 = P2
// p256BaseMult passes the Booth sign and digit as sign and sel, and the OR of
// all previous digits as zero.
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
    89  
// p256PointAddAsm sets P3 to the sum of P1 and P2. CombinedMult treats the
// returned int as a "points are equal" flag and substitutes a doubling when
// it is non-zero.
// NOTE(review): the exact meaning of the return value is defined by the
// assembly — confirm there.
func p256PointAddAsm(P3, P1, P2 *p256Point) int

// p256PointDoubleAsm sets P3 to the double of P1. P3 may alias P1.
func p256PointDoubleAsm(P3, P1 *p256Point)
    93  
    94  func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
    95  	if k.Cmp(p256Params.N) >= 0 {
    96  		// This should never happen.
    97  		reducedK := new(big.Int).Mod(k, p256Params.N)
    98  		k = reducedK
    99  	}
   100  
   101  	// table will store precomputed powers of x. The 32 bytes at index
   102  	// i store x^(i+1).
   103  	var table [15][32]byte
   104  
   105  	x := fromBig(k)
   106  	// This code operates in the Montgomery domain where R = 2^256 mod n
   107  	// and n is the order of the scalar field. (See initP256 for the
   108  	// value.) Elements in the Montgomery domain take the form a×R and
   109  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   110  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   111  	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
   112  	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
   113  		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
   114  
   115  	p256OrdMul(table[0][:], x, RR)
   116  
   117  	// Prepare the table, no need in constant time access, because the
   118  	// power is not a secret. (Entry 0 is never used.)
   119  	for i := 2; i < 16; i += 2 {
   120  		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
   121  		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
   122  	}
   123  
   124  	copy(x, table[14][:]) // f
   125  
   126  	p256OrdSqr(x[0:32], x[0:32], 4)
   127  	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
   128  	t := make([]byte, 32)
   129  	copy(t, x)
   130  
   131  	p256OrdSqr(x, x, 8)
   132  	p256OrdMul(x, x, t) // ffff
   133  	copy(t, x)
   134  
   135  	p256OrdSqr(x, x, 16)
   136  	p256OrdMul(x, x, t) // ffffffff
   137  	copy(t, x)
   138  
   139  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   140  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   141  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   142  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   143  
   144  	// Remaining 32 windows
   145  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
   146  		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   147  	for i := 0; i < 32; i++ {
   148  		p256OrdSqr(x, x, 4)
   149  		p256OrdMul(x, x, table[expLo[i]-1][:])
   150  	}
   151  
   152  	// Multiplying by one in the Montgomery domain converts a Montgomery
   153  	// value out of the domain.
   154  	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   155  		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
   156  	p256OrdMul(x, x, one)
   157  
   158  	return new(big.Int).SetBytes(x)
   159  }
   160  
   161  // fromBig converts a *big.Int into a format used by this code.
   162  func fromBig(big *big.Int) []byte {
   163  	// This could be done a lot more efficiently...
   164  	res := big.Bytes()
   165  	if 32 == len(res) {
   166  		return res
   167  	}
   168  	t := make([]byte, 32)
   169  	offset := 32 - len(res)
   170  	for i := len(res) - 1; i >= 0; i-- {
   171  		t[i+offset] = res[i]
   172  	}
   173  	return t
   174  }
   175  
   176  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   177  // is equal or greater than the order of the group, it's reduced modulo that order.
   178  func p256GetMultiplier(in []byte) []byte {
   179  	n := new(big.Int).SetBytes(in)
   180  
   181  	if n.Cmp(p256Params.N) >= 0 {
   182  		n.Mod(n, p256Params.N)
   183  	}
   184  	return fromBig(n)
   185  }
   186  
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p. See comment in Inverse about how this is used.
//
// ScalarMult and CombinedMult multiply incoming affine coordinates by rr with
// p256MulAsm to bring them into the Montgomery domain. The bytes are in the
// same big-endian order as the output of fromBig.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
   192  
// one is the value 1 in the Montgomery domain used by p256MulAsm. It is
// copied into the z coordinate of points that are loaded from affine
// coordinates (see ScalarMult, CombinedMult and p256BaseMult).
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   196  
   197  func maybeReduceModP(in *big.Int) *big.Int {
   198  	if in.Cmp(p256Params.P) < 0 {
   199  		return in
   200  	}
   201  	return new(big.Int).Mod(in, p256Params.P)
   202  }
   203  
   204  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   205  	var r1, r2 p256Point
   206  	scalarReduced := p256GetMultiplier(baseScalar)
   207  	r1IsInfinity := scalarIsZero(scalarReduced)
   208  	r1.p256BaseMult(scalarReduced)
   209  
   210  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   211  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   212  	copy(r2.z[:], one)
   213  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   214  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   215  
   216  	scalarReduced = p256GetMultiplier(scalar)
   217  	r2IsInfinity := scalarIsZero(scalarReduced)
   218  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   219  
   220  	var sum, double p256Point
   221  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   222  	p256PointDoubleAsm(&double, &r1)
   223  	p256MovCond(&sum, &double, &sum, pointsEqual)
   224  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   225  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   226  	return sum.p256PointToAffine()
   227  }
   228  
   229  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   230  	var r p256Point
   231  	r.p256BaseMult(p256GetMultiplier(scalar))
   232  	return r.p256PointToAffine()
   233  }
   234  
   235  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   236  	var r p256Point
   237  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   238  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   239  	copy(r.z[:], one)
   240  	p256MulAsm(r.x[:], r.x[:], rr[:])
   241  	p256MulAsm(r.y[:], r.y[:], rr[:])
   242  	r.p256ScalarMult(p256GetMultiplier(scalar))
   243  	return r.p256PointToAffine()
   244  }
   245  
   246  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   247  // otherwise.
   248  func scalarIsZero(scalar []byte) int {
   249  	b := byte(0)
   250  	for _, s := range scalar {
   251  		b |= s
   252  	}
   253  	return subtle.ConstantTimeByteEq(b, 0)
   254  }
   255  
   256  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   257  	zInv := make([]byte, 32)
   258  	zInvSq := make([]byte, 32)
   259  
   260  	p256Inverse(zInv, p.z[:])
   261  	p256Sqr(zInvSq, zInv)
   262  	p256MulAsm(zInv, zInv, zInvSq)
   263  
   264  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   265  	p256MulAsm(zInv, p.y[:], zInv)
   266  
   267  	p256FromMont(zInvSq, zInvSq)
   268  	p256FromMont(zInv, zInv)
   269  
   270  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   271  }
   272  
   273  // p256Inverse sets out to in^-1 mod p.
   274  func p256Inverse(out, in []byte) {
   275  	var stack [6 * 32]byte
   276  	p2 := stack[32*0 : 32*0+32]
   277  	p4 := stack[32*1 : 32*1+32]
   278  	p8 := stack[32*2 : 32*2+32]
   279  	p16 := stack[32*3 : 32*3+32]
   280  	p32 := stack[32*4 : 32*4+32]
   281  
   282  	p256Sqr(out, in)
   283  	p256MulAsm(p2, out, in) // 3*p
   284  
   285  	p256Sqr(out, p2)
   286  	p256Sqr(out, out)
   287  	p256MulAsm(p4, out, p2) // f*p
   288  
   289  	p256Sqr(out, p4)
   290  	p256Sqr(out, out)
   291  	p256Sqr(out, out)
   292  	p256Sqr(out, out)
   293  	p256MulAsm(p8, out, p4) // ff*p
   294  
   295  	p256Sqr(out, p8)
   296  
   297  	for i := 0; i < 7; i++ {
   298  		p256Sqr(out, out)
   299  	}
   300  	p256MulAsm(p16, out, p8) // ffff*p
   301  
   302  	p256Sqr(out, p16)
   303  	for i := 0; i < 15; i++ {
   304  		p256Sqr(out, out)
   305  	}
   306  	p256MulAsm(p32, out, p16) // ffffffff*p
   307  
   308  	p256Sqr(out, p32)
   309  
   310  	for i := 0; i < 31; i++ {
   311  		p256Sqr(out, out)
   312  	}
   313  	p256MulAsm(out, out, in)
   314  
   315  	for i := 0; i < 32*4; i++ {
   316  		p256Sqr(out, out)
   317  	}
   318  	p256MulAsm(out, out, p32)
   319  
   320  	for i := 0; i < 32; i++ {
   321  		p256Sqr(out, out)
   322  	}
   323  	p256MulAsm(out, out, p32)
   324  
   325  	for i := 0; i < 16; i++ {
   326  		p256Sqr(out, out)
   327  	}
   328  	p256MulAsm(out, out, p16)
   329  
   330  	for i := 0; i < 8; i++ {
   331  		p256Sqr(out, out)
   332  	}
   333  	p256MulAsm(out, out, p8)
   334  
   335  	p256Sqr(out, out)
   336  	p256Sqr(out, out)
   337  	p256Sqr(out, out)
   338  	p256Sqr(out, out)
   339  	p256MulAsm(out, out, p4)
   340  
   341  	p256Sqr(out, out)
   342  	p256Sqr(out, out)
   343  	p256MulAsm(out, out, p2)
   344  
   345  	p256Sqr(out, out)
   346  	p256Sqr(out, out)
   347  	p256MulAsm(out, out, in)
   348  }
   349  
   350  func boothW5(in uint) (int, int) {
   351  	var s uint = ^((in >> 5) - 1)
   352  	var d uint = (1 << 6) - in - 1
   353  	d = (d & s) | (in & (^s))
   354  	d = (d >> 1) + (d & 1)
   355  	return int(d), int(s & 1)
   356  }
   357  
   358  func boothW7(in uint) (int, int) {
   359  	var s uint = ^((in >> 7) - 1)
   360  	var d uint = (1 << 8) - in - 1
   361  	d = (d & s) | (in & (^s))
   362  	d = (d >> 1) + (d & 1)
   363  	return int(d), int(s & 1)
   364  }
   365  
   366  func initTable() {
   367  	p256PreFast = new([37][64]p256Point) //z coordinate not used
   368  	basePoint := p256Point{
   369  		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
   370  			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
   371  		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
   372  			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
   373  		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   374  			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
   375  	}
   376  
   377  	t1 := new(p256Point)
   378  	t2 := new(p256Point)
   379  	*t2 = basePoint
   380  
   381  	zInv := make([]byte, 32)
   382  	zInvSq := make([]byte, 32)
   383  	for j := 0; j < 64; j++ {
   384  		*t1 = *t2
   385  		for i := 0; i < 37; i++ {
   386  			// The window size is 7 so we need to double 7 times.
   387  			if i != 0 {
   388  				for k := 0; k < 7; k++ {
   389  					p256PointDoubleAsm(t1, t1)
   390  				}
   391  			}
   392  			// Convert the point to affine form. (Its values are
   393  			// still in Montgomery form however.)
   394  			p256Inverse(zInv, t1.z[:])
   395  			p256Sqr(zInvSq, zInv)
   396  			p256MulAsm(zInv, zInv, zInvSq)
   397  
   398  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   399  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   400  
   401  			copy(t1.z[:], basePoint.z[:])
   402  			// Update the table entry
   403  			copy(p256PreFast[i][j].x[:], t1.x[:])
   404  			copy(p256PreFast[i][j].y[:], t1.y[:])
   405  		}
   406  		if j == 0 {
   407  			p256PointDoubleAsm(t2, &basePoint)
   408  		} else {
   409  			p256PointAddAsm(t2, t2, &basePoint)
   410  		}
   411  	}
   412  }
   413  
// p256BaseMult sets p to scalar times the base point, using the precomputed
// table p256PreFast. scalar must be a 32-byte big-endian value (scalar[31] is
// the least significant byte); the callers in this file obtain it from
// p256GetMultiplier.
//
// The scalar is consumed in 37 windows of 7 bits from the least significant
// end. Each window is read together with the top bit of the window below it
// and recoded by boothW7 into a digit and a sign.
func (p *p256Point) p256BaseMult(scalar []byte) {
	// First window: the low 7 bits, shifted up because there is no lower
	// window to borrow a bit from.
	wvalue := (uint(scalar[31]) << 1) & 0xff
	sel, sign := boothW7(uint(wvalue))
	p256SelectBase(p, p256PreFast[0][:], sel)
	p256NegCond(p, sign)

	// Table entries are affine, so z is one (in Montgomery form).
	copy(p.z[:], one[:])
	var t0 p256Point

	copy(t0.z[:], one[:])

	// index is the bit position of the lowest bit of the next 8-bit read.
	index := uint(6)
	// zero stays 0 until a non-zero digit has been seen; it is handed to
	// p256PointAddAffineAsm, which then takes the addend as the result.
	zero := sel

	for i := 1; i < 37; i++ {
		if index < 247 {
			// The 8 bits straddle two bytes of the scalar.
			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
		} else {
			// Top of the scalar: only one byte is left to read from.
			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
		}
		index += 7
		sel, sign = boothW7(uint(wvalue))
		p256SelectBase(&t0, p256PreFast[i][:], sel)
		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
		zero |= sel
	}
}
   441  
   442  func (p *p256Point) p256ScalarMult(scalar []byte) {
   443  	// precomp is a table of precomputed points that stores powers of p
   444  	// from p^1 to p^16.
   445  	var precomp [16]p256Point
   446  	var t0, t1, t2, t3 p256Point
   447  
   448  	// Prepare the table
   449  	*&precomp[0] = *p
   450  
   451  	p256PointDoubleAsm(&t0, p)
   452  	p256PointDoubleAsm(&t1, &t0)
   453  	p256PointDoubleAsm(&t2, &t1)
   454  	p256PointDoubleAsm(&t3, &t2)
   455  	*&precomp[1] = t0  // 2
   456  	*&precomp[3] = t1  // 4
   457  	*&precomp[7] = t2  // 8
   458  	*&precomp[15] = t3 // 16
   459  
   460  	p256PointAddAsm(&t0, &t0, p)
   461  	p256PointAddAsm(&t1, &t1, p)
   462  	p256PointAddAsm(&t2, &t2, p)
   463  	*&precomp[2] = t0 // 3
   464  	*&precomp[4] = t1 // 5
   465  	*&precomp[8] = t2 // 9
   466  
   467  	p256PointDoubleAsm(&t0, &t0)
   468  	p256PointDoubleAsm(&t1, &t1)
   469  	*&precomp[5] = t0 // 6
   470  	*&precomp[9] = t1 // 10
   471  
   472  	p256PointAddAsm(&t2, &t0, p)
   473  	p256PointAddAsm(&t1, &t1, p)
   474  	*&precomp[6] = t2  // 7
   475  	*&precomp[10] = t1 // 11
   476  
   477  	p256PointDoubleAsm(&t0, &t0)
   478  	p256PointDoubleAsm(&t2, &t2)
   479  	*&precomp[11] = t0 // 12
   480  	*&precomp[13] = t2 // 14
   481  
   482  	p256PointAddAsm(&t0, &t0, p)
   483  	p256PointAddAsm(&t2, &t2, p)
   484  	*&precomp[12] = t0 // 13
   485  	*&precomp[14] = t2 // 15
   486  
   487  	// Start scanning the window from top bit
   488  	index := uint(254)
   489  	var sel, sign int
   490  
   491  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   492  	sel, _ = boothW5(uint(wvalue))
   493  	p256Select(p, precomp[:], sel)
   494  	zero := sel
   495  
   496  	for index > 4 {
   497  		index -= 5
   498  		p256PointDoubleAsm(p, p)
   499  		p256PointDoubleAsm(p, p)
   500  		p256PointDoubleAsm(p, p)
   501  		p256PointDoubleAsm(p, p)
   502  		p256PointDoubleAsm(p, p)
   503  
   504  		if index < 247 {
   505  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   506  		} else {
   507  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   508  		}
   509  
   510  		sel, sign = boothW5(uint(wvalue))
   511  
   512  		p256Select(&t0, precomp[:], sel)
   513  		p256NegCond(&t0, sign)
   514  		p256PointAddAsm(&t1, p, &t0)
   515  		p256MovCond(&t1, &t1, p, sel)
   516  		p256MovCond(p, &t1, &t0, zero)
   517  		zero |= sel
   518  	}
   519  
   520  	p256PointDoubleAsm(p, p)
   521  	p256PointDoubleAsm(p, p)
   522  	p256PointDoubleAsm(p, p)
   523  	p256PointDoubleAsm(p, p)
   524  	p256PointDoubleAsm(p, p)
   525  
   526  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   527  	sel, sign = boothW5(uint(wvalue))
   528  
   529  	p256Select(&t0, precomp[:], sel)
   530  	p256NegCond(&t0, sign)
   531  	p256PointAddAsm(&t1, p, &t0)
   532  	p256MovCond(&t1, &t1, p, sel)
   533  	p256MovCond(p, &t1, &t0, zero)
   534  }