github.com/mangodowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/crypto/elliptic/p256_s390x.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build s390x
     6  
     7  package elliptic
     8  
     9  import (
    10  	"math/big"
    11  )
    12  
    13  type p256CurveFast struct {
    14  	*CurveParams
    15  }
    16  
    17  type p256Point struct {
    18  	x [32]byte
    19  	y [32]byte
    20  	z [32]byte
    21  }
    22  
    23  var (
    24  	p256        Curve
    25  	p256PreFast *[37][64]p256Point
    26  )
    27  
    28  // hasVectorFacility reports whether the machine has the z/Architecture
    29  // vector facility installed and enabled.
    30  func hasVectorFacility() bool
    31  
    32  var hasVX = hasVectorFacility()
    33  
    34  func initP256Arch() {
    35  	if hasVX {
    36  		p256 = p256CurveFast{p256Params}
    37  		initTable()
    38  		return
    39  	}
    40  
    41  	// No vector support, use pure Go implementation.
    42  	p256 = p256Curve{p256Params}
    43  	return
    44  }
    45  
    46  func (curve p256CurveFast) Params() *CurveParams {
    47  	return curve.CurveParams
    48  }
    49  
    50  // Functions implemented in p256_asm_s390x.s
    51  // Montgomery multiplication modulo P256
    52  func p256MulAsm(res, in1, in2 []byte)
    53  
    54  // Montgomery square modulo P256
    55  func p256Sqr(res, in []byte) {
    56  	p256MulAsm(res, in, in)
    57  }
    58  
    59  // Montgomery multiplication by 1
    60  func p256FromMont(res, in []byte)
    61  
    62  // iff cond == 1  val <- -val
    63  func p256NegCond(val *p256Point, cond int)
    64  
    65  // if cond == 0 res <- b; else res <- a
    66  func p256MovCond(res, a, b *p256Point, cond int)
    67  
    68  // Constant time table access
    69  func p256Select(point *p256Point, table []p256Point, idx int)
    70  func p256SelectBase(point *p256Point, table []p256Point, idx int)
    71  
    72  // Montgomery multiplication modulo Ord(G)
    73  func p256OrdMul(res, in1, in2 []byte)
    74  
    75  // Montgomery square modulo Ord(G), repeated n times
    76  func p256OrdSqr(res, in []byte, n int) {
    77  	copy(res, in)
    78  	for i := 0; i < n; i += 1 {
    79  		p256OrdMul(res, res, res)
    80  	}
    81  }
    82  
    83  // Point add with P2 being affine point
    84  // If sign == 1 -> P2 = -P2
    85  // If sel == 0 -> P3 = P1
    86  // if zero == 0 -> P3 = P2
    87  func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
    88  
    89  // Point add
    90  func p256PointAddAsm(P3, P1, P2 *p256Point)
    91  func p256PointDoubleAsm(P3, P1 *p256Point)
    92  
    93  func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
    94  	if k.Cmp(p256Params.N) >= 0 {
    95  		// This should never happen.
    96  		reducedK := new(big.Int).Mod(k, p256Params.N)
    97  		k = reducedK
    98  	}
    99  
   100  	// table will store precomputed powers of x. The 32 bytes at index
   101  	// i store x^(i+1).
   102  	var table [15][32]byte
   103  
   104  	x := fromBig(k)
   105  	// This code operates in the Montgomery domain where R = 2^256 mod n
   106  	// and n is the order of the scalar field. (See initP256 for the
   107  	// value.) Elements in the Montgomery domain take the form a×R and
   108  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   109  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   110  	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
   111  	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
   112  		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
   113  
   114  	p256OrdMul(table[0][:], x, RR)
   115  
   116  	// Prepare the table, no need in constant time access, because the
   117  	// power is not a secret. (Entry 0 is never used.)
   118  	for i := 2; i < 16; i += 2 {
   119  		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
   120  		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
   121  	}
   122  
   123  	copy(x, table[14][:]) // f
   124  
   125  	p256OrdSqr(x[0:32], x[0:32], 4)
   126  	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
   127  	t := make([]byte, 32)
   128  	copy(t, x)
   129  
   130  	p256OrdSqr(x, x, 8)
   131  	p256OrdMul(x, x, t) // ffff
   132  	copy(t, x)
   133  
   134  	p256OrdSqr(x, x, 16)
   135  	p256OrdMul(x, x, t) // ffffffff
   136  	copy(t, x)
   137  
   138  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   139  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   140  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   141  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   142  
   143  	// Remaining 32 windows
   144  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
   145  		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   146  	for i := 0; i < 32; i++ {
   147  		p256OrdSqr(x, x, 4)
   148  		p256OrdMul(x, x, table[expLo[i]-1][:])
   149  	}
   150  
   151  	// Multiplying by one in the Montgomery domain converts a Montgomery
   152  	// value out of the domain.
   153  	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   154  		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
   155  	p256OrdMul(x, x, one)
   156  
   157  	return new(big.Int).SetBytes(x)
   158  }
   159  
   160  // fromBig converts a *big.Int into a format used by this code.
   161  func fromBig(big *big.Int) []byte {
   162  	// This could be done a lot more efficiently...
   163  	res := big.Bytes()
   164  	if 32 == len(res) {
   165  		return res
   166  	}
   167  	t := make([]byte, 32)
   168  	offset := 32 - len(res)
   169  	for i := len(res) - 1; i >= 0; i-- {
   170  		t[i+offset] = res[i]
   171  	}
   172  	return t
   173  }
   174  
   175  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   176  // is equal or greater than the order of the group, it's reduced modulo that order.
   177  func p256GetMultiplier(in []byte) []byte {
   178  	n := new(big.Int).SetBytes(in)
   179  
   180  	if n.Cmp(p256Params.N) >= 0 {
   181  		n.Mod(n, p256Params.N)
   182  	}
   183  	return fromBig(n)
   184  }
   185  
   186  // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
   187  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   188  // R×R mod p. See comment in Inverse about how this is used.
   189  var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
   190  	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
   191  
   192  // (This is one, in the Montgomery domain.)
   193  var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   194  	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   195  
   196  func maybeReduceModP(in *big.Int) *big.Int {
   197  	if in.Cmp(p256Params.P) < 0 {
   198  		return in
   199  	}
   200  	return new(big.Int).Mod(in, p256Params.P)
   201  }
   202  
   203  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   204  	var r1, r2 p256Point
   205  	r1.p256BaseMult(p256GetMultiplier(baseScalar))
   206  
   207  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   208  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   209  	copy(r2.z[:], one)
   210  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   211  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   212  
   213  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   214  	p256PointAddAsm(&r1, &r1, &r2)
   215  	return r1.p256PointToAffine()
   216  }
   217  
   218  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   219  	var r p256Point
   220  	r.p256BaseMult(p256GetMultiplier(scalar))
   221  	return r.p256PointToAffine()
   222  }
   223  
   224  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   225  	var r p256Point
   226  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   227  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   228  	copy(r.z[:], one)
   229  	p256MulAsm(r.x[:], r.x[:], rr[:])
   230  	p256MulAsm(r.y[:], r.y[:], rr[:])
   231  	r.p256ScalarMult(p256GetMultiplier(scalar))
   232  	return r.p256PointToAffine()
   233  }
   234  
   235  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   236  	zInv := make([]byte, 32)
   237  	zInvSq := make([]byte, 32)
   238  
   239  	p256Inverse(zInv, p.z[:])
   240  	p256Sqr(zInvSq, zInv)
   241  	p256MulAsm(zInv, zInv, zInvSq)
   242  
   243  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   244  	p256MulAsm(zInv, p.y[:], zInv)
   245  
   246  	p256FromMont(zInvSq, zInvSq)
   247  	p256FromMont(zInv, zInv)
   248  
   249  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   250  }
   251  
   252  // p256Inverse sets out to in^-1 mod p.
   253  func p256Inverse(out, in []byte) {
   254  	var stack [6 * 32]byte
   255  	p2 := stack[32*0 : 32*0+32]
   256  	p4 := stack[32*1 : 32*1+32]
   257  	p8 := stack[32*2 : 32*2+32]
   258  	p16 := stack[32*3 : 32*3+32]
   259  	p32 := stack[32*4 : 32*4+32]
   260  
   261  	p256Sqr(out, in)
   262  	p256MulAsm(p2, out, in) // 3*p
   263  
   264  	p256Sqr(out, p2)
   265  	p256Sqr(out, out)
   266  	p256MulAsm(p4, out, p2) // f*p
   267  
   268  	p256Sqr(out, p4)
   269  	p256Sqr(out, out)
   270  	p256Sqr(out, out)
   271  	p256Sqr(out, out)
   272  	p256MulAsm(p8, out, p4) // ff*p
   273  
   274  	p256Sqr(out, p8)
   275  
   276  	for i := 0; i < 7; i++ {
   277  		p256Sqr(out, out)
   278  	}
   279  	p256MulAsm(p16, out, p8) // ffff*p
   280  
   281  	p256Sqr(out, p16)
   282  	for i := 0; i < 15; i++ {
   283  		p256Sqr(out, out)
   284  	}
   285  	p256MulAsm(p32, out, p16) // ffffffff*p
   286  
   287  	p256Sqr(out, p32)
   288  
   289  	for i := 0; i < 31; i++ {
   290  		p256Sqr(out, out)
   291  	}
   292  	p256MulAsm(out, out, in)
   293  
   294  	for i := 0; i < 32*4; i++ {
   295  		p256Sqr(out, out)
   296  	}
   297  	p256MulAsm(out, out, p32)
   298  
   299  	for i := 0; i < 32; i++ {
   300  		p256Sqr(out, out)
   301  	}
   302  	p256MulAsm(out, out, p32)
   303  
   304  	for i := 0; i < 16; i++ {
   305  		p256Sqr(out, out)
   306  	}
   307  	p256MulAsm(out, out, p16)
   308  
   309  	for i := 0; i < 8; i++ {
   310  		p256Sqr(out, out)
   311  	}
   312  	p256MulAsm(out, out, p8)
   313  
   314  	p256Sqr(out, out)
   315  	p256Sqr(out, out)
   316  	p256Sqr(out, out)
   317  	p256Sqr(out, out)
   318  	p256MulAsm(out, out, p4)
   319  
   320  	p256Sqr(out, out)
   321  	p256Sqr(out, out)
   322  	p256MulAsm(out, out, p2)
   323  
   324  	p256Sqr(out, out)
   325  	p256Sqr(out, out)
   326  	p256MulAsm(out, out, in)
   327  }
   328  
   329  func boothW5(in uint) (int, int) {
   330  	var s uint = ^((in >> 5) - 1)
   331  	var d uint = (1 << 6) - in - 1
   332  	d = (d & s) | (in & (^s))
   333  	d = (d >> 1) + (d & 1)
   334  	return int(d), int(s & 1)
   335  }
   336  
   337  func boothW7(in uint) (int, int) {
   338  	var s uint = ^((in >> 7) - 1)
   339  	var d uint = (1 << 8) - in - 1
   340  	d = (d & s) | (in & (^s))
   341  	d = (d >> 1) + (d & 1)
   342  	return int(d), int(s & 1)
   343  }
   344  
   345  func initTable() {
   346  	p256PreFast = new([37][64]p256Point) //z coordinate not used
   347  	basePoint := p256Point{
   348  		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
   349  			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
   350  		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
   351  			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
   352  		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   353  			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
   354  	}
   355  
   356  	t1 := new(p256Point)
   357  	t2 := new(p256Point)
   358  	*t2 = basePoint
   359  
   360  	zInv := make([]byte, 32)
   361  	zInvSq := make([]byte, 32)
   362  	for j := 0; j < 64; j++ {
   363  		*t1 = *t2
   364  		for i := 0; i < 37; i++ {
   365  			// The window size is 7 so we need to double 7 times.
   366  			if i != 0 {
   367  				for k := 0; k < 7; k++ {
   368  					p256PointDoubleAsm(t1, t1)
   369  				}
   370  			}
   371  			// Convert the point to affine form. (Its values are
   372  			// still in Montgomery form however.)
   373  			p256Inverse(zInv, t1.z[:])
   374  			p256Sqr(zInvSq, zInv)
   375  			p256MulAsm(zInv, zInv, zInvSq)
   376  
   377  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   378  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   379  
   380  			copy(t1.z[:], basePoint.z[:])
   381  			// Update the table entry
   382  			copy(p256PreFast[i][j].x[:], t1.x[:])
   383  			copy(p256PreFast[i][j].y[:], t1.y[:])
   384  		}
   385  		if j == 0 {
   386  			p256PointDoubleAsm(t2, &basePoint)
   387  		} else {
   388  			p256PointAddAsm(t2, t2, &basePoint)
   389  		}
   390  	}
   391  }
   392  
   393  func (p *p256Point) p256BaseMult(scalar []byte) {
   394  	wvalue := (uint(scalar[31]) << 1) & 0xff
   395  	sel, sign := boothW7(uint(wvalue))
   396  	p256SelectBase(p, p256PreFast[0][:], sel)
   397  	p256NegCond(p, sign)
   398  
   399  	copy(p.z[:], one[:])
   400  	var t0 p256Point
   401  
   402  	copy(t0.z[:], one[:])
   403  
   404  	index := uint(6)
   405  	zero := sel
   406  
   407  	for i := 1; i < 37; i++ {
   408  		if index < 247 {
   409  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
   410  		} else {
   411  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
   412  		}
   413  		index += 7
   414  		sel, sign = boothW7(uint(wvalue))
   415  		p256SelectBase(&t0, p256PreFast[i][:], sel)
   416  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   417  		zero |= sel
   418  	}
   419  }
   420  
   421  func (p *p256Point) p256ScalarMult(scalar []byte) {
   422  	// precomp is a table of precomputed points that stores powers of p
   423  	// from p^1 to p^16.
   424  	var precomp [16]p256Point
   425  	var t0, t1, t2, t3 p256Point
   426  
   427  	// Prepare the table
   428  	*&precomp[0] = *p
   429  
   430  	p256PointDoubleAsm(&t0, p)
   431  	p256PointDoubleAsm(&t1, &t0)
   432  	p256PointDoubleAsm(&t2, &t1)
   433  	p256PointDoubleAsm(&t3, &t2)
   434  	*&precomp[1] = t0  // 2
   435  	*&precomp[3] = t1  // 4
   436  	*&precomp[7] = t2  // 8
   437  	*&precomp[15] = t3 // 16
   438  
   439  	p256PointAddAsm(&t0, &t0, p)
   440  	p256PointAddAsm(&t1, &t1, p)
   441  	p256PointAddAsm(&t2, &t2, p)
   442  	*&precomp[2] = t0 // 3
   443  	*&precomp[4] = t1 // 5
   444  	*&precomp[8] = t2 // 9
   445  
   446  	p256PointDoubleAsm(&t0, &t0)
   447  	p256PointDoubleAsm(&t1, &t1)
   448  	*&precomp[5] = t0 // 6
   449  	*&precomp[9] = t1 // 10
   450  
   451  	p256PointAddAsm(&t2, &t0, p)
   452  	p256PointAddAsm(&t1, &t1, p)
   453  	*&precomp[6] = t2  // 7
   454  	*&precomp[10] = t1 // 11
   455  
   456  	p256PointDoubleAsm(&t0, &t0)
   457  	p256PointDoubleAsm(&t2, &t2)
   458  	*&precomp[11] = t0 // 12
   459  	*&precomp[13] = t2 // 14
   460  
   461  	p256PointAddAsm(&t0, &t0, p)
   462  	p256PointAddAsm(&t2, &t2, p)
   463  	*&precomp[12] = t0 // 13
   464  	*&precomp[14] = t2 // 15
   465  
   466  	// Start scanning the window from top bit
   467  	index := uint(254)
   468  	var sel, sign int
   469  
   470  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   471  	sel, _ = boothW5(uint(wvalue))
   472  	p256Select(p, precomp[:], sel)
   473  	zero := sel
   474  
   475  	for index > 4 {
   476  		index -= 5
   477  		p256PointDoubleAsm(p, p)
   478  		p256PointDoubleAsm(p, p)
   479  		p256PointDoubleAsm(p, p)
   480  		p256PointDoubleAsm(p, p)
   481  		p256PointDoubleAsm(p, p)
   482  
   483  		if index < 247 {
   484  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   485  		} else {
   486  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   487  		}
   488  
   489  		sel, sign = boothW5(uint(wvalue))
   490  
   491  		p256Select(&t0, precomp[:], sel)
   492  		p256NegCond(&t0, sign)
   493  		p256PointAddAsm(&t1, p, &t0)
   494  		p256MovCond(&t1, &t1, p, sel)
   495  		p256MovCond(p, &t1, &t0, zero)
   496  		zero |= sel
   497  	}
   498  
   499  	p256PointDoubleAsm(p, p)
   500  	p256PointDoubleAsm(p, p)
   501  	p256PointDoubleAsm(p, p)
   502  	p256PointDoubleAsm(p, p)
   503  	p256PointDoubleAsm(p, p)
   504  
   505  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   506  	sel, sign = boothW5(uint(wvalue))
   507  
   508  	p256Select(&t0, precomp[:], sel)
   509  	p256NegCond(&t0, sign)
   510  	p256PointAddAsm(&t1, p, &t0)
   511  	p256MovCond(&t1, &t1, p, sel)
   512  	p256MovCond(p, &t1, &t0, zero)
   513  }