github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/crypto/elliptic/p256_s390x.go

github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/crypto/elliptic/p256_s390x.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build s390x
     6  
     7  package elliptic
     8  
     9  import (
    10  	"crypto/subtle"
    11  	"internal/cpu"
    12  	"math/big"
    13  	"unsafe"
    14  )
    15  
// Byte offsets of the HasVX and HasVXE feature flags inside cpu.S390X.
// NOTE(review): nothing in this Go file reads these constants; presumably the
// accompanying assembly uses them to test the CPU features — confirm.
const (
	offsetS390xHasVX  = unsafe.Offsetof(cpu.S390X.HasVX)
	offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE)
)
    20  
// p256CurveFast is the P-256 curve implementation that initP256Arch installs
// when the CPU reports vector (VX) support. It embeds the curve parameters.
type p256CurveFast struct {
	*CurveParams
}
    24  
// p256Point is a curve point stored as three 32-byte coordinates.
// Within this file the coordinates are filled from big-endian byte strings
// (see fromBig) and converted into the Montgomery domain with rr before any
// point arithmetic; z is set to the Montgomery form of one for affine input.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
    30  
// p256 is the Curve implementation chosen by initP256Arch.
//
// p256PreFast is the precomputed base-point table built by initTable and read
// by p256BaseMult: 37 windows of 64 points each. Only the x and y coordinates
// of each entry are populated.
var (
	p256        Curve
	p256PreFast *[37][64]p256Point
)
    35  
// The six functions below are declared without a Go body and are not called
// from any Go code in this file.
// NOTE(review): presumably they are defined in p256_asm_s390x.s, with the
// trampoline setup routines choosing between the VX and VMSL variants of the
// internal multiply/square — confirm against the assembly.
//
//go:noescape
func p256MulInternalTrampolineSetup()

//go:noescape
func p256SqrInternalTrampolineSetup()

//go:noescape
func p256MulInternalVX()

//go:noescape
func p256MulInternalVMSL()

//go:noescape
func p256SqrInternalVX()

//go:noescape
func p256SqrInternalVMSL()
    53  
    54  func initP256Arch() {
    55  	if cpu.S390X.HasVX {
    56  		p256 = p256CurveFast{p256Params}
    57  		initTable()
    58  		return
    59  	}
    60  
    61  	// No vector support, use pure Go implementation.
    62  	p256 = p256Curve{p256Params}
    63  	return
    64  }
    65  
// Params returns the curve parameters embedded in curve.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
    69  
// Functions implemented in p256_asm_s390x.s

// p256SqrAsm is the Montgomery square modulo P256: res receives in1 squared.
//
//go:noescape
func p256SqrAsm(res, in1 []byte)

// p256MulAsm is the Montgomery multiplication modulo P256: res receives the
// product of in1 and in2. Callers in this file pass res aliasing an input.
//
//go:noescape
func p256MulAsm(res, in1, in2 []byte)
    78  
// p256Sqr stores the Montgomery square modulo P256 of in into res.
// It is a thin wrapper around p256SqrAsm.
func p256Sqr(res, in []byte) {
	p256SqrAsm(res, in)
}
    83  
// p256FromMont performs a Montgomery multiplication of in by 1 and stores the
// result in res. p256PointToAffine uses it as the last step before handing the
// coordinates to big.Int, i.e. to leave the Montgomery domain.
//
//go:noescape
func p256FromMont(res, in []byte)
    88  
// p256NegCond conditionally negates val in place:
// iff cond == 1  val <- -val
//
//go:noescape
func p256NegCond(val *p256Point, cond int)
    93  
// p256MovCond is a conditional move between points:
// if cond == 0 res <- b; else res <- a
// Callers in this file pass res aliasing a or b.
//
//go:noescape
func p256MovCond(res, a, b *p256Point, cond int)
    98  
// p256Select is a constant time table access: it stores in point the entry of
// table chosen by idx. p256ScalarMult passes a Booth digit as idx.
// NOTE(review): presumably idx is 1-based and idx == 0 selects no entry —
// confirm against the assembly.
//
//go:noescape
func p256Select(point *p256Point, table []p256Point, idx int)

// p256SelectBase is the table access used by p256BaseMult on one row of
// p256PreFast, with a Booth digit as idx.
// NOTE(review): presumably constant time like p256Select — confirm against the
// assembly.
//
//go:noescape
func p256SelectBase(point *p256Point, table []p256Point, idx int)
   106  
// p256OrdMul is the Montgomery multiplication modulo Ord(G): res receives the
// product of in1 and in2. Callers in this file pass res aliasing the inputs.
//
//go:noescape
func p256OrdMul(res, in1, in2 []byte)
   111  
   112  // Montgomery square modulo Ord(G), repeated n times
   113  func p256OrdSqr(res, in []byte, n int) {
   114  	copy(res, in)
   115  	for i := 0; i < n; i += 1 {
   116  		p256OrdMul(res, res, res)
   117  	}
   118  }
   119  
// p256PointAddAffineAsm adds the affine point P2 to P1 and stores the result
// in P3, with three selector arguments:
// If sign == 1 -> P2 = -P2
// If sel == 0 -> P3 = P1
// if zero == 0 -> P3 = P2
// p256BaseMult passes the Booth sign and digit as sign and sel, and a running
// "non-zero digit seen so far" value as zero.
//
//go:noescape
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
   127  
// p256PointAddAsm is the point addition: P3 receives P1 + P2.
// CombinedMult treats the int result as a "points are equal" flag and, when it
// is non-zero, replaces the sum with the doubling of P1.
//
//go:noescape
func p256PointAddAsm(P3, P1, P2 *p256Point) int
   132  
// p256PointDoubleAsm is the point doubling: P3 receives 2*P1. Callers in this
// file pass P3 aliasing P1.
//
//go:noescape
func p256PointDoubleAsm(P3, P1 *p256Point)
   135  
// Inverse computes the inverse of k in the scalar field (modulo p256Params.N)
// by raising k to a fixed exponent with Montgomery squarings and
// multiplications modulo the group order. k itself is not modified.
func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
	if k.Cmp(p256Params.N) >= 0 {
		// This should never happen.
		reducedK := new(big.Int).Mod(k, p256Params.N)
		k = reducedK
	}

	// table will store precomputed powers of x. The 32 bytes at index
	// i store x^(i+1).
	var table [15][32]byte

	x := fromBig(k)
	// This code operates in the Montgomery domain where R = 2^256 mod n
	// and n is the order of the scalar field. (See initP256 for the
	// value.) Elements in the Montgomery domain take the form a×R and
	// multiplication of x and y in the domain calculates
	// (x × y × R^-1) mod n. RR is R×R mod n thus the Montgomery
	// multiplication of x and RR gives x×R, i.e. converts x into the
	// Montgomery domain. Stored in BigEndian form
	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}

	p256OrdMul(table[0][:], x, RR)

	// Prepare the table, no need in constant time access, because the
	// power is not a secret. Each pass fills an even power by squaring
	// (table[i-1] = x^i) and the following odd power by one more
	// multiplication with x (table[i] = x^(i+1)).
	for i := 2; i < 16; i += 2 {
		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
	}

	// From here on x is reused as the accumulator. The trailing comments
	// give the exponent accumulated so far, in hexadecimal.
	copy(x, table[14][:]) // f

	p256OrdSqr(x[0:32], x[0:32], 4)
	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
	t := make([]byte, 32)
	copy(t, x)

	p256OrdSqr(x, x, 8)
	p256OrdMul(x, x, t) // ffff
	copy(t, x)

	p256OrdSqr(x, x, 16)
	p256OrdMul(x, x, t) // ffffffff
	copy(t, x)

	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff

	// Remaining 32 windows: one 4-bit nibble of the exponent per entry,
	// most significant first. No nibble is zero, so table[expLo[i]-1] is
	// always a valid index.
	// NOTE(review): the full exponent is presumably N-2 — confirm against
	// p256Params.N.
	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
	for i := 0; i < 32; i++ {
		p256OrdSqr(x, x, 4)
		p256OrdMul(x, x, table[expLo[i]-1][:])
	}

	// Multiplying by one in the Montgomery domain converts a Montgomery
	// value out of the domain. (This local one is the plain integer 1 and
	// shadows the package-level one, which is 1 in Montgomery form mod p.)
	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
	p256OrdMul(x, x, one)

	return new(big.Int).SetBytes(x)
}
   202  
   203  // fromBig converts a *big.Int into a format used by this code.
   204  func fromBig(big *big.Int) []byte {
   205  	// This could be done a lot more efficiently...
   206  	res := big.Bytes()
   207  	if 32 == len(res) {
   208  		return res
   209  	}
   210  	t := make([]byte, 32)
   211  	offset := 32 - len(res)
   212  	for i := len(res) - 1; i >= 0; i-- {
   213  		t[i+offset] = res[i]
   214  	}
   215  	return t
   216  }
   217  
   218  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   219  // is equal or greater than the order of the group, it's reduced modulo that order.
   220  func p256GetMultiplier(in []byte) []byte {
   221  	n := new(big.Int).SetBytes(in)
   222  
   223  	if n.Cmp(p256Params.N) >= 0 {
   224  		n.Mod(n, p256Params.N)
   225  	}
   226  	return fromBig(n)
   227  }
   228  
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p, stored big-endian: a Montgomery multiplication of a value by rr
// converts that value into the Montgomery domain. See comment in Inverse about
// how this is used.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
   234  
// one is the value 1 in the Montgomery domain modulo p, stored big-endian.
// It is copied into the z coordinate of points built from affine input.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   238  
   239  func maybeReduceModP(in *big.Int) *big.Int {
   240  	if in.Cmp(p256Params.P) < 0 {
   241  		return in
   242  	}
   243  	return new(big.Int).Mod(in, p256Params.P)
   244  }
   245  
   246  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   247  	var r1, r2 p256Point
   248  	scalarReduced := p256GetMultiplier(baseScalar)
   249  	r1IsInfinity := scalarIsZero(scalarReduced)
   250  	r1.p256BaseMult(scalarReduced)
   251  
   252  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   253  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   254  	copy(r2.z[:], one)
   255  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   256  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   257  
   258  	scalarReduced = p256GetMultiplier(scalar)
   259  	r2IsInfinity := scalarIsZero(scalarReduced)
   260  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   261  
   262  	var sum, double p256Point
   263  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   264  	p256PointDoubleAsm(&double, &r1)
   265  	p256MovCond(&sum, &double, &sum, pointsEqual)
   266  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   267  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   268  	return sum.p256PointToAffine()
   269  }
   270  
   271  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   272  	var r p256Point
   273  	r.p256BaseMult(p256GetMultiplier(scalar))
   274  	return r.p256PointToAffine()
   275  }
   276  
   277  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   278  	var r p256Point
   279  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   280  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   281  	copy(r.z[:], one)
   282  	p256MulAsm(r.x[:], r.x[:], rr[:])
   283  	p256MulAsm(r.y[:], r.y[:], rr[:])
   284  	r.p256ScalarMult(p256GetMultiplier(scalar))
   285  	return r.p256PointToAffine()
   286  }
   287  
   288  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   289  // otherwise.
   290  func scalarIsZero(scalar []byte) int {
   291  	b := byte(0)
   292  	for _, s := range scalar {
   293  		b |= s
   294  	}
   295  	return subtle.ConstantTimeByteEq(b, 0)
   296  }
   297  
   298  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   299  	zInv := make([]byte, 32)
   300  	zInvSq := make([]byte, 32)
   301  
   302  	p256Inverse(zInv, p.z[:])
   303  	p256Sqr(zInvSq, zInv)
   304  	p256MulAsm(zInv, zInv, zInvSq)
   305  
   306  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   307  	p256MulAsm(zInv, p.y[:], zInv)
   308  
   309  	p256FromMont(zInvSq, zInvSq)
   310  	p256FromMont(zInv, zInv)
   311  
   312  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   313  }
   314  
   315  // p256Inverse sets out to in^-1 mod p.
   316  func p256Inverse(out, in []byte) {
   317  	var stack [6 * 32]byte
   318  	p2 := stack[32*0 : 32*0+32]
   319  	p4 := stack[32*1 : 32*1+32]
   320  	p8 := stack[32*2 : 32*2+32]
   321  	p16 := stack[32*3 : 32*3+32]
   322  	p32 := stack[32*4 : 32*4+32]
   323  
   324  	p256Sqr(out, in)
   325  	p256MulAsm(p2, out, in) // 3*p
   326  
   327  	p256Sqr(out, p2)
   328  	p256Sqr(out, out)
   329  	p256MulAsm(p4, out, p2) // f*p
   330  
   331  	p256Sqr(out, p4)
   332  	p256Sqr(out, out)
   333  	p256Sqr(out, out)
   334  	p256Sqr(out, out)
   335  	p256MulAsm(p8, out, p4) // ff*p
   336  
   337  	p256Sqr(out, p8)
   338  
   339  	for i := 0; i < 7; i++ {
   340  		p256Sqr(out, out)
   341  	}
   342  	p256MulAsm(p16, out, p8) // ffff*p
   343  
   344  	p256Sqr(out, p16)
   345  	for i := 0; i < 15; i++ {
   346  		p256Sqr(out, out)
   347  	}
   348  	p256MulAsm(p32, out, p16) // ffffffff*p
   349  
   350  	p256Sqr(out, p32)
   351  
   352  	for i := 0; i < 31; i++ {
   353  		p256Sqr(out, out)
   354  	}
   355  	p256MulAsm(out, out, in)
   356  
   357  	for i := 0; i < 32*4; i++ {
   358  		p256Sqr(out, out)
   359  	}
   360  	p256MulAsm(out, out, p32)
   361  
   362  	for i := 0; i < 32; i++ {
   363  		p256Sqr(out, out)
   364  	}
   365  	p256MulAsm(out, out, p32)
   366  
   367  	for i := 0; i < 16; i++ {
   368  		p256Sqr(out, out)
   369  	}
   370  	p256MulAsm(out, out, p16)
   371  
   372  	for i := 0; i < 8; i++ {
   373  		p256Sqr(out, out)
   374  	}
   375  	p256MulAsm(out, out, p8)
   376  
   377  	p256Sqr(out, out)
   378  	p256Sqr(out, out)
   379  	p256Sqr(out, out)
   380  	p256Sqr(out, out)
   381  	p256MulAsm(out, out, p4)
   382  
   383  	p256Sqr(out, out)
   384  	p256Sqr(out, out)
   385  	p256MulAsm(out, out, p2)
   386  
   387  	p256Sqr(out, out)
   388  	p256Sqr(out, out)
   389  	p256MulAsm(out, out, in)
   390  }
   391  
   392  func boothW5(in uint) (int, int) {
   393  	var s uint = ^((in >> 5) - 1)
   394  	var d uint = (1 << 6) - in - 1
   395  	d = (d & s) | (in & (^s))
   396  	d = (d >> 1) + (d & 1)
   397  	return int(d), int(s & 1)
   398  }
   399  
   400  func boothW7(in uint) (int, int) {
   401  	var s uint = ^((in >> 7) - 1)
   402  	var d uint = (1 << 8) - in - 1
   403  	d = (d & s) | (in & (^s))
   404  	d = (d >> 1) + (d & 1)
   405  	return int(d), int(s & 1)
   406  }
   407  
// initTable allocates and fills p256PreFast, the precomputed table used by
// p256BaseMult. Entry [i][j] receives the affine x and y coordinates (still in
// Montgomery form) of the base point multiplied by (j+1) and then doubled
// 7*i times.
func initTable() {
	p256PreFast = new([37][64]p256Point) // z coordinate not used
	basePoint := p256Point{
		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
	}

	// t2 holds (j+1) times the base point for the current column j;
	// t1 is the working copy that gets doubled down the rows.
	t1 := new(p256Point)
	t2 := new(p256Point)
	*t2 = basePoint

	zInv := make([]byte, 32)
	zInvSq := make([]byte, 32)
	for j := 0; j < 64; j++ {
		*t1 = *t2
		for i := 0; i < 37; i++ {
			// The window size is 7 so we need to double 7 times.
			if i != 0 {
				for k := 0; k < 7; k++ {
					p256PointDoubleAsm(t1, t1)
				}
			}
			// Convert the point to affine form. (Its values are
			// still in Montgomery form however.)
			// zInvSq = z^-2 and, after the multiplication, zInv = z^-3.
			p256Inverse(zInv, t1.z[:])
			p256Sqr(zInvSq, zInv)
			p256MulAsm(zInv, zInv, zInvSq)

			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
			p256MulAsm(t1.y[:], t1.y[:], zInv)

			// Reset z to one (Montgomery form) now that x and y are affine.
			copy(t1.z[:], basePoint.z[:])
			// Update the table entry
			copy(p256PreFast[i][j].x[:], t1.x[:])
			copy(p256PreFast[i][j].y[:], t1.y[:])
		}
		// Advance t2 to the next multiple of the base point. The first
		// step adds the base point to itself, so it uses the doubling
		// routine instead of the addition routine.
		if j == 0 {
			p256PointDoubleAsm(t2, &basePoint)
		} else {
			p256PointAddAsm(t2, t2, &basePoint)
		}
	}
}
   455  
// p256BaseMult sets p to scalar times the base point, using the precomputed
// table p256PreFast. scalar is read as 32 big-endian bytes (scalar[31] is the
// least significant byte); callers pass the output of p256GetMultiplier.
func (p *p256Point) p256BaseMult(scalar []byte) {
	// First window: the low 7 bits of the scalar shifted up by one, so the
	// window's lowest bit is an implicit zero below bit 0.
	wvalue := (uint(scalar[31]) << 1) & 0xff
	sel, sign := boothW7(uint(wvalue))
	p256SelectBase(p, p256PreFast[0][:], sel)
	p256NegCond(p, sign)

	copy(p.z[:], one[:])
	var t0 p256Point

	copy(t0.z[:], one[:])

	// index is the bit position where the next 8-bit window starts. It
	// advances by 7 each round, so consecutive windows overlap by one bit.
	index := uint(6)
	// zero stays 0 until a window has produced a non-zero digit.
	zero := sel

	for i := 1; i < 37; i++ {
		if index < 247 {
			// The window straddles two bytes: combine the upper bits of
			// the lower byte with the lower bits of the next byte.
			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
		} else {
			// Top of the scalar: only the most significant byte remains.
			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
		}
		index += 7
		sel, sign = boothW7(uint(wvalue))
		p256SelectBase(&t0, p256PreFast[i][:], sel)
		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
		zero |= sel
	}
}
   483  
   484  func (p *p256Point) p256ScalarMult(scalar []byte) {
   485  	// precomp is a table of precomputed points that stores powers of p
   486  	// from p^1 to p^16.
   487  	var precomp [16]p256Point
   488  	var t0, t1, t2, t3 p256Point
   489  
   490  	// Prepare the table
   491  	*&precomp[0] = *p
   492  
   493  	p256PointDoubleAsm(&t0, p)
   494  	p256PointDoubleAsm(&t1, &t0)
   495  	p256PointDoubleAsm(&t2, &t1)
   496  	p256PointDoubleAsm(&t3, &t2)
   497  	*&precomp[1] = t0  // 2
   498  	*&precomp[3] = t1  // 4
   499  	*&precomp[7] = t2  // 8
   500  	*&precomp[15] = t3 // 16
   501  
   502  	p256PointAddAsm(&t0, &t0, p)
   503  	p256PointAddAsm(&t1, &t1, p)
   504  	p256PointAddAsm(&t2, &t2, p)
   505  	*&precomp[2] = t0 // 3
   506  	*&precomp[4] = t1 // 5
   507  	*&precomp[8] = t2 // 9
   508  
   509  	p256PointDoubleAsm(&t0, &t0)
   510  	p256PointDoubleAsm(&t1, &t1)
   511  	*&precomp[5] = t0 // 6
   512  	*&precomp[9] = t1 // 10
   513  
   514  	p256PointAddAsm(&t2, &t0, p)
   515  	p256PointAddAsm(&t1, &t1, p)
   516  	*&precomp[6] = t2  // 7
   517  	*&precomp[10] = t1 // 11
   518  
   519  	p256PointDoubleAsm(&t0, &t0)
   520  	p256PointDoubleAsm(&t2, &t2)
   521  	*&precomp[11] = t0 // 12
   522  	*&precomp[13] = t2 // 14
   523  
   524  	p256PointAddAsm(&t0, &t0, p)
   525  	p256PointAddAsm(&t2, &t2, p)
   526  	*&precomp[12] = t0 // 13
   527  	*&precomp[14] = t2 // 15
   528  
   529  	// Start scanning the window from top bit
   530  	index := uint(254)
   531  	var sel, sign int
   532  
   533  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   534  	sel, _ = boothW5(uint(wvalue))
   535  	p256Select(p, precomp[:], sel)
   536  	zero := sel
   537  
   538  	for index > 4 {
   539  		index -= 5
   540  		p256PointDoubleAsm(p, p)
   541  		p256PointDoubleAsm(p, p)
   542  		p256PointDoubleAsm(p, p)
   543  		p256PointDoubleAsm(p, p)
   544  		p256PointDoubleAsm(p, p)
   545  
   546  		if index < 247 {
   547  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   548  		} else {
   549  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   550  		}
   551  
   552  		sel, sign = boothW5(uint(wvalue))
   553  
   554  		p256Select(&t0, precomp[:], sel)
   555  		p256NegCond(&t0, sign)
   556  		p256PointAddAsm(&t1, p, &t0)
   557  		p256MovCond(&t1, &t1, p, sel)
   558  		p256MovCond(p, &t1, &t0, zero)
   559  		zero |= sel
   560  	}
   561  
   562  	p256PointDoubleAsm(p, p)
   563  	p256PointDoubleAsm(p, p)
   564  	p256PointDoubleAsm(p, p)
   565  	p256PointDoubleAsm(p, p)
   566  	p256PointDoubleAsm(p, p)
   567  
   568  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   569  	sel, sign = boothW5(uint(wvalue))
   570  
   571  	p256Select(&t0, precomp[:], sel)
   572  	p256NegCond(&t0, sign)
   573  	p256PointAddAsm(&t1, p, &t0)
   574  	p256MovCond(&t1, &t1, p, sel)
   575  	p256MovCond(p, &t1, &t0, zero)
   576  }