github.com/fisco-bcos/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/elliptic/p256_s390x.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build s390x
     6  
     7  package elliptic
     8  
     9  import (
    10  	"crypto/subtle"
    11  	"math/big"
    12  	"unsafe"
    13  
    14  	"github.com/FISCO-BCOS/crypto/internal/cpu"
    15  )
    16  
// Byte offsets of two feature flags inside the cpu.S390X structure.
// Nothing in the Go code of this file reads them; presumably they are
// consumed by the accompanying assembly — TODO confirm.
const (
	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
	// NOTE(review): the constant is named ...HasVE1 but refers to the HasVXE field.
	offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE)
)
    21  
// p256CurveFast is the curve implementation that initP256Arch installs when
// cpu.S390X.HasVX is set. It embeds *CurveParams and, in this file, provides
// its own Params, Inverse, CombinedMult, ScalarBaseMult and ScalarMult.
type p256CurveFast struct {
	*CurveParams
}
    25  
// p256Point is a curve point held as three 32-byte coordinates.
// p256PointToAffine recovers the affine point as (x/z^2, y/z^3) and runs the
// result through p256FromMont, so the coordinates are kept in Montgomery
// form. Each coordinate is a big-endian byte string (it is filled from and
// read back through big.Int byte conversions).
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
    31  
var (
	// p256 is the curve implementation selected by initP256Arch.
	p256 Curve
	// p256PreFast is the precomputed base-point table allocated and filled
	// by initTable and read by p256BaseMult. Only the x and y fields of
	// each entry are written.
	p256PreFast *[37][64]p256Point
)
    36  
// The six functions below are declared without a Go body and are not called
// from the Go code in this file. Presumably they are assembly entry points
// (VX and VMSL variants of the internal multiply/square routines plus the
// trampolines that choose between them) — TODO confirm against the .s file.

//go:noescape
func p256MulInternalTrampolineSetup()

//go:noescape
func p256SqrInternalTrampolineSetup()

//go:noescape
func p256MulInternalVX()

//go:noescape
func p256MulInternalVMSL()

//go:noescape
func p256SqrInternalVX()

//go:noescape
func p256SqrInternalVMSL()
    54  
    55  func initP256Arch() {
    56  	if cpu.S390X.HasVX {
    57  		p256 = p256CurveFast{p256Params}
    58  		initTable()
    59  		return
    60  	}
    61  
    62  	// No vector support, use pure Go implementation.
    63  	p256 = p256Curve{p256Params}
    64  	return
    65  }
    66  
// Params returns the *CurveParams embedded in the curve value.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
    70  
// Functions implemented in p256_asm_s390x.s

// p256SqrAsm is the assembly routine behind p256Sqr (Montgomery square
// modulo P256): res receives the square of in1.
//
//go:noescape
func p256SqrAsm(res, in1 []byte)

// p256MulAsm is Montgomery multiplication modulo P256: res receives the
// product of in1 and in2. Callers in this file pass 32-byte slices and
// freely alias res with an input.
//
//go:noescape
func p256MulAsm(res, in1, in2 []byte)
    79  
// p256Sqr computes the Montgomery square modulo P256 of in and stores it in
// res. It simply forwards to the assembly routine p256SqrAsm.
func p256Sqr(res, in []byte) {
	p256SqrAsm(res, in)
}
    84  
// p256FromMont performs a Montgomery multiplication by 1, i.e. it converts
// in out of the Montgomery domain and stores the result in res.
//
//go:noescape
func p256FromMont(res, in []byte)

// p256NegCond negates val in place iff cond == 1 (val <- -val).
//
//go:noescape
func p256NegCond(val *p256Point, cond int)

// p256MovCond is a conditional move:
// if cond == 0 res <- b; else res <- a
//
//go:noescape
func p256MovCond(res, a, b *p256Point, cond int)

// p256Select performs a constant time table access, storing the entry of
// table chosen by idx in point. p256ScalarMult calls it with a 16-entry
// table and the digit returned by boothW5.
//
//go:noescape
func p256Select(point *p256Point, table []p256Point, idx int)

// p256SelectBase is the table lookup used by p256BaseMult on the rows of
// p256PreFast, with the digit returned by boothW7 as idx.
//
//go:noescape
func p256SelectBase(point *p256Point, table []p256Point, idx int)

// p256OrdMul is Montgomery multiplication modulo Ord(G): res receives the
// product of in1 and in2.
//
//go:noescape
func p256OrdMul(res, in1, in2 []byte)
   112  
   113  // Montgomery square modulo Ord(G), repeated n times
   114  func p256OrdSqr(res, in []byte, n int) {
   115  	copy(res, in)
   116  	for i := 0; i < n; i += 1 {
   117  		p256OrdMul(res, res, res)
   118  	}
   119  }
   120  
// p256PointAddAffineAsm is a point add with P2 being an affine point; the
// result is written to P3.
// If sign == 1 -> P2 = -P2
// If sel == 0 -> P3 = P1
// if zero == 0 -> P3 = P2
//
//go:noescape
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)

// p256PointAddAsm is a general point add, P3 = P1 + P2. CombinedMult treats
// the int result as a "both inputs were the same point" flag and substitutes
// a doubling in that case.
//
//go:noescape
func p256PointAddAsm(P3, P1, P2 *p256Point) int

// p256PointDoubleAsm is a point doubling, P3 = 2*P1. Callers in this file
// also use it in place (P3 == P1).
//
//go:noescape
func p256PointDoubleAsm(P3, P1 *p256Point)
   136  
   137  func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
   138  	if k.Cmp(p256Params.N) >= 0 {
   139  		// This should never happen.
   140  		reducedK := new(big.Int).Mod(k, p256Params.N)
   141  		k = reducedK
   142  	}
   143  
   144  	// table will store precomputed powers of x. The 32 bytes at index
   145  	// i store x^(i+1).
   146  	var table [15][32]byte
   147  
   148  	x := fromBig(k)
   149  	// This code operates in the Montgomery domain where R = 2^256 mod n
   150  	// and n is the order of the scalar field. (See initP256 for the
   151  	// value.) Elements in the Montgomery domain take the form a×R and
   152  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   153  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   154  	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
   155  	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
   156  		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
   157  
   158  	p256OrdMul(table[0][:], x, RR)
   159  
   160  	// Prepare the table, no need in constant time access, because the
   161  	// power is not a secret. (Entry 0 is never used.)
   162  	for i := 2; i < 16; i += 2 {
   163  		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
   164  		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
   165  	}
   166  
   167  	copy(x, table[14][:]) // f
   168  
   169  	p256OrdSqr(x[0:32], x[0:32], 4)
   170  	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
   171  	t := make([]byte, 32)
   172  	copy(t, x)
   173  
   174  	p256OrdSqr(x, x, 8)
   175  	p256OrdMul(x, x, t) // ffff
   176  	copy(t, x)
   177  
   178  	p256OrdSqr(x, x, 16)
   179  	p256OrdMul(x, x, t) // ffffffff
   180  	copy(t, x)
   181  
   182  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   183  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   184  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   185  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   186  
   187  	// Remaining 32 windows
   188  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
   189  		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   190  	for i := 0; i < 32; i++ {
   191  		p256OrdSqr(x, x, 4)
   192  		p256OrdMul(x, x, table[expLo[i]-1][:])
   193  	}
   194  
   195  	// Multiplying by one in the Montgomery domain converts a Montgomery
   196  	// value out of the domain.
   197  	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   198  		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
   199  	p256OrdMul(x, x, one)
   200  
   201  	return new(big.Int).SetBytes(x)
   202  }
   203  
   204  // fromBig converts a *big.Int into a format used by this code.
   205  func fromBig(big *big.Int) []byte {
   206  	// This could be done a lot more efficiently...
   207  	res := big.Bytes()
   208  	if 32 == len(res) {
   209  		return res
   210  	}
   211  	t := make([]byte, 32)
   212  	offset := 32 - len(res)
   213  	for i := len(res) - 1; i >= 0; i-- {
   214  		t[i+offset] = res[i]
   215  	}
   216  	return t
   217  }
   218  
   219  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   220  // is equal or greater than the order of the group, it's reduced modulo that order.
   221  func p256GetMultiplier(in []byte) []byte {
   222  	n := new(big.Int).SetBytes(in)
   223  
   224  	if n.Cmp(p256Params.N) >= 0 {
   225  		n.Mod(n, p256Params.N)
   226  	}
   227  	return fromBig(n)
   228  }
   229  
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p: a Montgomery multiplication of a plain value by rr yields that
// value in Montgomery form, which is how ScalarMult and CombinedMult convert
// their input coordinates. See the comment in Inverse for the same technique
// modulo the group order.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}

// one is the value one in the Montgomery domain. It is copied into the z
// coordinate of points that are built from affine input.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   239  
   240  func maybeReduceModP(in *big.Int) *big.Int {
   241  	if in.Cmp(p256Params.P) < 0 {
   242  		return in
   243  	}
   244  	return new(big.Int).Mod(in, p256Params.P)
   245  }
   246  
   247  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   248  	var r1, r2 p256Point
   249  	scalarReduced := p256GetMultiplier(baseScalar)
   250  	r1IsInfinity := scalarIsZero(scalarReduced)
   251  	r1.p256BaseMult(scalarReduced)
   252  
   253  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   254  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   255  	copy(r2.z[:], one)
   256  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   257  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   258  
   259  	scalarReduced = p256GetMultiplier(scalar)
   260  	r2IsInfinity := scalarIsZero(scalarReduced)
   261  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   262  
   263  	var sum, double p256Point
   264  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   265  	p256PointDoubleAsm(&double, &r1)
   266  	p256MovCond(&sum, &double, &sum, pointsEqual)
   267  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   268  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   269  	return sum.p256PointToAffine()
   270  }
   271  
   272  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   273  	var r p256Point
   274  	r.p256BaseMult(p256GetMultiplier(scalar))
   275  	return r.p256PointToAffine()
   276  }
   277  
   278  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   279  	var r p256Point
   280  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   281  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   282  	copy(r.z[:], one)
   283  	p256MulAsm(r.x[:], r.x[:], rr[:])
   284  	p256MulAsm(r.y[:], r.y[:], rr[:])
   285  	r.p256ScalarMult(p256GetMultiplier(scalar))
   286  	return r.p256PointToAffine()
   287  }
   288  
   289  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   290  // otherwise.
   291  func scalarIsZero(scalar []byte) int {
   292  	b := byte(0)
   293  	for _, s := range scalar {
   294  		b |= s
   295  	}
   296  	return subtle.ConstantTimeByteEq(b, 0)
   297  }
   298  
   299  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   300  	zInv := make([]byte, 32)
   301  	zInvSq := make([]byte, 32)
   302  
   303  	p256Inverse(zInv, p.z[:])
   304  	p256Sqr(zInvSq, zInv)
   305  	p256MulAsm(zInv, zInv, zInvSq)
   306  
   307  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   308  	p256MulAsm(zInv, p.y[:], zInv)
   309  
   310  	p256FromMont(zInvSq, zInvSq)
   311  	p256FromMont(zInv, zInv)
   312  
   313  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   314  }
   315  
   316  // p256Inverse sets out to in^-1 mod p.
   317  func p256Inverse(out, in []byte) {
   318  	var stack [6 * 32]byte
   319  	p2 := stack[32*0 : 32*0+32]
   320  	p4 := stack[32*1 : 32*1+32]
   321  	p8 := stack[32*2 : 32*2+32]
   322  	p16 := stack[32*3 : 32*3+32]
   323  	p32 := stack[32*4 : 32*4+32]
   324  
   325  	p256Sqr(out, in)
   326  	p256MulAsm(p2, out, in) // 3*p
   327  
   328  	p256Sqr(out, p2)
   329  	p256Sqr(out, out)
   330  	p256MulAsm(p4, out, p2) // f*p
   331  
   332  	p256Sqr(out, p4)
   333  	p256Sqr(out, out)
   334  	p256Sqr(out, out)
   335  	p256Sqr(out, out)
   336  	p256MulAsm(p8, out, p4) // ff*p
   337  
   338  	p256Sqr(out, p8)
   339  
   340  	for i := 0; i < 7; i++ {
   341  		p256Sqr(out, out)
   342  	}
   343  	p256MulAsm(p16, out, p8) // ffff*p
   344  
   345  	p256Sqr(out, p16)
   346  	for i := 0; i < 15; i++ {
   347  		p256Sqr(out, out)
   348  	}
   349  	p256MulAsm(p32, out, p16) // ffffffff*p
   350  
   351  	p256Sqr(out, p32)
   352  
   353  	for i := 0; i < 31; i++ {
   354  		p256Sqr(out, out)
   355  	}
   356  	p256MulAsm(out, out, in)
   357  
   358  	for i := 0; i < 32*4; i++ {
   359  		p256Sqr(out, out)
   360  	}
   361  	p256MulAsm(out, out, p32)
   362  
   363  	for i := 0; i < 32; i++ {
   364  		p256Sqr(out, out)
   365  	}
   366  	p256MulAsm(out, out, p32)
   367  
   368  	for i := 0; i < 16; i++ {
   369  		p256Sqr(out, out)
   370  	}
   371  	p256MulAsm(out, out, p16)
   372  
   373  	for i := 0; i < 8; i++ {
   374  		p256Sqr(out, out)
   375  	}
   376  	p256MulAsm(out, out, p8)
   377  
   378  	p256Sqr(out, out)
   379  	p256Sqr(out, out)
   380  	p256Sqr(out, out)
   381  	p256Sqr(out, out)
   382  	p256MulAsm(out, out, p4)
   383  
   384  	p256Sqr(out, out)
   385  	p256Sqr(out, out)
   386  	p256MulAsm(out, out, p2)
   387  
   388  	p256Sqr(out, out)
   389  	p256Sqr(out, out)
   390  	p256MulAsm(out, out, in)
   391  }
   392  
   393  func boothW5(in uint) (int, int) {
   394  	var s uint = ^((in >> 5) - 1)
   395  	var d uint = (1 << 6) - in - 1
   396  	d = (d & s) | (in & (^s))
   397  	d = (d >> 1) + (d & 1)
   398  	return int(d), int(s & 1)
   399  }
   400  
   401  func boothW7(in uint) (int, int) {
   402  	var s uint = ^((in >> 7) - 1)
   403  	var d uint = (1 << 8) - in - 1
   404  	d = (d & s) | (in & (^s))
   405  	d = (d >> 1) + (d & 1)
   406  	return int(d), int(s & 1)
   407  }
   408  
   409  func initTable() {
   410  	p256PreFast = new([37][64]p256Point) //z coordinate not used
   411  	basePoint := p256Point{
   412  		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
   413  			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
   414  		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
   415  			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
   416  		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   417  			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
   418  	}
   419  
   420  	t1 := new(p256Point)
   421  	t2 := new(p256Point)
   422  	*t2 = basePoint
   423  
   424  	zInv := make([]byte, 32)
   425  	zInvSq := make([]byte, 32)
   426  	for j := 0; j < 64; j++ {
   427  		*t1 = *t2
   428  		for i := 0; i < 37; i++ {
   429  			// The window size is 7 so we need to double 7 times.
   430  			if i != 0 {
   431  				for k := 0; k < 7; k++ {
   432  					p256PointDoubleAsm(t1, t1)
   433  				}
   434  			}
   435  			// Convert the point to affine form. (Its values are
   436  			// still in Montgomery form however.)
   437  			p256Inverse(zInv, t1.z[:])
   438  			p256Sqr(zInvSq, zInv)
   439  			p256MulAsm(zInv, zInv, zInvSq)
   440  
   441  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   442  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   443  
   444  			copy(t1.z[:], basePoint.z[:])
   445  			// Update the table entry
   446  			copy(p256PreFast[i][j].x[:], t1.x[:])
   447  			copy(p256PreFast[i][j].y[:], t1.y[:])
   448  		}
   449  		if j == 0 {
   450  			p256PointDoubleAsm(t2, &basePoint)
   451  		} else {
   452  			p256PointAddAsm(t2, t2, &basePoint)
   453  		}
   454  	}
   455  }
   456  
   457  func (p *p256Point) p256BaseMult(scalar []byte) {
   458  	wvalue := (uint(scalar[31]) << 1) & 0xff
   459  	sel, sign := boothW7(uint(wvalue))
   460  	p256SelectBase(p, p256PreFast[0][:], sel)
   461  	p256NegCond(p, sign)
   462  
   463  	copy(p.z[:], one[:])
   464  	var t0 p256Point
   465  
   466  	copy(t0.z[:], one[:])
   467  
   468  	index := uint(6)
   469  	zero := sel
   470  
   471  	for i := 1; i < 37; i++ {
   472  		if index < 247 {
   473  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
   474  		} else {
   475  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
   476  		}
   477  		index += 7
   478  		sel, sign = boothW7(uint(wvalue))
   479  		p256SelectBase(&t0, p256PreFast[i][:], sel)
   480  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   481  		zero |= sel
   482  	}
   483  }
   484  
   485  func (p *p256Point) p256ScalarMult(scalar []byte) {
   486  	// precomp is a table of precomputed points that stores powers of p
   487  	// from p^1 to p^16.
   488  	var precomp [16]p256Point
   489  	var t0, t1, t2, t3 p256Point
   490  
   491  	// Prepare the table
   492  	*&precomp[0] = *p
   493  
   494  	p256PointDoubleAsm(&t0, p)
   495  	p256PointDoubleAsm(&t1, &t0)
   496  	p256PointDoubleAsm(&t2, &t1)
   497  	p256PointDoubleAsm(&t3, &t2)
   498  	*&precomp[1] = t0  // 2
   499  	*&precomp[3] = t1  // 4
   500  	*&precomp[7] = t2  // 8
   501  	*&precomp[15] = t3 // 16
   502  
   503  	p256PointAddAsm(&t0, &t0, p)
   504  	p256PointAddAsm(&t1, &t1, p)
   505  	p256PointAddAsm(&t2, &t2, p)
   506  	*&precomp[2] = t0 // 3
   507  	*&precomp[4] = t1 // 5
   508  	*&precomp[8] = t2 // 9
   509  
   510  	p256PointDoubleAsm(&t0, &t0)
   511  	p256PointDoubleAsm(&t1, &t1)
   512  	*&precomp[5] = t0 // 6
   513  	*&precomp[9] = t1 // 10
   514  
   515  	p256PointAddAsm(&t2, &t0, p)
   516  	p256PointAddAsm(&t1, &t1, p)
   517  	*&precomp[6] = t2  // 7
   518  	*&precomp[10] = t1 // 11
   519  
   520  	p256PointDoubleAsm(&t0, &t0)
   521  	p256PointDoubleAsm(&t2, &t2)
   522  	*&precomp[11] = t0 // 12
   523  	*&precomp[13] = t2 // 14
   524  
   525  	p256PointAddAsm(&t0, &t0, p)
   526  	p256PointAddAsm(&t2, &t2, p)
   527  	*&precomp[12] = t0 // 13
   528  	*&precomp[14] = t2 // 15
   529  
   530  	// Start scanning the window from top bit
   531  	index := uint(254)
   532  	var sel, sign int
   533  
   534  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   535  	sel, _ = boothW5(uint(wvalue))
   536  	p256Select(p, precomp[:], sel)
   537  	zero := sel
   538  
   539  	for index > 4 {
   540  		index -= 5
   541  		p256PointDoubleAsm(p, p)
   542  		p256PointDoubleAsm(p, p)
   543  		p256PointDoubleAsm(p, p)
   544  		p256PointDoubleAsm(p, p)
   545  		p256PointDoubleAsm(p, p)
   546  
   547  		if index < 247 {
   548  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   549  		} else {
   550  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   551  		}
   552  
   553  		sel, sign = boothW5(uint(wvalue))
   554  
   555  		p256Select(&t0, precomp[:], sel)
   556  		p256NegCond(&t0, sign)
   557  		p256PointAddAsm(&t1, p, &t0)
   558  		p256MovCond(&t1, &t1, p, sel)
   559  		p256MovCond(p, &t1, &t0, zero)
   560  		zero |= sel
   561  	}
   562  
   563  	p256PointDoubleAsm(p, p)
   564  	p256PointDoubleAsm(p, p)
   565  	p256PointDoubleAsm(p, p)
   566  	p256PointDoubleAsm(p, p)
   567  	p256PointDoubleAsm(p, p)
   568  
   569  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   570  	sel, sign = boothW5(uint(wvalue))
   571  
   572  	p256Select(&t0, precomp[:], sel)
   573  	p256NegCond(&t0, sign)
   574  	p256PointAddAsm(&t1, p, &t0)
   575  	p256MovCond(&t1, &t1, p, sel)
   576  	p256MovCond(p, &t1, &t0, zero)
   577  }