github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/crypto/elliptic/p256_asm.go

github.com/comwrg/go/src@v0.0.0-20220319063731-c238d0440370/crypto/elliptic/p256_asm.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  //go:build amd64 || arm64
    14  // +build amd64 arm64
    15  
    16  package elliptic
    17  
    18  import (
    19  	"math/big"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
// p256 is the package-level P-256 curve value. Its CurveParams pointer is
// nil until initP256 has run.
var p256 p256Curve
    33  
    34  func initP256() {
    35  	// See FIPS 186-3, section D.2.3
    36  	p256.CurveParams = &CurveParams{Name: "P-256"}
    37  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    38  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    39  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    40  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    41  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    42  	p256.BitSize = 256
    43  }
    44  
// Params returns the curve's parameters, i.e. the embedded *CurveParams.
func (curve p256Curve) Params() *CurveParams {
	return curve.CurveParams
}
    48  
// Functions implemented in p256_asm_*64.s

// p256Mul performs a Montgomery multiplication modulo P256: res is set to
// in1 × in2 × R^-1 mod p, where R = 2^256 mod p. Operands are four 64-bit
// limbs each; callers in this file also pass res aliased to an input.
//go:noescape
func p256Mul(res, in1, in2 []uint64)
    53  
// p256Sqr performs a Montgomery squaring modulo P256, repeated n times
// (n >= 1), i.e. res becomes in raised to the power 2^n in the Montgomery
// domain. Callers in this file also pass res aliased to in.
//go:noescape
func p256Sqr(res, in []uint64, n int)
    57  
// p256FromMont performs a Montgomery multiplication of in by 1, which
// converts a value out of the Montgomery domain, and stores the result in
// res.
//go:noescape
func p256FromMont(res, in []uint64)
    61  
// p256NegCond conditionally negates val in place: iff cond == 1, val <- -val.
// It is applied to Y coordinates in this file.
// NOTE(review): behaviour for cond values other than 0 and 1 is not visible
// from this file; confirm against the assembly before relying on it.
//go:noescape
func p256NegCond(val []uint64, cond int)
    65  
// p256MovCond is a conditional move: if cond == 0, res <- b; otherwise
// res <- a. Callers in this file use it on whole 12-limb points and pass any
// non-zero selector as "true".
//go:noescape
func p256MovCond(res, a, b []uint64, cond int)
    69  
// p256BigToLittle performs an endianness swap, reading bytes from in and
// writing 64-bit limbs to res.
// NOTE(review): presumably the inverse of p256LittleToBig (32 big-endian
// bytes to four little-endian limbs); it is not called in this file, so
// confirm against the assembly.
//go:noescape
func p256BigToLittle(res []uint64, in []byte)
    73  
// p256LittleToBig performs an endianness swap, reading 64-bit limbs from in
// and writing bytes to res. Callers in this file pass four limbs (least
// significant first) and a 32-byte buffer that is then handed to
// big.Int.SetBytes, which expects big-endian bytes.
//go:noescape
func p256LittleToBig(res []byte, in []uint64)
    76  
// p256Select is a constant time table access: it writes the entry of table
// chosen by idx into point. In this file it is used with 12-limb points and
// the 16-entry table built by p256ScalarMult.
// NOTE(review): idx appears to be 1-based, with idx == 0 selecting no entry
// (callers track that case separately via a "zero" flag); confirm against
// the assembly.
//go:noescape
func p256Select(point, table []uint64, idx int)
    80  
// p256SelectBase is the constant time table access used for the base point
// tables: it writes the entry of table chosen by idx into point. In this
// file point is an 8-limb (x, y) slice and table is one row of
// p256Precomputed.
// NOTE(review): as with p256Select, idx == 0 appears to select no entry;
// confirm against the assembly.
//go:noescape
func p256SelectBase(point, table []uint64, idx int)
    83  
// p256OrdMul performs a Montgomery multiplication modulo Ord(G), the order
// of the base point: res is set to in1 × in2 × R^-1 mod n, where
// R = 2^256 mod n. Callers in this file also pass res aliased to an input.
//go:noescape
func p256OrdMul(res, in1, in2 []uint64)
    87  
// p256OrdSqr performs a Montgomery squaring modulo Ord(G), repeated n times,
// and stores the result in res. Callers in this file also pass res aliased
// to in.
//go:noescape
func p256OrdSqr(res, in []uint64, n int)
    91  
// p256PointAddAffineAsm is a point addition with in2 being an affine point:
// res and in1 are 12-limb points, in2 is an 8-limb (x, y) pair.
// The three flags adjust the operation as follows:
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// if zero == 0 -> res = in2
//go:noescape
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
    98  
// p256PointAddAsm is a point addition of the 12-limb points in1 and in2,
// writing the sum to res. It returns one if the two input points were equal
// and zero otherwise. (Note that, due to the way that the equations work
// out, some representations of ∞ are considered equal to everything by this
// function.) Callers that may add equal points must therefore substitute
// the result of a doubling when one is returned, as CombinedMult does.
//go:noescape
func p256PointAddAsm(res, in1, in2 []uint64) int
   104  
// p256PointDoubleAsm is a point doubling: res is set to in + in. Both are
// 12-limb points, and callers in this file also pass res aliased to in.
//go:noescape
func p256PointDoubleAsm(res, in []uint64)
   108  
   109  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
   110  	if k.Sign() < 0 {
   111  		// This should never happen.
   112  		k = new(big.Int).Neg(k)
   113  	}
   114  
   115  	if k.Cmp(p256.N) >= 0 {
   116  		// This should never happen.
   117  		k = new(big.Int).Mod(k, p256.N)
   118  	}
   119  
   120  	// table will store precomputed powers of x.
   121  	var table [4 * 9]uint64
   122  	var (
   123  		_1      = table[4*0 : 4*1]
   124  		_11     = table[4*1 : 4*2]
   125  		_101    = table[4*2 : 4*3]
   126  		_111    = table[4*3 : 4*4]
   127  		_1111   = table[4*4 : 4*5]
   128  		_10101  = table[4*5 : 4*6]
   129  		_101111 = table[4*6 : 4*7]
   130  		x       = table[4*7 : 4*8]
   131  		t       = table[4*8 : 4*9]
   132  	)
   133  
   134  	fromBig(x[:], k)
   135  	// This code operates in the Montgomery domain where R = 2^256 mod n
   136  	// and n is the order of the scalar field. (See initP256 for the
   137  	// value.) Elements in the Montgomery domain take the form a×R and
   138  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   139  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   140  	// i.e. converts x into the Montgomery domain.
   141  	// Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
   142  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
   143  	p256OrdMul(_1, x, RR)      // _1
   144  	p256OrdSqr(x, _1, 1)       // _10
   145  	p256OrdMul(_11, x, _1)     // _11
   146  	p256OrdMul(_101, x, _11)   // _101
   147  	p256OrdMul(_111, x, _101)  // _111
   148  	p256OrdSqr(x, _101, 1)     // _1010
   149  	p256OrdMul(_1111, _101, x) // _1111
   150  
   151  	p256OrdSqr(t, x, 1)          // _10100
   152  	p256OrdMul(_10101, t, _1)    // _10101
   153  	p256OrdSqr(x, _10101, 1)     // _101010
   154  	p256OrdMul(_101111, _101, x) // _101111
   155  	p256OrdMul(x, _10101, x)     // _111111 = x6
   156  	p256OrdSqr(t, x, 2)          // _11111100
   157  	p256OrdMul(t, t, _11)        // _11111111 = x8
   158  	p256OrdSqr(x, t, 8)          // _ff00
   159  	p256OrdMul(x, x, t)          // _ffff = x16
   160  	p256OrdSqr(t, x, 16)         // _ffff0000
   161  	p256OrdMul(t, t, x)          // _ffffffff = x32
   162  
   163  	p256OrdSqr(x, t, 64)
   164  	p256OrdMul(x, x, t)
   165  	p256OrdSqr(x, x, 32)
   166  	p256OrdMul(x, x, t)
   167  
   168  	sqrs := []uint8{
   169  		6, 5, 4, 5, 5,
   170  		4, 3, 3, 5, 9,
   171  		6, 2, 5, 6, 5,
   172  		4, 5, 5, 3, 10,
   173  		2, 5, 5, 3, 7, 6}
   174  	muls := [][]uint64{
   175  		_101111, _111, _11, _1111, _10101,
   176  		_101, _101, _101, _111, _101111,
   177  		_1111, _1, _1, _1111, _111,
   178  		_111, _111, _101, _11, _101111,
   179  		_11, _11, _11, _1, _10101, _1111}
   180  
   181  	for i, s := range sqrs {
   182  		p256OrdSqr(x, x, int(s))
   183  		p256OrdMul(x, x, muls[i])
   184  	}
   185  
   186  	// Multiplying by one in the Montgomery domain converts a Montgomery
   187  	// value out of the domain.
   188  	one := []uint64{1, 0, 0, 0}
   189  	p256OrdMul(x, x, one)
   190  
   191  	xOut := make([]byte, 32)
   192  	p256LittleToBig(xOut, x)
   193  	return new(big.Int).SetBytes(xOut)
   194  }
   195  
   196  // fromBig converts a *big.Int into a format used by this code.
   197  func fromBig(out []uint64, big *big.Int) {
   198  	for i := range out {
   199  		out[i] = 0
   200  	}
   201  
   202  	for i, v := range big.Bits() {
   203  		out[i] = uint64(v)
   204  	}
   205  }
   206  
   207  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   208  // to out. If the scalar is equal or greater than the order of the group, it's
   209  // reduced modulo that order.
   210  func p256GetScalar(out []uint64, in []byte) {
   211  	n := new(big.Int).SetBytes(in)
   212  
   213  	if n.Cmp(p256.N) >= 0 {
   214  		n.Mod(n, p256.N)
   215  	}
   216  	fromBig(out, n)
   217  }
   218  
// rr is R×R mod p, stored least-significant limb first, where R = 2^256 mod p
// and p is the underlying field of the curve. (See initP256 for the value.)
// p256Mul operates in the Montgomery domain defined by R, so a p256Mul of a
// value with rr converts that value into the domain. See the comment in
// Inverse about how this is used.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   223  
   224  func maybeReduceModP(in *big.Int) *big.Int {
   225  	if in.Cmp(p256.P) < 0 {
   226  		return in
   227  	}
   228  	return new(big.Int).Mod(in, p256.P)
   229  }
   230  
   231  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   232  	scalarReversed := make([]uint64, 4)
   233  	var r1, r2 p256Point
   234  	p256GetScalar(scalarReversed, baseScalar)
   235  	r1IsInfinity := scalarIsZero(scalarReversed)
   236  	r1.p256BaseMult(scalarReversed)
   237  
   238  	p256GetScalar(scalarReversed, scalar)
   239  	r2IsInfinity := scalarIsZero(scalarReversed)
   240  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   241  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   242  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   243  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   244  
   245  	// This sets r2's Z value to 1, in the Montgomery domain.
   246  	r2.xyz[8] = 0x0000000000000001
   247  	r2.xyz[9] = 0xffffffff00000000
   248  	r2.xyz[10] = 0xffffffffffffffff
   249  	r2.xyz[11] = 0x00000000fffffffe
   250  
   251  	r2.p256ScalarMult(scalarReversed)
   252  
   253  	var sum, double p256Point
   254  	pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
   255  	p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
   256  	sum.CopyConditional(&double, pointsEqual)
   257  	sum.CopyConditional(&r1, r2IsInfinity)
   258  	sum.CopyConditional(&r2, r1IsInfinity)
   259  
   260  	return sum.p256PointToAffine()
   261  }
   262  
   263  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   264  	scalarReversed := make([]uint64, 4)
   265  	p256GetScalar(scalarReversed, scalar)
   266  
   267  	var r p256Point
   268  	r.p256BaseMult(scalarReversed)
   269  	return r.p256PointToAffine()
   270  }
   271  
   272  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   273  	scalarReversed := make([]uint64, 4)
   274  	p256GetScalar(scalarReversed, scalar)
   275  
   276  	var r p256Point
   277  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   278  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   279  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   280  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   281  	// This sets r2's Z value to 1, in the Montgomery domain.
   282  	r.xyz[8] = 0x0000000000000001
   283  	r.xyz[9] = 0xffffffff00000000
   284  	r.xyz[10] = 0xffffffffffffffff
   285  	r.xyz[11] = 0x00000000fffffffe
   286  
   287  	r.p256ScalarMult(scalarReversed)
   288  	return r.p256PointToAffine()
   289  }
   290  
   291  // uint64IsZero returns 1 if x is zero and zero otherwise.
   292  func uint64IsZero(x uint64) int {
   293  	x = ^x
   294  	x &= x >> 32
   295  	x &= x >> 16
   296  	x &= x >> 8
   297  	x &= x >> 4
   298  	x &= x >> 2
   299  	x &= x >> 1
   300  	return int(x & 1)
   301  }
   302  
   303  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   304  // otherwise.
   305  func scalarIsZero(scalar []uint64) int {
   306  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   307  }
   308  
   309  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   310  	zInv := make([]uint64, 4)
   311  	zInvSq := make([]uint64, 4)
   312  	p256Inverse(zInv, p.xyz[8:12])
   313  	p256Sqr(zInvSq, zInv, 1)
   314  	p256Mul(zInv, zInv, zInvSq)
   315  
   316  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   317  	p256Mul(zInv, p.xyz[4:8], zInv)
   318  
   319  	p256FromMont(zInvSq, zInvSq)
   320  	p256FromMont(zInv, zInv)
   321  
   322  	xOut := make([]byte, 32)
   323  	yOut := make([]byte, 32)
   324  	p256LittleToBig(xOut, zInvSq)
   325  	p256LittleToBig(yOut, zInv)
   326  
   327  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   328  }
   329  
   330  // CopyConditional copies overwrites p with src if v == 1, and leaves p
   331  // unchanged if v == 0.
   332  func (p *p256Point) CopyConditional(src *p256Point, v int) {
   333  	pMask := uint64(v) - 1
   334  	srcMask := ^pMask
   335  
   336  	for i, n := range p.xyz {
   337  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   338  	}
   339  }
   340  
   341  // p256Inverse sets out to in^-1 mod p.
   342  func p256Inverse(out, in []uint64) {
   343  	var stack [6 * 4]uint64
   344  	p2 := stack[4*0 : 4*0+4]
   345  	p4 := stack[4*1 : 4*1+4]
   346  	p8 := stack[4*2 : 4*2+4]
   347  	p16 := stack[4*3 : 4*3+4]
   348  	p32 := stack[4*4 : 4*4+4]
   349  
   350  	p256Sqr(out, in, 1)
   351  	p256Mul(p2, out, in) // 3*p
   352  
   353  	p256Sqr(out, p2, 2)
   354  	p256Mul(p4, out, p2) // f*p
   355  
   356  	p256Sqr(out, p4, 4)
   357  	p256Mul(p8, out, p4) // ff*p
   358  
   359  	p256Sqr(out, p8, 8)
   360  	p256Mul(p16, out, p8) // ffff*p
   361  
   362  	p256Sqr(out, p16, 16)
   363  	p256Mul(p32, out, p16) // ffffffff*p
   364  
   365  	p256Sqr(out, p32, 32)
   366  	p256Mul(out, out, in)
   367  
   368  	p256Sqr(out, out, 128)
   369  	p256Mul(out, out, p32)
   370  
   371  	p256Sqr(out, out, 32)
   372  	p256Mul(out, out, p32)
   373  
   374  	p256Sqr(out, out, 16)
   375  	p256Mul(out, out, p16)
   376  
   377  	p256Sqr(out, out, 8)
   378  	p256Mul(out, out, p8)
   379  
   380  	p256Sqr(out, out, 4)
   381  	p256Mul(out, out, p4)
   382  
   383  	p256Sqr(out, out, 2)
   384  	p256Mul(out, out, p2)
   385  
   386  	p256Sqr(out, out, 2)
   387  	p256Mul(out, out, in)
   388  }
   389  
   390  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   391  	copy(r[index*12:], p.xyz[:])
   392  }
   393  
   394  func boothW5(in uint) (int, int) {
   395  	var s uint = ^((in >> 5) - 1)
   396  	var d uint = (1 << 6) - in - 1
   397  	d = (d & s) | (in & (^s))
   398  	d = (d >> 1) + (d & 1)
   399  	return int(d), int(s & 1)
   400  }
   401  
   402  func boothW6(in uint) (int, int) {
   403  	var s uint = ^((in >> 6) - 1)
   404  	var d uint = (1 << 7) - in - 1
   405  	d = (d & s) | (in & (^s))
   406  	d = (d >> 1) + (d & 1)
   407  	return int(d), int(s & 1)
   408  }
   409  
   410  func (p *p256Point) p256BaseMult(scalar []uint64) {
   411  	wvalue := (scalar[0] << 1) & 0x7f
   412  	sel, sign := boothW6(uint(wvalue))
   413  	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
   414  	p256NegCond(p.xyz[4:8], sign)
   415  
   416  	// (This is one, in the Montgomery domain.)
   417  	p.xyz[8] = 0x0000000000000001
   418  	p.xyz[9] = 0xffffffff00000000
   419  	p.xyz[10] = 0xffffffffffffffff
   420  	p.xyz[11] = 0x00000000fffffffe
   421  
   422  	var t0 p256Point
   423  	// (This is one, in the Montgomery domain.)
   424  	t0.xyz[8] = 0x0000000000000001
   425  	t0.xyz[9] = 0xffffffff00000000
   426  	t0.xyz[10] = 0xffffffffffffffff
   427  	t0.xyz[11] = 0x00000000fffffffe
   428  
   429  	index := uint(5)
   430  	zero := sel
   431  
   432  	for i := 1; i < 43; i++ {
   433  		if index < 192 {
   434  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
   435  		} else {
   436  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
   437  		}
   438  		index += 6
   439  		sel, sign = boothW6(uint(wvalue))
   440  		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
   441  		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
   442  		zero |= sel
   443  	}
   444  }
   445  
   446  func (p *p256Point) p256ScalarMult(scalar []uint64) {
   447  	// precomp is a table of precomputed points that stores powers of p
   448  	// from p^1 to p^16.
   449  	var precomp [16 * 4 * 3]uint64
   450  	var t0, t1, t2, t3 p256Point
   451  
   452  	// Prepare the table
   453  	p.p256StorePoint(&precomp, 0) // 1
   454  
   455  	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
   456  	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
   457  	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
   458  	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
   459  	t0.p256StorePoint(&precomp, 1)  // 2
   460  	t1.p256StorePoint(&precomp, 3)  // 4
   461  	t2.p256StorePoint(&precomp, 7)  // 8
   462  	t3.p256StorePoint(&precomp, 15) // 16
   463  
   464  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   465  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   466  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   467  	t0.p256StorePoint(&precomp, 2) // 3
   468  	t1.p256StorePoint(&precomp, 4) // 5
   469  	t2.p256StorePoint(&precomp, 8) // 9
   470  
   471  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   472  	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
   473  	t0.p256StorePoint(&precomp, 5) // 6
   474  	t1.p256StorePoint(&precomp, 9) // 10
   475  
   476  	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
   477  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   478  	t2.p256StorePoint(&precomp, 6)  // 7
   479  	t1.p256StorePoint(&precomp, 10) // 11
   480  
   481  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   482  	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
   483  	t0.p256StorePoint(&precomp, 11) // 12
   484  	t2.p256StorePoint(&precomp, 13) // 14
   485  
   486  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   487  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   488  	t0.p256StorePoint(&precomp, 12) // 13
   489  	t2.p256StorePoint(&precomp, 14) // 15
   490  
   491  	// Start scanning the window from top bit
   492  	index := uint(254)
   493  	var sel, sign int
   494  
   495  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   496  	sel, _ = boothW5(uint(wvalue))
   497  
   498  	p256Select(p.xyz[0:12], precomp[0:], sel)
   499  	zero := sel
   500  
   501  	for index > 4 {
   502  		index -= 5
   503  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   504  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   505  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   506  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   507  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   508  
   509  		if index < 192 {
   510  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   511  		} else {
   512  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   513  		}
   514  
   515  		sel, sign = boothW5(uint(wvalue))
   516  
   517  		p256Select(t0.xyz[0:], precomp[0:], sel)
   518  		p256NegCond(t0.xyz[4:8], sign)
   519  		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   520  		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   521  		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   522  		zero |= sel
   523  	}
   524  
   525  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   526  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   527  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   528  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   529  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   530  
   531  	wvalue = (scalar[0] << 1) & 0x3f
   532  	sel, sign = boothW5(uint(wvalue))
   533  
   534  	p256Select(t0.xyz[0:], precomp[0:], sel)
   535  	p256NegCond(t0.xyz[4:8], sign)
   536  	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   537  	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   538  	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   539  }