github.com/euank/go@v0.0.0-20160829210321-495514729181/src/crypto/elliptic/p256_amd64.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  // +build amd64
    14  
    15  package elliptic
    16  
    17  import (
    18  	"math/big"
    19  	"sync"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
    32  var (
    33  	p256            p256Curve
    34  	p256Precomputed *[37][64 * 8]uint64
    35  	precomputeOnce  sync.Once
    36  )
    37  
    38  func initP256() {
    39  	// See FIPS 186-3, section D.2.3
    40  	p256.CurveParams = &CurveParams{Name: "P-256"}
    41  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    44  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    45  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    46  	p256.BitSize = 256
    47  }
    48  
    49  func (curve p256Curve) Params() *CurveParams {
    50  	return curve.CurveParams
    51  }
    52  
    53  // Functions implemented in p256_asm_amd64.s
    54  // Montgomery multiplication modulo P256
    55  func p256Mul(res, in1, in2 []uint64)
    56  
    57  // Montgomery square modulo P256
    58  func p256Sqr(res, in []uint64)
    59  
    60  // Montgomery multiplication by 1
    61  func p256FromMont(res, in []uint64)
    62  
    63  // iff cond == 1  val <- -val
    64  func p256NegCond(val []uint64, cond int)
    65  
    66  // if cond == 0 res <- b; else res <- a
    67  func p256MovCond(res, a, b []uint64, cond int)
    68  
    69  // Endianness swap
    70  func p256BigToLittle(res []uint64, in []byte)
    71  func p256LittleToBig(res []byte, in []uint64)
    72  
    73  // Constant time table access
    74  func p256Select(point, table []uint64, idx int)
    75  func p256SelectBase(point, table []uint64, idx int)
    76  
    77  // Montgomery multiplication modulo Ord(G)
    78  func p256OrdMul(res, in1, in2 []uint64)
    79  
    80  // Montgomery square modulo Ord(G), repeated n times
    81  func p256OrdSqr(res, in []uint64, n int)
    82  
    83  // Point add with in2 being affine point
    84  // If sign == 1 -> in2 = -in2
    85  // If sel == 0 -> res = in1
    86  // if zero == 0 -> res = in2
    87  func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
    88  
    89  // Point add
    90  func p256PointAddAsm(res, in1, in2 []uint64)
    91  
    92  // Point double
    93  func p256PointDoubleAsm(res, in []uint64)
    94  
    95  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
    96  	if k.Sign() < 0 {
    97  		// This should never happen.
    98  		k = new(big.Int).Neg(k)
    99  	}
   100  
   101  	if k.Cmp(p256.N) >= 0 {
   102  		// This should never happen.
   103  		k = new(big.Int).Mod(k, p256.N)
   104  	}
   105  
   106  	// table will store precomputed powers of x. The four words at index
   107  	// 4×i store x^(i+1).
   108  	var table [4 * 15]uint64
   109  
   110  	x := make([]uint64, 4)
   111  	fromBig(x[:], k)
   112  	// This code operates in the Montgomery domain where R = 2^256 mod n
   113  	// and n is the order of the scalar field. (See initP256 for the
   114  	// value.) Elements in the Montgomery domain take the form a×R and
   115  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   116  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   117  	// i.e. converts x into the Montgomery domain.
   118  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
   119  	p256OrdMul(table[:4], x, RR)
   120  
   121  	// Prepare the table, no need in constant time access, because the
   122  	// power is not a secret. (Entry 0 is never used.)
   123  	for i := 2; i < 16; i += 2 {
   124  		p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
   125  		p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
   126  	}
   127  
   128  	x[0] = table[4*14+0] // f
   129  	x[1] = table[4*14+1]
   130  	x[2] = table[4*14+2]
   131  	x[3] = table[4*14+3]
   132  
   133  	p256OrdSqr(x, x, 4)
   134  	p256OrdMul(x, x, table[4*14:4*14+4]) // ff
   135  	t := make([]uint64, 4, 4)
   136  	t[0] = x[0]
   137  	t[1] = x[1]
   138  	t[2] = x[2]
   139  	t[3] = x[3]
   140  
   141  	p256OrdSqr(x, x, 8)
   142  	p256OrdMul(x, x, t) // ffff
   143  	t[0] = x[0]
   144  	t[1] = x[1]
   145  	t[2] = x[2]
   146  	t[3] = x[3]
   147  
   148  	p256OrdSqr(x, x, 16)
   149  	p256OrdMul(x, x, t) // ffffffff
   150  	t[0] = x[0]
   151  	t[1] = x[1]
   152  	t[2] = x[2]
   153  	t[3] = x[3]
   154  
   155  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   156  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   157  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   158  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   159  
   160  	// Remaining 32 windows
   161  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   162  	for i := 0; i < 32; i++ {
   163  		p256OrdSqr(x, x, 4)
   164  		p256OrdMul(x, x, table[4*(expLo[i]-1):])
   165  	}
   166  
   167  	// Multiplying by one in the Montgomery domain converts a Montgomery
   168  	// value out of the domain.
   169  	one := []uint64{1, 0, 0, 0}
   170  	p256OrdMul(x, x, one)
   171  
   172  	xOut := make([]byte, 32)
   173  	p256LittleToBig(xOut, x)
   174  	return new(big.Int).SetBytes(xOut)
   175  }
   176  
   177  // fromBig converts a *big.Int into a format used by this code.
   178  func fromBig(out []uint64, big *big.Int) {
   179  	for i := range out {
   180  		out[i] = 0
   181  	}
   182  
   183  	for i, v := range big.Bits() {
   184  		out[i] = uint64(v)
   185  	}
   186  }
   187  
   188  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   189  // to out. If the scalar is equal or greater than the order of the group, it's
   190  // reduced modulo that order.
   191  func p256GetScalar(out []uint64, in []byte) {
   192  	n := new(big.Int).SetBytes(in)
   193  
   194  	if n.Cmp(p256.N) >= 0 {
   195  		n.Mod(n, p256.N)
   196  	}
   197  	fromBig(out, n)
   198  }
   199  
   200  // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
   201  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   202  // R×R mod p. See comment in Inverse about how this is used.
   203  var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   204  
   205  func maybeReduceModP(in *big.Int) *big.Int {
   206  	if in.Cmp(p256.P) < 0 {
   207  		return in
   208  	}
   209  	return new(big.Int).Mod(in, p256.P)
   210  }
   211  
   212  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   213  	scalarReversed := make([]uint64, 4)
   214  	var r1, r2 p256Point
   215  	p256GetScalar(scalarReversed, baseScalar)
   216  	r1.p256BaseMult(scalarReversed)
   217  
   218  	p256GetScalar(scalarReversed, scalar)
   219  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   220  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   221  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   222  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   223  
   224  	// This sets r2's Z value to 1, in the Montgomery domain.
   225  	r2.xyz[8] = 0x0000000000000001
   226  	r2.xyz[9] = 0xffffffff00000000
   227  	r2.xyz[10] = 0xffffffffffffffff
   228  	r2.xyz[11] = 0x00000000fffffffe
   229  
   230  	r2.p256ScalarMult(scalarReversed)
   231  	p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:])
   232  	return r1.p256PointToAffine()
   233  }
   234  
   235  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   236  	scalarReversed := make([]uint64, 4)
   237  	p256GetScalar(scalarReversed, scalar)
   238  
   239  	var r p256Point
   240  	r.p256BaseMult(scalarReversed)
   241  	return r.p256PointToAffine()
   242  }
   243  
   244  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   245  	scalarReversed := make([]uint64, 4)
   246  	p256GetScalar(scalarReversed, scalar)
   247  
   248  	var r p256Point
   249  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   250  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   251  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   252  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   253  	// This sets r2's Z value to 1, in the Montgomery domain.
   254  	r.xyz[8] = 0x0000000000000001
   255  	r.xyz[9] = 0xffffffff00000000
   256  	r.xyz[10] = 0xffffffffffffffff
   257  	r.xyz[11] = 0x00000000fffffffe
   258  
   259  	r.p256ScalarMult(scalarReversed)
   260  	return r.p256PointToAffine()
   261  }
   262  
   263  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   264  	zInv := make([]uint64, 4)
   265  	zInvSq := make([]uint64, 4)
   266  	p256Inverse(zInv, p.xyz[8:12])
   267  	p256Sqr(zInvSq, zInv)
   268  	p256Mul(zInv, zInv, zInvSq)
   269  
   270  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   271  	p256Mul(zInv, p.xyz[4:8], zInv)
   272  
   273  	p256FromMont(zInvSq, zInvSq)
   274  	p256FromMont(zInv, zInv)
   275  
   276  	xOut := make([]byte, 32)
   277  	yOut := make([]byte, 32)
   278  	p256LittleToBig(xOut, zInvSq)
   279  	p256LittleToBig(yOut, zInv)
   280  
   281  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   282  }
   283  
   284  // p256Inverse sets out to in^-1 mod p.
   285  func p256Inverse(out, in []uint64) {
   286  	var stack [6 * 4]uint64
   287  	p2 := stack[4*0 : 4*0+4]
   288  	p4 := stack[4*1 : 4*1+4]
   289  	p8 := stack[4*2 : 4*2+4]
   290  	p16 := stack[4*3 : 4*3+4]
   291  	p32 := stack[4*4 : 4*4+4]
   292  
   293  	p256Sqr(out, in)
   294  	p256Mul(p2, out, in) // 3*p
   295  
   296  	p256Sqr(out, p2)
   297  	p256Sqr(out, out)
   298  	p256Mul(p4, out, p2) // f*p
   299  
   300  	p256Sqr(out, p4)
   301  	p256Sqr(out, out)
   302  	p256Sqr(out, out)
   303  	p256Sqr(out, out)
   304  	p256Mul(p8, out, p4) // ff*p
   305  
   306  	p256Sqr(out, p8)
   307  
   308  	for i := 0; i < 7; i++ {
   309  		p256Sqr(out, out)
   310  	}
   311  	p256Mul(p16, out, p8) // ffff*p
   312  
   313  	p256Sqr(out, p16)
   314  	for i := 0; i < 15; i++ {
   315  		p256Sqr(out, out)
   316  	}
   317  	p256Mul(p32, out, p16) // ffffffff*p
   318  
   319  	p256Sqr(out, p32)
   320  
   321  	for i := 0; i < 31; i++ {
   322  		p256Sqr(out, out)
   323  	}
   324  	p256Mul(out, out, in)
   325  
   326  	for i := 0; i < 32*4; i++ {
   327  		p256Sqr(out, out)
   328  	}
   329  	p256Mul(out, out, p32)
   330  
   331  	for i := 0; i < 32; i++ {
   332  		p256Sqr(out, out)
   333  	}
   334  	p256Mul(out, out, p32)
   335  
   336  	for i := 0; i < 16; i++ {
   337  		p256Sqr(out, out)
   338  	}
   339  	p256Mul(out, out, p16)
   340  
   341  	for i := 0; i < 8; i++ {
   342  		p256Sqr(out, out)
   343  	}
   344  	p256Mul(out, out, p8)
   345  
   346  	p256Sqr(out, out)
   347  	p256Sqr(out, out)
   348  	p256Sqr(out, out)
   349  	p256Sqr(out, out)
   350  	p256Mul(out, out, p4)
   351  
   352  	p256Sqr(out, out)
   353  	p256Sqr(out, out)
   354  	p256Mul(out, out, p2)
   355  
   356  	p256Sqr(out, out)
   357  	p256Sqr(out, out)
   358  	p256Mul(out, out, in)
   359  }
   360  
   361  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   362  	copy(r[index*12:], p.xyz[:])
   363  }
   364  
   365  func boothW5(in uint) (int, int) {
   366  	var s uint = ^((in >> 5) - 1)
   367  	var d uint = (1 << 6) - in - 1
   368  	d = (d & s) | (in & (^s))
   369  	d = (d >> 1) + (d & 1)
   370  	return int(d), int(s & 1)
   371  }
   372  
   373  func boothW7(in uint) (int, int) {
   374  	var s uint = ^((in >> 7) - 1)
   375  	var d uint = (1 << 8) - in - 1
   376  	d = (d & s) | (in & (^s))
   377  	d = (d >> 1) + (d & 1)
   378  	return int(d), int(s & 1)
   379  }
   380  
   381  func initTable() {
   382  	p256Precomputed = new([37][64 * 8]uint64)
   383  
   384  	basePoint := []uint64{
   385  		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
   386  		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
   387  		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
   388  	}
   389  	t1 := make([]uint64, 12)
   390  	t2 := make([]uint64, 12)
   391  	copy(t2, basePoint)
   392  
   393  	zInv := make([]uint64, 4)
   394  	zInvSq := make([]uint64, 4)
   395  	for j := 0; j < 64; j++ {
   396  		copy(t1, t2)
   397  		for i := 0; i < 37; i++ {
   398  			// The window size is 7 so we need to double 7 times.
   399  			if i != 0 {
   400  				for k := 0; k < 7; k++ {
   401  					p256PointDoubleAsm(t1, t1)
   402  				}
   403  			}
   404  			// Convert the point to affine form. (Its values are
   405  			// still in Montgomery form however.)
   406  			p256Inverse(zInv, t1[8:12])
   407  			p256Sqr(zInvSq, zInv)
   408  			p256Mul(zInv, zInv, zInvSq)
   409  
   410  			p256Mul(t1[:4], t1[:4], zInvSq)
   411  			p256Mul(t1[4:8], t1[4:8], zInv)
   412  
   413  			copy(t1[8:12], basePoint[8:12])
   414  			// Update the table entry
   415  			copy(p256Precomputed[i][j*8:], t1[:8])
   416  		}
   417  		if j == 0 {
   418  			p256PointDoubleAsm(t2, basePoint)
   419  		} else {
   420  			p256PointAddAsm(t2, t2, basePoint)
   421  		}
   422  	}
   423  }
   424  
   425  func (p *p256Point) p256BaseMult(scalar []uint64) {
   426  	precomputeOnce.Do(initTable)
   427  
   428  	wvalue := (scalar[0] << 1) & 0xff
   429  	sel, sign := boothW7(uint(wvalue))
   430  	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
   431  	p256NegCond(p.xyz[4:8], sign)
   432  
   433  	// (This is one, in the Montgomery domain.)
   434  	p.xyz[8] = 0x0000000000000001
   435  	p.xyz[9] = 0xffffffff00000000
   436  	p.xyz[10] = 0xffffffffffffffff
   437  	p.xyz[11] = 0x00000000fffffffe
   438  
   439  	var t0 p256Point
   440  	// (This is one, in the Montgomery domain.)
   441  	t0.xyz[8] = 0x0000000000000001
   442  	t0.xyz[9] = 0xffffffff00000000
   443  	t0.xyz[10] = 0xffffffffffffffff
   444  	t0.xyz[11] = 0x00000000fffffffe
   445  
   446  	index := uint(6)
   447  	zero := sel
   448  
   449  	for i := 1; i < 37; i++ {
   450  		if index < 192 {
   451  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
   452  		} else {
   453  			wvalue = (scalar[index/64] >> (index % 64)) & 0xff
   454  		}
   455  		index += 7
   456  		sel, sign = boothW7(uint(wvalue))
   457  		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
   458  		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
   459  		zero |= sel
   460  	}
   461  }
   462  
   463  func (p *p256Point) p256ScalarMult(scalar []uint64) {
   464  	// precomp is a table of precomputed points that stores powers of p
   465  	// from p^1 to p^16.
   466  	var precomp [16 * 4 * 3]uint64
   467  	var t0, t1, t2, t3 p256Point
   468  
   469  	// Prepare the table
   470  	p.p256StorePoint(&precomp, 0) // 1
   471  
   472  	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
   473  	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
   474  	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
   475  	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
   476  	t0.p256StorePoint(&precomp, 1)  // 2
   477  	t1.p256StorePoint(&precomp, 3)  // 4
   478  	t2.p256StorePoint(&precomp, 7)  // 8
   479  	t3.p256StorePoint(&precomp, 15) // 16
   480  
   481  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   482  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   483  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   484  	t0.p256StorePoint(&precomp, 2) // 3
   485  	t1.p256StorePoint(&precomp, 4) // 5
   486  	t2.p256StorePoint(&precomp, 8) // 9
   487  
   488  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   489  	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
   490  	t0.p256StorePoint(&precomp, 5) // 6
   491  	t1.p256StorePoint(&precomp, 9) // 10
   492  
   493  	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
   494  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   495  	t2.p256StorePoint(&precomp, 6)  // 7
   496  	t1.p256StorePoint(&precomp, 10) // 11
   497  
   498  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   499  	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
   500  	t0.p256StorePoint(&precomp, 11) // 12
   501  	t2.p256StorePoint(&precomp, 13) // 14
   502  
   503  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   504  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   505  	t0.p256StorePoint(&precomp, 12) // 13
   506  	t2.p256StorePoint(&precomp, 14) // 15
   507  
   508  	// Start scanning the window from top bit
   509  	index := uint(254)
   510  	var sel, sign int
   511  
   512  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   513  	sel, _ = boothW5(uint(wvalue))
   514  
   515  	p256Select(p.xyz[0:12], precomp[0:], sel)
   516  	zero := sel
   517  
   518  	for index > 4 {
   519  		index -= 5
   520  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   521  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   522  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   523  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   524  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   525  
   526  		if index < 192 {
   527  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   528  		} else {
   529  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   530  		}
   531  
   532  		sel, sign = boothW5(uint(wvalue))
   533  
   534  		p256Select(t0.xyz[0:], precomp[0:], sel)
   535  		p256NegCond(t0.xyz[4:8], sign)
   536  		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   537  		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   538  		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   539  		zero |= sel
   540  	}
   541  
   542  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   543  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   544  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   545  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   546  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   547  
   548  	wvalue = (scalar[0] << 1) & 0x3f
   549  	sel, sign = boothW5(uint(wvalue))
   550  
   551  	p256Select(t0.xyz[0:], precomp[0:], sel)
   552  	p256NegCond(t0.xyz[4:8], sign)
   553  	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   554  	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   555  	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   556  }