github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/crypto/elliptic/p256_amd64.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  // +build amd64
    14  
    15  package elliptic
    16  
    17  import (
    18  	"math/big"
    19  	"sync"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
    32  var (
    33  	p256            p256Curve
    34  	p256Precomputed *[37][64 * 8]uint64
    35  	precomputeOnce  sync.Once
    36  )
    37  
    38  func initP256() {
    39  	// See FIPS 186-3, section D.2.3
    40  	p256.CurveParams = &CurveParams{Name: "P-256"}
    41  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    44  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    45  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    46  	p256.BitSize = 256
    47  }
    48  
    49  func (curve p256Curve) Params() *CurveParams {
    50  	return curve.CurveParams
    51  }
    52  
// Functions implemented in p256_asm_amd64.s

// p256Mul performs a Montgomery multiplication modulo P256: res receives the
// product of in1 and in2.
func p256Mul(res, in1, in2 []uint64)

// p256Sqr performs a Montgomery squaring modulo P256: res receives the square
// of in.
func p256Sqr(res, in []uint64)

// p256FromMont performs a Montgomery multiplication by 1, which takes in out
// of the Montgomery domain and stores the result in res.
func p256FromMont(res, in []uint64)

// p256NegCond replaces val with -val if and only if cond == 1.
func p256NegCond(val []uint64, cond int)

// p256MovCond sets res to b if cond == 0 and to a otherwise.
func p256MovCond(res, a, b []uint64, cond int)

// Endianness swap.
// p256LittleToBig is used in this file to turn four little-endian limbs into
// 32 big-endian bytes; p256BigToLittle is presumably the reverse conversion
// (not called in this file; confirm against the assembly).
func p256BigToLittle(res []uint64, in []byte)
func p256LittleToBig(res []byte, in []uint64)

// Constant time table access.
// Both functions load the table entry chosen by idx into point. Callers in
// this file pass the digit returned by boothW5 (p256Select) or boothW7
// (p256SelectBase) as idx.
// NOTE(review): idx appears to be 1-based with 0 selecting no entry; confirm
// against the assembly.
func p256Select(point, table []uint64, idx int)
func p256SelectBase(point, table []uint64, idx int)

// p256OrdMul performs a Montgomery multiplication modulo Ord(G), the order of
// the base point: res receives the product of in1 and in2.
func p256OrdMul(res, in1, in2 []uint64)

// p256OrdSqr performs a Montgomery squaring modulo Ord(G), repeated n times:
// res receives in squared n times in succession.
func p256OrdSqr(res, in []uint64, n int)

// p256PointAddAffineAsm adds the affine point in2 to in1 and stores the sum
// in res.
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// If zero == 0 -> res = in2
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)

// p256PointAddAsm stores the point sum in1 + in2 in res.
func p256PointAddAsm(res, in1, in2 []uint64)

// p256PointDoubleAsm stores the point double 2*in in res.
func p256PointDoubleAsm(res, in []uint64)
    94  
    95  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
    96  	if k.Cmp(p256.N) >= 0 {
    97  		// This should never happen.
    98  		reducedK := new(big.Int).Mod(k, p256.N)
    99  		k = reducedK
   100  	}
   101  
   102  	// table will store precomputed powers of x. The four words at index
   103  	// 4×i store x^(i+1).
   104  	var table [4 * 15]uint64
   105  
   106  	x := make([]uint64, 4)
   107  	fromBig(x[:], k)
   108  	// This code operates in the Montgomery domain where R = 2^256 mod n
   109  	// and n is the order of the scalar field. (See initP256 for the
   110  	// value.) Elements in the Montgomery domain take the form a×R and
   111  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   112  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   113  	// i.e. converts x into the Montgomery domain.
   114  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
   115  	p256OrdMul(table[:4], x, RR)
   116  
   117  	// Prepare the table, no need in constant time access, because the
   118  	// power is not a secret. (Entry 0 is never used.)
   119  	for i := 2; i < 16; i += 2 {
   120  		p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
   121  		p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
   122  	}
   123  
   124  	x[0] = table[4*14+0] // f
   125  	x[1] = table[4*14+1]
   126  	x[2] = table[4*14+2]
   127  	x[3] = table[4*14+3]
   128  
   129  	p256OrdSqr(x, x, 4)
   130  	p256OrdMul(x, x, table[4*14:4*14+4]) // ff
   131  	t := make([]uint64, 4, 4)
   132  	t[0] = x[0]
   133  	t[1] = x[1]
   134  	t[2] = x[2]
   135  	t[3] = x[3]
   136  
   137  	p256OrdSqr(x, x, 8)
   138  	p256OrdMul(x, x, t) // ffff
   139  	t[0] = x[0]
   140  	t[1] = x[1]
   141  	t[2] = x[2]
   142  	t[3] = x[3]
   143  
   144  	p256OrdSqr(x, x, 16)
   145  	p256OrdMul(x, x, t) // ffffffff
   146  	t[0] = x[0]
   147  	t[1] = x[1]
   148  	t[2] = x[2]
   149  	t[3] = x[3]
   150  
   151  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   152  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   153  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   154  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   155  
   156  	// Remaining 32 windows
   157  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   158  	for i := 0; i < 32; i++ {
   159  		p256OrdSqr(x, x, 4)
   160  		p256OrdMul(x, x, table[4*(expLo[i]-1):])
   161  	}
   162  
   163  	// Multiplying by one in the Montgomery domain converts a Montgomery
   164  	// value out of the domain.
   165  	one := []uint64{1, 0, 0, 0}
   166  	p256OrdMul(x, x, one)
   167  
   168  	xOut := make([]byte, 32)
   169  	p256LittleToBig(xOut, x)
   170  	return new(big.Int).SetBytes(xOut)
   171  }
   172  
   173  // fromBig converts a *big.Int into a format used by this code.
   174  func fromBig(out []uint64, big *big.Int) {
   175  	for i := range out {
   176  		out[i] = 0
   177  	}
   178  
   179  	for i, v := range big.Bits() {
   180  		out[i] = uint64(v)
   181  	}
   182  }
   183  
   184  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   185  // to out. If the scalar is equal or greater than the order of the group, it's
   186  // reduced modulo that order.
   187  func p256GetScalar(out []uint64, in []byte) {
   188  	n := new(big.Int).SetBytes(in)
   189  
   190  	if n.Cmp(p256.N) >= 0 {
   191  		n.Mod(n, p256.N)
   192  	}
   193  	fromBig(out, n)
   194  }
   195  
   196  // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
   197  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   198  // R×R mod p. See comment in Inverse about how this is used.
   199  var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   200  
   201  func maybeReduceModP(in *big.Int) *big.Int {
   202  	if in.Cmp(p256.P) < 0 {
   203  		return in
   204  	}
   205  	return new(big.Int).Mod(in, p256.P)
   206  }
   207  
   208  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   209  	scalarReversed := make([]uint64, 4)
   210  	var r1, r2 p256Point
   211  	p256GetScalar(scalarReversed, baseScalar)
   212  	r1.p256BaseMult(scalarReversed)
   213  
   214  	p256GetScalar(scalarReversed, scalar)
   215  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   216  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   217  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   218  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   219  
   220  	// This sets r2's Z value to 1, in the Montgomery domain.
   221  	r2.xyz[8] = 0x0000000000000001
   222  	r2.xyz[9] = 0xffffffff00000000
   223  	r2.xyz[10] = 0xffffffffffffffff
   224  	r2.xyz[11] = 0x00000000fffffffe
   225  
   226  	r2.p256ScalarMult(scalarReversed)
   227  	p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:])
   228  	return r1.p256PointToAffine()
   229  }
   230  
   231  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   232  	scalarReversed := make([]uint64, 4)
   233  	p256GetScalar(scalarReversed, scalar)
   234  
   235  	var r p256Point
   236  	r.p256BaseMult(scalarReversed)
   237  	return r.p256PointToAffine()
   238  }
   239  
   240  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   241  	scalarReversed := make([]uint64, 4)
   242  	p256GetScalar(scalarReversed, scalar)
   243  
   244  	var r p256Point
   245  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   246  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   247  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   248  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   249  	// This sets r2's Z value to 1, in the Montgomery domain.
   250  	r.xyz[8] = 0x0000000000000001
   251  	r.xyz[9] = 0xffffffff00000000
   252  	r.xyz[10] = 0xffffffffffffffff
   253  	r.xyz[11] = 0x00000000fffffffe
   254  
   255  	r.p256ScalarMult(scalarReversed)
   256  	return r.p256PointToAffine()
   257  }
   258  
   259  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   260  	zInv := make([]uint64, 4)
   261  	zInvSq := make([]uint64, 4)
   262  	p256Inverse(zInv, p.xyz[8:12])
   263  	p256Sqr(zInvSq, zInv)
   264  	p256Mul(zInv, zInv, zInvSq)
   265  
   266  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   267  	p256Mul(zInv, p.xyz[4:8], zInv)
   268  
   269  	p256FromMont(zInvSq, zInvSq)
   270  	p256FromMont(zInv, zInv)
   271  
   272  	xOut := make([]byte, 32)
   273  	yOut := make([]byte, 32)
   274  	p256LittleToBig(xOut, zInvSq)
   275  	p256LittleToBig(yOut, zInv)
   276  
   277  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   278  }
   279  
// p256Inverse sets out to in^-1 mod p.
//
// The inverse is computed as in^(p-2) with a fixed chain of p256Sqr and
// p256Mul calls. The trailing comments give the exponent of in held by the
// named value. in is read again after out has been written, so the two
// slices must not overlap.
func p256Inverse(out, in []uint64) {
	// Scratch space for the intermediate powers; only the first five
	// four-limb slots are used.
	var stack [6 * 4]uint64
	p2 := stack[4*0 : 4*0+4]
	p4 := stack[4*1 : 4*1+4]
	p8 := stack[4*2 : 4*2+4]
	p16 := stack[4*3 : 4*3+4]
	p32 := stack[4*4 : 4*4+4]

	p256Sqr(out, in)
	p256Mul(p2, out, in) // in^(2^2-1) = in^0x3

	p256Sqr(out, p2)
	p256Sqr(out, out)
	p256Mul(p4, out, p2) // in^(2^4-1) = in^0xf

	p256Sqr(out, p4)
	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Mul(p8, out, p4) // in^(2^8-1) = in^0xff

	p256Sqr(out, p8)

	for i := 0; i < 7; i++ {
		p256Sqr(out, out)
	}
	p256Mul(p16, out, p8) // in^(2^16-1) = in^0xffff

	p256Sqr(out, p16)
	for i := 0; i < 15; i++ {
		p256Sqr(out, out)
	}
	p256Mul(p32, out, p16) // in^(2^32-1) = in^0xffffffff

	// From here on out accumulates the exponent p-2 from its most
	// significant bits downwards: shift left by squaring, then multiply in
	// the next run of one bits.
	p256Sqr(out, p32)

	for i := 0; i < 31; i++ {
		p256Sqr(out, out)
	}
	p256Mul(out, out, in) // exponent 0xffffffff00000001

	for i := 0; i < 32*4; i++ {
		p256Sqr(out, out)
	}
	p256Mul(out, out, p32)

	for i := 0; i < 32; i++ {
		p256Sqr(out, out)
	}
	p256Mul(out, out, p32)

	for i := 0; i < 16; i++ {
		p256Sqr(out, out)
	}
	p256Mul(out, out, p16)

	for i := 0; i < 8; i++ {
		p256Sqr(out, out)
	}
	p256Mul(out, out, p8)

	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Mul(out, out, p4)

	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Mul(out, out, p2)

	// The final two bits of the exponent are 01.
	p256Sqr(out, out)
	p256Sqr(out, out)
	p256Mul(out, out, in)
}
   356  
   357  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   358  	copy(r[index*12:], p.xyz[:])
   359  }
   360  
   361  func boothW5(in uint) (int, int) {
   362  	var s uint = ^((in >> 5) - 1)
   363  	var d uint = (1 << 6) - in - 1
   364  	d = (d & s) | (in & (^s))
   365  	d = (d >> 1) + (d & 1)
   366  	return int(d), int(s & 1)
   367  }
   368  
   369  func boothW7(in uint) (int, int) {
   370  	var s uint = ^((in >> 7) - 1)
   371  	var d uint = (1 << 8) - in - 1
   372  	d = (d & s) | (in & (^s))
   373  	d = (d >> 1) + (d & 1)
   374  	return int(d), int(s & 1)
   375  }
   376  
// initTable builds p256Precomputed, the table used by p256BaseMult.
//
// For window i in [0, 37) and j in [0, 64), the eight limbs at
// p256Precomputed[i][j*8:] hold the affine X and Y coordinates of
// (j+1) * 2^(7*i) * basePoint, with values still in Montgomery form.
func initTable() {
	p256Precomputed = new([37][64 * 8]uint64)

	// The base point as X, Y, Z limbs. The Z limbs are the same constant
	// that the rest of this file uses for one in the Montgomery domain.
	basePoint := []uint64{
		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
	}
	// t2 walks through the multiples (j+1)*basePoint; t1 is the working copy
	// that is repeatedly doubled for the successive windows.
	t1 := make([]uint64, 12)
	t2 := make([]uint64, 12)
	copy(t2, basePoint)

	zInv := make([]uint64, 4)
	zInvSq := make([]uint64, 4)
	for j := 0; j < 64; j++ {
		copy(t1, t2)
		for i := 0; i < 37; i++ {
			// The window size is 7 so we need to double 7 times.
			if i != 0 {
				for k := 0; k < 7; k++ {
					p256PointDoubleAsm(t1, t1)
				}
			}
			// Convert the point to affine form. (Its values are
			// still in Montgomery form however.)
			// zInvSq becomes Z^-2 and zInv becomes Z^-3.
			p256Inverse(zInv, t1[8:12])
			p256Sqr(zInvSq, zInv)
			p256Mul(zInv, zInv, zInvSq)

			p256Mul(t1[:4], t1[:4], zInvSq)
			p256Mul(t1[4:8], t1[4:8], zInv)

			// The point is affine now, so reset Z to one (in Montgomery
			// form) before it is doubled again.
			copy(t1[8:12], basePoint[8:12])
			// Update the table entry
			copy(p256Precomputed[i][j*8:], t1[:8])
		}
		// Advance t2 to the next multiple of the base point. On the first
		// pass t2 equals basePoint, and the doubling routine is used
		// instead of the addition routine.
		if j == 0 {
			p256PointDoubleAsm(t2, basePoint)
		} else {
			p256PointAddAsm(t2, t2, basePoint)
		}
	}
}
   420  
// p256BaseMult sets p to scalar times the base point, using the table built
// by initTable. scalar is read as four 64-bit limbs, least significant
// first. It is processed in 37 windows of seven bits each; every window is
// recoded with boothW7 and the selected table entry is added to p.
func (p *p256Point) p256BaseMult(scalar []uint64) {
	precomputeOnce.Do(initTable)

	// First window: the seven lowest scalar bits, shifted up by one so that
	// the overlap bit expected by boothW7 is zero.
	wvalue := (scalar[0] << 1) & 0xff
	sel, sign := boothW7(uint(wvalue))
	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
	p256NegCond(p.xyz[4:8], sign)

	// (This is one, in the Montgomery domain.)
	p.xyz[8] = 0x0000000000000001
	p.xyz[9] = 0xffffffff00000000
	p.xyz[10] = 0xffffffffffffffff
	p.xyz[11] = 0x00000000fffffffe

	var t0 p256Point
	// (This is one, in the Montgomery domain.)
	t0.xyz[8] = 0x0000000000000001
	t0.xyz[9] = 0xffffffff00000000
	t0.xyz[10] = 0xffffffffffffffff
	t0.xyz[11] = 0x00000000fffffffe

	// index is the bit position where the next 8-bit window value starts;
	// it begins one bit below the window so that the overlap bit is
	// included.
	index := uint(6)
	// zero stays 0 for as long as every digit selected so far was 0. It is
	// passed to p256PointAddAffineAsm, which then returns the table entry
	// itself rather than a sum.
	zero := sel

	for i := 1; i < 37; i++ {
		if index < 192 {
			// The window may straddle two limbs, so combine bits from
			// both.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
		} else {
			// Top limb: there is no higher limb to borrow bits from.
			wvalue = (scalar[index/64] >> (index % 64)) & 0xff
		}
		index += 7
		sel, sign = boothW7(uint(wvalue))
		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
		zero |= sel
	}
}
   458  
// p256ScalarMult replaces p with scalar times p. scalar is read as four
// 64-bit limbs, least significant first. The scalar is processed from the
// top in windows of five bits; every window is recoded with boothW5 and the
// selected multiple of the original point is added after five doublings.
func (p *p256Point) p256ScalarMult(scalar []uint64) {
	// precomp is a table of precomputed points that stores the multiples of
	// p from 1*p to 16*p. The multiple k*p lives in slot k-1. The trailing
	// comments below give the multiple being stored.
	var precomp [16 * 4 * 3]uint64
	var t0, t1, t2, t3 p256Point

	// Prepare the table
	p.p256StorePoint(&precomp, 0) // 1

	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 1)  // 2
	t1.p256StorePoint(&precomp, 3)  // 4
	t2.p256StorePoint(&precomp, 7)  // 8
	t3.p256StorePoint(&precomp, 15) // 16

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 2) // 3
	t1.p256StorePoint(&precomp, 4) // 5
	t2.p256StorePoint(&precomp, 8) // 9

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	t0.p256StorePoint(&precomp, 5) // 6
	t1.p256StorePoint(&precomp, 9) // 10

	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	t2.p256StorePoint(&precomp, 6)  // 7
	t1.p256StorePoint(&precomp, 10) // 11

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 11) // 12
	t2.p256StorePoint(&precomp, 13) // 14

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 12) // 13
	t2.p256StorePoint(&precomp, 14) // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// Only bits 254 and 255 exist in the top window, so its value is below
	// 32 and boothW5 reports no sign; the sign is therefore dropped.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	// From here on p is the accumulator; the original point lives on only
	// in precomp.
	p256Select(p.xyz[0:12], precomp[0:], sel)
	// zero stays 0 for as long as every digit selected so far was 0.
	zero := sel

	for index > 4 {
		index -= 5
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])

		if index < 192 {
			// The window may straddle two limbs, so combine bits from
			// both.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			// Top limb: there is no higher limb to borrow bits from.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		p256Select(t0.xyz[0:], precomp[0:], sel)
		p256NegCond(t0.xyz[4:8], sign)
		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
		// If the digit is 0, keep p instead of the sum.
		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
		// If every earlier digit was 0, take the selected point itself
		// instead of the sum.
		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
		zero |= sel
	}

	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])

	// Last window: the five lowest scalar bits, shifted up by one so that
	// the overlap bit expected by boothW5 is zero.
	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	p256Select(t0.xyz[0:], precomp[0:], sel)
	p256NegCond(t0.xyz[4:8], sign)
	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	// Same conditional selection as inside the loop.
	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}