github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/crypto/elliptic/p256_asm.go

github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/crypto/elliptic/p256_asm.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  // +build amd64 arm64
    14  
    15  package elliptic
    16  
    17  import (
    18  	"math/big"
    19  	"sync"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
// Package-level state for the assembly-backed P-256 implementation:
//
//   - p256 is the curve instance whose parameters are filled in by initP256.
//   - p256Precomputed is the base-point table allocated and filled by
//     initTable: 43 windows, each holding 32 affine points of 8 limbs
//     (4 for x, 4 for y).
//   - precomputeOnce makes p256BaseMult run initTable at most once.
var (
	p256            p256Curve
	p256Precomputed *[43][32 * 8]uint64
	precomputeOnce  sync.Once
)
    37  
    38  func initP256() {
    39  	// See FIPS 186-3, section D.2.3
    40  	p256.CurveParams = &CurveParams{Name: "P-256"}
    41  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    44  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    45  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    46  	p256.BitSize = 256
    47  }
    48  
// Params returns the CurveParams embedded in curve.
func (curve p256Curve) Params() *CurveParams {
	return curve.CurveParams
}
    52  
// Functions implemented in p256_asm_*64.s
//
// Field and scalar elements are passed as slices of four uint64 limbs, least
// significant limb first (see fromBig). Points are passed as 8 limbs (affine
// x, y) or 12 limbs (x, y, z).

// Montgomery multiplication modulo P256
//go:noescape
func p256Mul(res, in1, in2 []uint64)

// Montgomery square modulo P256, repeated n times (n >= 1)
//go:noescape
func p256Sqr(res, in []uint64, n int)

// Montgomery multiplication by 1, i.e. conversion out of the Montgomery
// domain.
//go:noescape
func p256FromMont(res, in []uint64)

// iff cond == 1  val <- -val
//go:noescape
func p256NegCond(val []uint64, cond int)

// if cond == 0 res <- b; else res <- a
//go:noescape
func p256MovCond(res, a, b []uint64, cond int)

// Endianness swap: big-endian bytes in, limbs out.
// NOTE(review): not called from this file; direction inferred from the name
// and from p256LittleToBig below.
//go:noescape
func p256BigToLittle(res []uint64, in []byte)

// Endianness swap: limbs in, big-endian bytes out. Callers in this file pass
// a 32-byte res and hand it to big.Int.SetBytes.
//go:noescape
func p256LittleToBig(res []byte, in []uint64)

// Constant time table access
//go:noescape
func p256Select(point, table []uint64, idx int)

// Constant time table access into one window of the precomputed base-point
// table (8-limb affine entries).
//go:noescape
func p256SelectBase(point, table []uint64, idx int)

// Montgomery multiplication modulo Ord(G)
//go:noescape
func p256OrdMul(res, in1, in2 []uint64)

// Montgomery square modulo Ord(G), repeated n times
//go:noescape
func p256OrdSqr(res, in []uint64, n int)

// Point add with in2 being affine point
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// if zero == 0 -> res = in2
//go:noescape
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)

// Point add. Returns one if the two input points were equal and zero
// otherwise. (Note that, due to the way that the equations work out, some
// representations of ∞ are considered equal to everything by this function.)
//go:noescape
func p256PointAddAsm(res, in1, in2 []uint64) int

// Point double
//go:noescape
func p256PointDoubleAsm(res, in []uint64)
   112  
// Inverse computes the inverse of k modulo N, the order of the base point,
// with a fixed addition chain of Montgomery multiplications and squarings
// (p256OrdMul / p256OrdSqr). The chain follows the scalar-inversion window
// values referenced below.
//
// k is expected to satisfy 0 <= k < N. A negative k is replaced by -k, and a
// k that is >= N is reduced modulo N, before the computation; k itself is not
// modified. The result is returned as a new big.Int.
func (curve p256Curve) Inverse(k *big.Int) *big.Int {
	if k.Sign() < 0 {
		// This should never happen.
		k = new(big.Int).Neg(k)
	}

	if k.Cmp(p256.N) >= 0 {
		// This should never happen.
		k = new(big.Int).Mod(k, p256.N)
	}

	// table will store precomputed powers of x.
	// Each name below spells the exponent in binary, e.g. _101 holds x^5.
	// x and t are scratch registers.
	var table [4 * 9]uint64
	var (
		_1      = table[4*0 : 4*1]
		_11     = table[4*1 : 4*2]
		_101    = table[4*2 : 4*3]
		_111    = table[4*3 : 4*4]
		_1111   = table[4*4 : 4*5]
		_10101  = table[4*5 : 4*6]
		_101111 = table[4*6 : 4*7]
		x       = table[4*7 : 4*8]
		t       = table[4*8 : 4*9]
	)

	fromBig(x[:], k)
	// This code operates in the Montgomery domain where R = 2^256 mod n
	// and n is the order of the scalar field. (See initP256 for the
	// value.) Elements in the Montgomery domain take the form a×R and
	// multiplication of x and y in the domain calculates (x × y × R^-1)
	// mod n. RR is R×R mod n, thus the Montgomery multiplication of x and
	// RR gives x×R, i.e. converts x into the Montgomery domain.
	// Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
	p256OrdMul(_1, x, RR)      // _1
	p256OrdSqr(x, _1, 1)       // _10
	p256OrdMul(_11, x, _1)     // _11
	p256OrdMul(_101, x, _11)   // _101
	p256OrdMul(_111, x, _101)  // _111
	p256OrdSqr(x, _101, 1)     // _1010
	p256OrdMul(_1111, _101, x) // _1111

	p256OrdSqr(t, x, 1)          // _10100
	p256OrdMul(_10101, t, _1)    // _10101
	p256OrdSqr(x, _10101, 1)     // _101010
	p256OrdMul(_101111, _101, x) // _101111
	p256OrdMul(x, _10101, x)     // _111111 = x6
	p256OrdSqr(t, x, 2)          // _11111100
	p256OrdMul(t, t, _11)        // _11111111 = x8
	p256OrdSqr(x, t, 8)          // _ff00
	p256OrdMul(x, x, t)          // _ffff = x16
	p256OrdSqr(t, x, 16)         // _ffff0000
	p256OrdMul(t, t, x)          // _ffffffff = x32

	// t now holds x32 (32 one bits). Build the leading
	// ffffffff 00000000 ffffffff ffffffff part of the exponent.
	p256OrdSqr(x, t, 64)
	p256OrdMul(x, x, t)
	p256OrdSqr(x, x, 32)
	p256OrdMul(x, x, t)

	// Remaining windows: step i squares x sqrs[i] times and then multiplies
	// by the precomputed power muls[i]. The two tables must stay the same
	// length and in the same order.
	sqrs := []uint8{
		6, 5, 4, 5, 5,
		4, 3, 3, 5, 9,
		6, 2, 5, 6, 5,
		4, 5, 5, 3, 10,
		2, 5, 5, 3, 7, 6}
	muls := [][]uint64{
		_101111, _111, _11, _1111, _10101,
		_101, _101, _101, _111, _101111,
		_1111, _1, _1, _1111, _111,
		_111, _111, _101, _11, _101111,
		_11, _11, _11, _1, _10101, _1111}

	for i, s := range sqrs {
		p256OrdSqr(x, x, int(s))
		p256OrdMul(x, x, muls[i])
	}

	// Multiplying by one in the Montgomery domain converts a Montgomery
	// value out of the domain.
	one := []uint64{1, 0, 0, 0}
	p256OrdMul(x, x, one)

	// Serialize the four limbs as 32 big-endian bytes for big.Int.
	xOut := make([]byte, 32)
	p256LittleToBig(xOut, x)
	return new(big.Int).SetBytes(xOut)
}
   199  
   200  // fromBig converts a *big.Int into a format used by this code.
   201  func fromBig(out []uint64, big *big.Int) {
   202  	for i := range out {
   203  		out[i] = 0
   204  	}
   205  
   206  	for i, v := range big.Bits() {
   207  		out[i] = uint64(v)
   208  	}
   209  }
   210  
   211  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   212  // to out. If the scalar is equal or greater than the order of the group, it's
   213  // reduced modulo that order.
   214  func p256GetScalar(out []uint64, in []byte) {
   215  	n := new(big.Int).SetBytes(in)
   216  
   217  	if n.Cmp(p256.N) >= 0 {
   218  		n.Mod(n, p256.N)
   219  	}
   220  	fromBig(out, n)
   221  }
   222  
// p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p. See comment in Inverse about how this is used: a Montgomery
// multiplication of a plain value by rr converts it into the domain. The
// limbs are listed least significant first, matching fromBig.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   227  
   228  func maybeReduceModP(in *big.Int) *big.Int {
   229  	if in.Cmp(p256.P) < 0 {
   230  		return in
   231  	}
   232  	return new(big.Int).Mod(in, p256.P)
   233  }
   234  
// CombinedMult returns the affine coordinates of
// baseScalar*G + scalar*(bigX, bigY), where G is the base point.
//
// Both scalars are big-endian byte strings and are reduced modulo the group
// order by p256GetScalar. bigX and bigY are reduced modulo p by
// maybeReduceModP before use; they are not checked to be on the curve here.
func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
	scalarReversed := make([]uint64, 4)
	var r1, r2 p256Point
	// r1 = baseScalar*G. Remember whether the scalar was zero, in which
	// case r1 stands for the point at infinity.
	p256GetScalar(scalarReversed, baseScalar)
	r1IsInfinity := scalarIsZero(scalarReversed)
	r1.p256BaseMult(scalarReversed)

	// r2 = scalar*(bigX, bigY). scalarReversed is reused for the second
	// scalar; the coordinates are loaded and converted into the Montgomery
	// domain by multiplying with rr.
	p256GetScalar(scalarReversed, scalar)
	r2IsInfinity := scalarIsZero(scalarReversed)
	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])

	// This sets r2's Z value to 1, in the Montgomery domain.
	r2.xyz[8] = 0x0000000000000001
	r2.xyz[9] = 0xffffffff00000000
	r2.xyz[10] = 0xffffffffffffffff
	r2.xyz[11] = 0x00000000fffffffe

	r2.p256ScalarMult(scalarReversed)

	// Compute both r1+r2 and 2*r1 unconditionally, then pick the right
	// result with conditional copies instead of branching on the values.
	// The order matters: later copies override earlier ones.
	var sum, double p256Point
	pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
	p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
	sum.CopyConditional(&double, pointsEqual) // r1 == r2: use the doubling
	sum.CopyConditional(&r1, r2IsInfinity)    // r2 is infinity: result is r1
	sum.CopyConditional(&r2, r1IsInfinity)    // r1 is infinity: result is r2

	return sum.p256PointToAffine()
}
   266  
   267  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   268  	scalarReversed := make([]uint64, 4)
   269  	p256GetScalar(scalarReversed, scalar)
   270  
   271  	var r p256Point
   272  	r.p256BaseMult(scalarReversed)
   273  	return r.p256PointToAffine()
   274  }
   275  
   276  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   277  	scalarReversed := make([]uint64, 4)
   278  	p256GetScalar(scalarReversed, scalar)
   279  
   280  	var r p256Point
   281  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   282  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   283  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   284  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   285  	// This sets r2's Z value to 1, in the Montgomery domain.
   286  	r.xyz[8] = 0x0000000000000001
   287  	r.xyz[9] = 0xffffffff00000000
   288  	r.xyz[10] = 0xffffffffffffffff
   289  	r.xyz[11] = 0x00000000fffffffe
   290  
   291  	r.p256ScalarMult(scalarReversed)
   292  	return r.p256PointToAffine()
   293  }
   294  
   295  // uint64IsZero returns 1 if x is zero and zero otherwise.
   296  func uint64IsZero(x uint64) int {
   297  	x = ^x
   298  	x &= x >> 32
   299  	x &= x >> 16
   300  	x &= x >> 8
   301  	x &= x >> 4
   302  	x &= x >> 2
   303  	x &= x >> 1
   304  	return int(x & 1)
   305  }
   306  
   307  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   308  // otherwise.
   309  func scalarIsZero(scalar []uint64) int {
   310  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   311  }
   312  
   313  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   314  	zInv := make([]uint64, 4)
   315  	zInvSq := make([]uint64, 4)
   316  	p256Inverse(zInv, p.xyz[8:12])
   317  	p256Sqr(zInvSq, zInv, 1)
   318  	p256Mul(zInv, zInv, zInvSq)
   319  
   320  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   321  	p256Mul(zInv, p.xyz[4:8], zInv)
   322  
   323  	p256FromMont(zInvSq, zInvSq)
   324  	p256FromMont(zInv, zInv)
   325  
   326  	xOut := make([]byte, 32)
   327  	yOut := make([]byte, 32)
   328  	p256LittleToBig(xOut, zInvSq)
   329  	p256LittleToBig(yOut, zInv)
   330  
   331  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   332  }
   333  
   334  // CopyConditional copies overwrites p with src if v == 1, and leaves p
   335  // unchanged if v == 0.
   336  func (p *p256Point) CopyConditional(src *p256Point, v int) {
   337  	pMask := uint64(v) - 1
   338  	srcMask := ^pMask
   339  
   340  	for i, n := range p.xyz {
   341  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   342  	}
   343  }
   344  
// p256Inverse sets out to in^-1 mod p.
//
// The inverse is computed as in^(p-2) with a fixed sequence of Montgomery
// squarings and multiplications (p256Sqr / p256Mul), so in and out are both
// in the Montgomery domain. out is used as the running accumulator and in is
// read again after out has been written, so the two must not alias.
func p256Inverse(out, in []uint64) {
	// Scratch space for the intermediate powers. Only five of the six
	// 4-limb slots are used.
	var stack [6 * 4]uint64
	p2 := stack[4*0 : 4*0+4]
	p4 := stack[4*1 : 4*1+4]
	p8 := stack[4*2 : 4*2+4]
	p16 := stack[4*3 : 4*3+4]
	p32 := stack[4*4 : 4*4+4]

	// pN holds in^(2^N - 1), i.e. an exponent of N one bits. The trailing
	// comments give that exponent in hexadecimal.
	p256Sqr(out, in, 1)
	p256Mul(p2, out, in) // exponent 0x3

	p256Sqr(out, p2, 2)
	p256Mul(p4, out, p2) // exponent 0xf

	p256Sqr(out, p4, 4)
	p256Mul(p8, out, p4) // exponent 0xff

	p256Sqr(out, p8, 8)
	p256Mul(p16, out, p8) // exponent 0xffff

	p256Sqr(out, p16, 16)
	p256Mul(p32, out, p16) // exponent 0xffffffff

	// Assemble the exponent p-2 =
	// ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff fffffffd
	// from the most significant end.
	p256Sqr(out, p32, 32)
	p256Mul(out, out, in) // ffffffff 00000001

	p256Sqr(out, out, 128)
	p256Mul(out, out, p32) // ... 00000000 00000000 00000000 ffffffff

	p256Sqr(out, out, 32)
	p256Mul(out, out, p32) // ... ffffffff

	// The final word fffffffd is 30 one bits followed by 01.
	p256Sqr(out, out, 16)
	p256Mul(out, out, p16)

	p256Sqr(out, out, 8)
	p256Mul(out, out, p8)

	p256Sqr(out, out, 4)
	p256Mul(out, out, p4)

	p256Sqr(out, out, 2)
	p256Mul(out, out, p2)

	p256Sqr(out, out, 2)
	p256Mul(out, out, in)
}
   393  
   394  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   395  	copy(r[index*12:], p.xyz[:])
   396  }
   397  
   398  func boothW5(in uint) (int, int) {
   399  	var s uint = ^((in >> 5) - 1)
   400  	var d uint = (1 << 6) - in - 1
   401  	d = (d & s) | (in & (^s))
   402  	d = (d >> 1) + (d & 1)
   403  	return int(d), int(s & 1)
   404  }
   405  
   406  func boothW6(in uint) (int, int) {
   407  	var s uint = ^((in >> 6) - 1)
   408  	var d uint = (1 << 7) - in - 1
   409  	d = (d & s) | (in & (^s))
   410  	d = (d >> 1) + (d & 1)
   411  	return int(d), int(s & 1)
   412  }
   413  
// initTable allocates and fills p256Precomputed, the table used by
// p256BaseMult. Entry [i][j*8 : j*8+8] receives the affine x and y limbs
// (still in Montgomery form) of (j+1) * 2^(6*i) * G, for 43 windows i and 32
// multiples j. It is intended to run once, via precomputeOnce.
func initTable() {
	p256Precomputed = new([43][32 * 8]uint64)

	// The starting point as 12 limbs (x, y, z). The z limbs are the
	// Montgomery representation of one; x and y are presumably the base
	// point coordinates in Montgomery form.
	basePoint := []uint64{
		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
	}
	// t2 holds (j+1)*G for the current column j; t1 walks that multiple
	// through the windows.
	t1 := make([]uint64, 12)
	t2 := make([]uint64, 12)
	copy(t2, basePoint)

	zInv := make([]uint64, 4)
	zInvSq := make([]uint64, 4)
	for j := 0; j < 32; j++ {
		copy(t1, t2)
		for i := 0; i < 43; i++ {
			// The window size is 6 so we need to double 6 times.
			if i != 0 {
				for k := 0; k < 6; k++ {
					p256PointDoubleAsm(t1, t1)
				}
			}
			// Convert the point to affine form. (Its values are
			// still in Montgomery form however.)
			p256Inverse(zInv, t1[8:12])
			p256Sqr(zInvSq, zInv, 1)
			p256Mul(zInv, zInv, zInvSq)

			p256Mul(t1[:4], t1[:4], zInvSq)
			p256Mul(t1[4:8], t1[4:8], zInv)

			// Reset z to one so that t1 stays a consistent
			// projective point for the next round of doublings.
			copy(t1[8:12], basePoint[8:12])
			// Update the table entry
			copy(p256Precomputed[i][j*8:], t1[:8])
		}
		// Advance t2 to the next multiple of the base point. The first
		// step is a doubling because it would add the point to itself.
		if j == 0 {
			p256PointDoubleAsm(t2, basePoint)
		} else {
			p256PointAddAsm(t2, t2, basePoint)
		}
	}
}
   457  
// p256BaseMult sets p to scalar times the base point, using the table built
// by initTable (created on first use). scalar is four 64-bit limbs, least
// significant first, as produced by p256GetScalar.
//
// The scalar is consumed in 43 windows of 6 bits. Each window is read as 7
// bits, overlapping the previous window by one bit, and recoded into a signed
// digit by boothW6.
func (p *p256Point) p256BaseMult(scalar []uint64) {
	precomputeOnce.Do(initTable)

	// First window: bits 0..5, shifted up by one so that the extra low bit
	// of the 7-bit window is zero.
	wvalue := (scalar[0] << 1) & 0x7f
	sel, sign := boothW6(uint(wvalue))
	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
	p256NegCond(p.xyz[4:8], sign)

	// (This is one, in the Montgomery domain.)
	p.xyz[8] = 0x0000000000000001
	p.xyz[9] = 0xffffffff00000000
	p.xyz[10] = 0xffffffffffffffff
	p.xyz[11] = 0x00000000fffffffe

	var t0 p256Point
	// (This is one, in the Montgomery domain.)
	t0.xyz[8] = 0x0000000000000001
	t0.xyz[9] = 0xffffffff00000000
	t0.xyz[10] = 0xffffffffffffffff
	t0.xyz[11] = 0x00000000fffffffe

	// index is the bit position where the next 7-bit window starts. zero
	// stays 0 until some window has selected a non-zero digit;
	// p256PointAddAffineAsm uses it to take the selected point as the result
	// instead of adding to an accumulator that holds nothing yet.
	index := uint(5)
	zero := sel

	for i := 1; i < 43; i++ {
		if index < 192 {
			// The window may straddle two limbs.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
		} else {
			// Top limb: there is no higher limb to pull bits from.
			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
		}
		index += 6
		sel, sign = boothW6(uint(wvalue))
		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
		zero |= sel
	}
}
   495  
// p256ScalarMult replaces p with scalar*p. scalar is four 64-bit limbs, least
// significant first, as produced by p256GetScalar.
//
// A table of the multiples 1*p .. 16*p is built first. The scalar is then
// scanned from the top in 5-bit windows; each window is read as 6 bits,
// overlapping its lower neighbour by one bit, and recoded into a signed digit
// by boothW5. Table lookups and the handling of zero digits go through
// p256Select / p256MovCond rather than branches on scalar bits.
func (p *p256Point) p256ScalarMult(scalar []uint64) {
	// precomp is a table of precomputed points that stores multiples of p
	// from 1*p to 16*p. Slot k-1 holds k*p; the trailing comments below
	// give k.
	var precomp [16 * 4 * 3]uint64
	var t0, t1, t2, t3 p256Point

	// Prepare the table
	p.p256StorePoint(&precomp, 0) // 1

	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 1)  // 2
	t1.p256StorePoint(&precomp, 3)  // 4
	t2.p256StorePoint(&precomp, 7)  // 8
	t3.p256StorePoint(&precomp, 15) // 16

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 2) // 3
	t1.p256StorePoint(&precomp, 4) // 5
	t2.p256StorePoint(&precomp, 8) // 9

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	t0.p256StorePoint(&precomp, 5) // 6
	t1.p256StorePoint(&precomp, 9) // 10

	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	t2.p256StorePoint(&precomp, 6)  // 7
	t1.p256StorePoint(&precomp, 10) // 11

	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	t0.p256StorePoint(&precomp, 11) // 12
	t2.p256StorePoint(&precomp, 13) // 14

	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.p256StorePoint(&precomp, 12) // 13
	t2.p256StorePoint(&precomp, 14) // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// Only bits 254 and 255 are available here, so the window value is at
	// most 3 and boothW5 always reports a positive sign, which is why the
	// sign is discarded.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	p256Select(p.xyz[0:12], precomp[0:], sel)
	// zero stays 0 until some window has selected a non-zero digit.
	zero := sel

	for index > 4 {
		index -= 5
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
		p256PointDoubleAsm(p.xyz[:], p.xyz[:])

		if index < 192 {
			// The window may straddle two limbs.
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		p256Select(t0.xyz[0:], precomp[0:], sel)
		p256NegCond(t0.xyz[4:8], sign)
		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
		// sel == 0: nothing to add, keep p. Otherwise keep the sum.
		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
		// zero == 0: no digit selected so far, so the result is just the
		// selected point t0. Otherwise take t1 from the step above.
		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
		zero |= sel
	}

	// Final window: bits 0..3, shifted up by one so that the extra low bit
	// of the 6-bit window is zero.
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
	p256PointDoubleAsm(p.xyz[:], p.xyz[:])

	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	p256Select(t0.xyz[0:], precomp[0:], sel)
	p256NegCond(t0.xyz[4:8], sign)
	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}