github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/crypto/elliptic/p256_amd64.go

github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/crypto/elliptic/p256_amd64.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  // +build amd64
    14  
    15  package elliptic
    16  
    17  import (
    18  	"math/big"
    19  	"sync"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
    32  var (
    33  	p256            p256Curve
    34  	p256Precomputed *[37][64 * 8]uint64
    35  	precomputeOnce  sync.Once
    36  )
    37  
    38  func initP256() {
    39  	// See FIPS 186-3, section D.2.3
    40  	p256.CurveParams = &CurveParams{Name: "P-256"}
    41  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    44  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    45  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    46  	p256.BitSize = 256
    47  }
    48  
    49  func (curve p256Curve) Params() *CurveParams {
    50  	return curve.CurveParams
    51  }
    52  
// The functions below have no Go body; they are implemented in
// p256_asm_amd64.s.

// p256Mul performs a Montgomery multiplication modulo P256 of in1 and in2,
// writing the result to res.
func p256Mul(res, in1, in2 []uint64)

// p256Sqr performs a Montgomery squaring modulo P256 of in, writing the
// result to res.
func p256Sqr(res, in []uint64)

// p256FromMont performs a Montgomery multiplication of in by 1, writing the
// result to res. (This takes a value out of the Montgomery domain.)
func p256FromMont(res, in []uint64)

// p256NegCond replaces val with -val if and only if cond == 1.
func p256NegCond(val []uint64, cond int)

// p256MovCond sets res to b if cond == 0, and to a otherwise.
func p256MovCond(res, a, b []uint64, cond int)

// p256BigToLittle and p256LittleToBig perform an endianness swap between a
// byte slice and a slice of 64-bit words.
func p256BigToLittle(res []uint64, in []byte)
func p256LittleToBig(res []byte, in []uint64)

// p256Select and p256SelectBase perform a constant-time table access: the
// entry of table chosen by idx is written to point.
func p256Select(point, table []uint64, idx int)
func p256SelectBase(point, table []uint64, idx int)

// p256OrdMul performs a Montgomery multiplication modulo Ord(G) of in1 and
// in2, writing the result to res.
func p256OrdMul(res, in1, in2 []uint64)

// p256OrdSqr performs a Montgomery squaring modulo Ord(G), repeated n times,
// of in, writing the result to res.
func p256OrdSqr(res, in []uint64, n int)

// p256PointAddAffineAsm adds in1 and in2, where in2 is an affine point.
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// If zero == 0 -> res = in2
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)

// p256PointAddAsm adds two points. It returns one if the two input points
// were equal and zero otherwise. (Note that, due to the way that the
// equations work out, some representations of ∞ are considered equal to
// everything by this function.)
func p256PointAddAsm(res, in1, in2 []uint64) int

// p256PointDoubleAsm doubles the point in, writing the result to res.
func p256PointDoubleAsm(res, in []uint64)
    96  
    97  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
    98  	if k.Sign() < 0 {
    99  		// This should never happen.
   100  		k = new(big.Int).Neg(k)
   101  	}
   102  
   103  	if k.Cmp(p256.N) >= 0 {
   104  		// This should never happen.
   105  		k = new(big.Int).Mod(k, p256.N)
   106  	}
   107  
   108  	// table will store precomputed powers of x. The four words at index
   109  	// 4×i store x^(i+1).
   110  	var table [4 * 15]uint64
   111  
   112  	x := make([]uint64, 4)
   113  	fromBig(x[:], k)
   114  	// This code operates in the Montgomery domain where R = 2^256 mod n
   115  	// and n is the order of the scalar field. (See initP256 for the
   116  	// value.) Elements in the Montgomery domain take the form a×R and
   117  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   118  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   119  	// i.e. converts x into the Montgomery domain.
   120  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
   121  	p256OrdMul(table[:4], x, RR)
   122  
   123  	// Prepare the table, no need in constant time access, because the
   124  	// power is not a secret. (Entry 0 is never used.)
   125  	for i := 2; i < 16; i += 2 {
   126  		p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
   127  		p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
   128  	}
   129  
   130  	x[0] = table[4*14+0] // f
   131  	x[1] = table[4*14+1]
   132  	x[2] = table[4*14+2]
   133  	x[3] = table[4*14+3]
   134  
   135  	p256OrdSqr(x, x, 4)
   136  	p256OrdMul(x, x, table[4*14:4*14+4]) // ff
   137  	t := make([]uint64, 4, 4)
   138  	t[0] = x[0]
   139  	t[1] = x[1]
   140  	t[2] = x[2]
   141  	t[3] = x[3]
   142  
   143  	p256OrdSqr(x, x, 8)
   144  	p256OrdMul(x, x, t) // ffff
   145  	t[0] = x[0]
   146  	t[1] = x[1]
   147  	t[2] = x[2]
   148  	t[3] = x[3]
   149  
   150  	p256OrdSqr(x, x, 16)
   151  	p256OrdMul(x, x, t) // ffffffff
   152  	t[0] = x[0]
   153  	t[1] = x[1]
   154  	t[2] = x[2]
   155  	t[3] = x[3]
   156  
   157  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   158  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   159  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   160  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   161  
   162  	// Remaining 32 windows
   163  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   164  	for i := 0; i < 32; i++ {
   165  		p256OrdSqr(x, x, 4)
   166  		p256OrdMul(x, x, table[4*(expLo[i]-1):])
   167  	}
   168  
   169  	// Multiplying by one in the Montgomery domain converts a Montgomery
   170  	// value out of the domain.
   171  	one := []uint64{1, 0, 0, 0}
   172  	p256OrdMul(x, x, one)
   173  
   174  	xOut := make([]byte, 32)
   175  	p256LittleToBig(xOut, x)
   176  	return new(big.Int).SetBytes(xOut)
   177  }
   178  
   179  // fromBig converts a *big.Int into a format used by this code.
   180  func fromBig(out []uint64, big *big.Int) {
   181  	for i := range out {
   182  		out[i] = 0
   183  	}
   184  
   185  	for i, v := range big.Bits() {
   186  		out[i] = uint64(v)
   187  	}
   188  }
   189  
   190  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   191  // to out. If the scalar is equal or greater than the order of the group, it's
   192  // reduced modulo that order.
   193  func p256GetScalar(out []uint64, in []byte) {
   194  	n := new(big.Int).SetBytes(in)
   195  
   196  	if n.Cmp(p256.N) >= 0 {
   197  		n.Mod(n, p256.N)
   198  	}
   199  	fromBig(out, n)
   200  }
   201  
   202  // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
   203  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   204  // R×R mod p. See comment in Inverse about how this is used.
   205  var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   206  
   207  func maybeReduceModP(in *big.Int) *big.Int {
   208  	if in.Cmp(p256.P) < 0 {
   209  		return in
   210  	}
   211  	return new(big.Int).Mod(in, p256.P)
   212  }
   213  
   214  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   215  	scalarReversed := make([]uint64, 4)
   216  	var r1, r2 p256Point
   217  	p256GetScalar(scalarReversed, baseScalar)
   218  	r1IsInfinity := scalarIsZero(scalarReversed)
   219  	r1.p256BaseMult(scalarReversed)
   220  
   221  	p256GetScalar(scalarReversed, scalar)
   222  	r2IsInfinity := scalarIsZero(scalarReversed)
   223  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   224  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   225  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   226  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   227  
   228  	// This sets r2's Z value to 1, in the Montgomery domain.
   229  	r2.xyz[8] = 0x0000000000000001
   230  	r2.xyz[9] = 0xffffffff00000000
   231  	r2.xyz[10] = 0xffffffffffffffff
   232  	r2.xyz[11] = 0x00000000fffffffe
   233  
   234  	r2.p256ScalarMult(scalarReversed)
   235  
   236  	var sum, double p256Point
   237  	pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
   238  	p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
   239  	sum.CopyConditional(&double, pointsEqual)
   240  	sum.CopyConditional(&r1, r2IsInfinity)
   241  	sum.CopyConditional(&r2, r1IsInfinity)
   242  
   243  	return sum.p256PointToAffine()
   244  }
   245  
   246  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   247  	scalarReversed := make([]uint64, 4)
   248  	p256GetScalar(scalarReversed, scalar)
   249  
   250  	var r p256Point
   251  	r.p256BaseMult(scalarReversed)
   252  	return r.p256PointToAffine()
   253  }
   254  
   255  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   256  	scalarReversed := make([]uint64, 4)
   257  	p256GetScalar(scalarReversed, scalar)
   258  
   259  	var r p256Point
   260  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   261  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   262  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   263  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   264  	// This sets r2's Z value to 1, in the Montgomery domain.
   265  	r.xyz[8] = 0x0000000000000001
   266  	r.xyz[9] = 0xffffffff00000000
   267  	r.xyz[10] = 0xffffffffffffffff
   268  	r.xyz[11] = 0x00000000fffffffe
   269  
   270  	r.p256ScalarMult(scalarReversed)
   271  	return r.p256PointToAffine()
   272  }
   273  
   274  // uint64IsZero returns 1 if x is zero and zero otherwise.
   275  func uint64IsZero(x uint64) int {
   276  	x = ^x
   277  	x &= x >> 32
   278  	x &= x >> 16
   279  	x &= x >> 8
   280  	x &= x >> 4
   281  	x &= x >> 2
   282  	x &= x >> 1
   283  	return int(x & 1)
   284  }
   285  
   286  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   287  // otherwise.
   288  func scalarIsZero(scalar []uint64) int {
   289  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   290  }
   291  
   292  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   293  	zInv := make([]uint64, 4)
   294  	zInvSq := make([]uint64, 4)
   295  	p256Inverse(zInv, p.xyz[8:12])
   296  	p256Sqr(zInvSq, zInv)
   297  	p256Mul(zInv, zInv, zInvSq)
   298  
   299  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   300  	p256Mul(zInv, p.xyz[4:8], zInv)
   301  
   302  	p256FromMont(zInvSq, zInvSq)
   303  	p256FromMont(zInv, zInv)
   304  
   305  	xOut := make([]byte, 32)
   306  	yOut := make([]byte, 32)
   307  	p256LittleToBig(xOut, zInvSq)
   308  	p256LittleToBig(yOut, zInv)
   309  
   310  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   311  }
   312  
   313  // CopyConditional copies overwrites p with src if v == 1, and leaves p
   314  // unchanged if v == 0.
   315  func (p *p256Point) CopyConditional(src *p256Point, v int) {
   316  	pMask := uint64(v) - 1
   317  	srcMask := ^pMask
   318  
   319  	for i, n := range p.xyz {
   320  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   321  	}
   322  }
   323  
   324  // p256Inverse sets out to in^-1 mod p.
   325  func p256Inverse(out, in []uint64) {
   326  	var stack [6 * 4]uint64
   327  	p2 := stack[4*0 : 4*0+4]
   328  	p4 := stack[4*1 : 4*1+4]
   329  	p8 := stack[4*2 : 4*2+4]
   330  	p16 := stack[4*3 : 4*3+4]
   331  	p32 := stack[4*4 : 4*4+4]
   332  
   333  	p256Sqr(out, in)
   334  	p256Mul(p2, out, in) // 3*p
   335  
   336  	p256Sqr(out, p2)
   337  	p256Sqr(out, out)
   338  	p256Mul(p4, out, p2) // f*p
   339  
   340  	p256Sqr(out, p4)
   341  	p256Sqr(out, out)
   342  	p256Sqr(out, out)
   343  	p256Sqr(out, out)
   344  	p256Mul(p8, out, p4) // ff*p
   345  
   346  	p256Sqr(out, p8)
   347  
   348  	for i := 0; i < 7; i++ {
   349  		p256Sqr(out, out)
   350  	}
   351  	p256Mul(p16, out, p8) // ffff*p
   352  
   353  	p256Sqr(out, p16)
   354  	for i := 0; i < 15; i++ {
   355  		p256Sqr(out, out)
   356  	}
   357  	p256Mul(p32, out, p16) // ffffffff*p
   358  
   359  	p256Sqr(out, p32)
   360  
   361  	for i := 0; i < 31; i++ {
   362  		p256Sqr(out, out)
   363  	}
   364  	p256Mul(out, out, in)
   365  
   366  	for i := 0; i < 32*4; i++ {
   367  		p256Sqr(out, out)
   368  	}
   369  	p256Mul(out, out, p32)
   370  
   371  	for i := 0; i < 32; i++ {
   372  		p256Sqr(out, out)
   373  	}
   374  	p256Mul(out, out, p32)
   375  
   376  	for i := 0; i < 16; i++ {
   377  		p256Sqr(out, out)
   378  	}
   379  	p256Mul(out, out, p16)
   380  
   381  	for i := 0; i < 8; i++ {
   382  		p256Sqr(out, out)
   383  	}
   384  	p256Mul(out, out, p8)
   385  
   386  	p256Sqr(out, out)
   387  	p256Sqr(out, out)
   388  	p256Sqr(out, out)
   389  	p256Sqr(out, out)
   390  	p256Mul(out, out, p4)
   391  
   392  	p256Sqr(out, out)
   393  	p256Sqr(out, out)
   394  	p256Mul(out, out, p2)
   395  
   396  	p256Sqr(out, out)
   397  	p256Sqr(out, out)
   398  	p256Mul(out, out, in)
   399  }
   400  
   401  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   402  	copy(r[index*12:], p.xyz[:])
   403  }
   404  
   405  func boothW5(in uint) (int, int) {
   406  	var s uint = ^((in >> 5) - 1)
   407  	var d uint = (1 << 6) - in - 1
   408  	d = (d & s) | (in & (^s))
   409  	d = (d >> 1) + (d & 1)
   410  	return int(d), int(s & 1)
   411  }
   412  
   413  func boothW7(in uint) (int, int) {
   414  	var s uint = ^((in >> 7) - 1)
   415  	var d uint = (1 << 8) - in - 1
   416  	d = (d & s) | (in & (^s))
   417  	d = (d >> 1) + (d & 1)
   418  	return int(d), int(s & 1)
   419  }
   420  
   421  func initTable() {
   422  	p256Precomputed = new([37][64 * 8]uint64)
   423  
   424  	basePoint := []uint64{
   425  		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
   426  		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
   427  		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
   428  	}
   429  	t1 := make([]uint64, 12)
   430  	t2 := make([]uint64, 12)
   431  	copy(t2, basePoint)
   432  
   433  	zInv := make([]uint64, 4)
   434  	zInvSq := make([]uint64, 4)
   435  	for j := 0; j < 64; j++ {
   436  		copy(t1, t2)
   437  		for i := 0; i < 37; i++ {
   438  			// The window size is 7 so we need to double 7 times.
   439  			if i != 0 {
   440  				for k := 0; k < 7; k++ {
   441  					p256PointDoubleAsm(t1, t1)
   442  				}
   443  			}
   444  			// Convert the point to affine form. (Its values are
   445  			// still in Montgomery form however.)
   446  			p256Inverse(zInv, t1[8:12])
   447  			p256Sqr(zInvSq, zInv)
   448  			p256Mul(zInv, zInv, zInvSq)
   449  
   450  			p256Mul(t1[:4], t1[:4], zInvSq)
   451  			p256Mul(t1[4:8], t1[4:8], zInv)
   452  
   453  			copy(t1[8:12], basePoint[8:12])
   454  			// Update the table entry
   455  			copy(p256Precomputed[i][j*8:], t1[:8])
   456  		}
   457  		if j == 0 {
   458  			p256PointDoubleAsm(t2, basePoint)
   459  		} else {
   460  			p256PointAddAsm(t2, t2, basePoint)
   461  		}
   462  	}
   463  }
   464  
   465  func (p *p256Point) p256BaseMult(scalar []uint64) {
   466  	precomputeOnce.Do(initTable)
   467  
   468  	wvalue := (scalar[0] << 1) & 0xff
   469  	sel, sign := boothW7(uint(wvalue))
   470  	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
   471  	p256NegCond(p.xyz[4:8], sign)
   472  
   473  	// (This is one, in the Montgomery domain.)
   474  	p.xyz[8] = 0x0000000000000001
   475  	p.xyz[9] = 0xffffffff00000000
   476  	p.xyz[10] = 0xffffffffffffffff
   477  	p.xyz[11] = 0x00000000fffffffe
   478  
   479  	var t0 p256Point
   480  	// (This is one, in the Montgomery domain.)
   481  	t0.xyz[8] = 0x0000000000000001
   482  	t0.xyz[9] = 0xffffffff00000000
   483  	t0.xyz[10] = 0xffffffffffffffff
   484  	t0.xyz[11] = 0x00000000fffffffe
   485  
   486  	index := uint(6)
   487  	zero := sel
   488  
   489  	for i := 1; i < 37; i++ {
   490  		if index < 192 {
   491  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
   492  		} else {
   493  			wvalue = (scalar[index/64] >> (index % 64)) & 0xff
   494  		}
   495  		index += 7
   496  		sel, sign = boothW7(uint(wvalue))
   497  		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
   498  		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
   499  		zero |= sel
   500  	}
   501  }
   502  
   503  func (p *p256Point) p256ScalarMult(scalar []uint64) {
   504  	// precomp is a table of precomputed points that stores powers of p
   505  	// from p^1 to p^16.
   506  	var precomp [16 * 4 * 3]uint64
   507  	var t0, t1, t2, t3 p256Point
   508  
   509  	// Prepare the table
   510  	p.p256StorePoint(&precomp, 0) // 1
   511  
   512  	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
   513  	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
   514  	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
   515  	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
   516  	t0.p256StorePoint(&precomp, 1)  // 2
   517  	t1.p256StorePoint(&precomp, 3)  // 4
   518  	t2.p256StorePoint(&precomp, 7)  // 8
   519  	t3.p256StorePoint(&precomp, 15) // 16
   520  
   521  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   522  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   523  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   524  	t0.p256StorePoint(&precomp, 2) // 3
   525  	t1.p256StorePoint(&precomp, 4) // 5
   526  	t2.p256StorePoint(&precomp, 8) // 9
   527  
   528  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   529  	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
   530  	t0.p256StorePoint(&precomp, 5) // 6
   531  	t1.p256StorePoint(&precomp, 9) // 10
   532  
   533  	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
   534  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   535  	t2.p256StorePoint(&precomp, 6)  // 7
   536  	t1.p256StorePoint(&precomp, 10) // 11
   537  
   538  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   539  	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
   540  	t0.p256StorePoint(&precomp, 11) // 12
   541  	t2.p256StorePoint(&precomp, 13) // 14
   542  
   543  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   544  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   545  	t0.p256StorePoint(&precomp, 12) // 13
   546  	t2.p256StorePoint(&precomp, 14) // 15
   547  
   548  	// Start scanning the window from top bit
   549  	index := uint(254)
   550  	var sel, sign int
   551  
   552  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   553  	sel, _ = boothW5(uint(wvalue))
   554  
   555  	p256Select(p.xyz[0:12], precomp[0:], sel)
   556  	zero := sel
   557  
   558  	for index > 4 {
   559  		index -= 5
   560  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   561  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   562  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   563  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   564  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   565  
   566  		if index < 192 {
   567  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   568  		} else {
   569  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   570  		}
   571  
   572  		sel, sign = boothW5(uint(wvalue))
   573  
   574  		p256Select(t0.xyz[0:], precomp[0:], sel)
   575  		p256NegCond(t0.xyz[4:8], sign)
   576  		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   577  		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   578  		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   579  		zero |= sel
   580  	}
   581  
   582  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   583  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   584  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   585  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   586  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   587  
   588  	wvalue = (scalar[0] << 1) & 0x3f
   589  	sel, sign = boothW5(uint(wvalue))
   590  
   591  	p256Select(t0.xyz[0:], precomp[0:], sel)
   592  	p256NegCond(t0.xyz[4:8], sign)
   593  	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   594  	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   595  	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   596  }