github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/elliptic/p256_asm.go

github.com/FISCO-BCOS/crypto@v0.0.0-20200202032121-bd8ab0b5d4f1/elliptic/p256_asm.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  // +build amd64 arm64
    14  
    15  package elliptic
    16  
    17  import (
    18  	"math/big"
    19  	"sync"
    20  )
    21  
    22  type (
    23  	p256Curve struct {
    24  		*CurveParams
    25  	}
    26  
    27  	p256Point struct {
    28  		xyz [12]uint64
    29  	}
    30  )
    31  
    32  var (
    33  	p256            p256Curve
    34  	p256Precomputed *[43][32 * 8]uint64
    35  	precomputeOnce  sync.Once
    36  )
    37  
    38  func initP256() {
    39  	// See FIPS 186-3, section D.2.3
    40  	p256.CurveParams = &CurveParams{Name: "P-256"}
    41  	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
    42  	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
    43  	p256.A, _ = new(big.Int).SetString("ffffffff00000001000000000000000000000000fffffffffffffffffffffffc", 16)
    44  	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
    45  	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
    46  	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
    47  	p256.BitSize = 256
    48  }
    49  
    50  func (curve p256Curve) Params() *CurveParams {
    51  	return curve.CurveParams
    52  }
    53  
    54  // Functions implemented in p256_asm_*64.s
    55  // Montgomery multiplication modulo P256
    56  //go:noescape
    57  func p256Mul(res, in1, in2 []uint64)
    58  
    59  // Montgomery square modulo P256, repeated n times (n >= 1)
    60  //go:noescape
    61  func p256Sqr(res, in []uint64, n int)
    62  
    63  // Montgomery multiplication by 1
    64  //go:noescape
    65  func p256FromMont(res, in []uint64)
    66  
    67  // iff cond == 1  val <- -val
    68  //go:noescape
    69  func p256NegCond(val []uint64, cond int)
    70  
    71  // if cond == 0 res <- b; else res <- a
    72  //go:noescape
    73  func p256MovCond(res, a, b []uint64, cond int)
    74  
    75  // Endianness swap
    76  //go:noescape
    77  func p256BigToLittle(res []uint64, in []byte)
    78  
    79  //go:noescape
    80  func p256LittleToBig(res []byte, in []uint64)
    81  
    82  // Constant time table access
    83  //go:noescape
    84  func p256Select(point, table []uint64, idx int)
    85  
    86  //go:noescape
    87  func p256SelectBase(point, table []uint64, idx int)
    88  
    89  // Montgomery multiplication modulo Ord(G)
    90  //go:noescape
    91  func p256OrdMul(res, in1, in2 []uint64)
    92  
    93  // Montgomery square modulo Ord(G), repeated n times
    94  //go:noescape
    95  func p256OrdSqr(res, in []uint64, n int)
    96  
    97  // Point add with in2 being affine point
    98  // If sign == 1 -> in2 = -in2
    99  // If sel == 0 -> res = in1
   100  // if zero == 0 -> res = in2
   101  //go:noescape
   102  func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
   103  
   104  // Point add. Returns one if the two input points were equal and zero
   105  // otherwise. (Note that, due to the way that the equations work out, some
   106  // representations of ∞ are considered equal to everything by this function.)
   107  //go:noescape
   108  func p256PointAddAsm(res, in1, in2 []uint64) int
   109  
   110  // Point double
   111  //go:noescape
   112  func p256PointDoubleAsm(res, in []uint64)
   113  
   114  func (curve p256Curve) Inverse(k *big.Int) *big.Int {
   115  	if k.Sign() < 0 {
   116  		// This should never happen.
   117  		k = new(big.Int).Neg(k)
   118  	}
   119  
   120  	if k.Cmp(p256.N) >= 0 {
   121  		// This should never happen.
   122  		k = new(big.Int).Mod(k, p256.N)
   123  	}
   124  
   125  	// table will store precomputed powers of x.
   126  	var table [4 * 9]uint64
   127  	var (
   128  		_1      = table[4*0 : 4*1]
   129  		_11     = table[4*1 : 4*2]
   130  		_101    = table[4*2 : 4*3]
   131  		_111    = table[4*3 : 4*4]
   132  		_1111   = table[4*4 : 4*5]
   133  		_10101  = table[4*5 : 4*6]
   134  		_101111 = table[4*6 : 4*7]
   135  		x       = table[4*7 : 4*8]
   136  		t       = table[4*8 : 4*9]
   137  	)
   138  
   139  	fromBig(x[:], k)
   140  	// This code operates in the Montgomery domain where R = 2^256 mod n
   141  	// and n is the order of the scalar field. (See initP256 for the
   142  	// value.) Elements in the Montgomery domain take the form a×R and
   143  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   144  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   145  	// i.e. converts x into the Montgomery domain.
   146  	// Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
   147  	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
   148  	p256OrdMul(_1, x, RR)      // _1
   149  	p256OrdSqr(x, _1, 1)       // _10
   150  	p256OrdMul(_11, x, _1)     // _11
   151  	p256OrdMul(_101, x, _11)   // _101
   152  	p256OrdMul(_111, x, _101)  // _111
   153  	p256OrdSqr(x, _101, 1)     // _1010
   154  	p256OrdMul(_1111, _101, x) // _1111
   155  
   156  	p256OrdSqr(t, x, 1)          // _10100
   157  	p256OrdMul(_10101, t, _1)    // _10101
   158  	p256OrdSqr(x, _10101, 1)     // _101010
   159  	p256OrdMul(_101111, _101, x) // _101111
   160  	p256OrdMul(x, _10101, x)     // _111111 = x6
   161  	p256OrdSqr(t, x, 2)          // _11111100
   162  	p256OrdMul(t, t, _11)        // _11111111 = x8
   163  	p256OrdSqr(x, t, 8)          // _ff00
   164  	p256OrdMul(x, x, t)          // _ffff = x16
   165  	p256OrdSqr(t, x, 16)         // _ffff0000
   166  	p256OrdMul(t, t, x)          // _ffffffff = x32
   167  
   168  	p256OrdSqr(x, t, 64)
   169  	p256OrdMul(x, x, t)
   170  	p256OrdSqr(x, x, 32)
   171  	p256OrdMul(x, x, t)
   172  
   173  	sqrs := []uint8{
   174  		6, 5, 4, 5, 5,
   175  		4, 3, 3, 5, 9,
   176  		6, 2, 5, 6, 5,
   177  		4, 5, 5, 3, 10,
   178  		2, 5, 5, 3, 7, 6}
   179  	muls := [][]uint64{
   180  		_101111, _111, _11, _1111, _10101,
   181  		_101, _101, _101, _111, _101111,
   182  		_1111, _1, _1, _1111, _111,
   183  		_111, _111, _101, _11, _101111,
   184  		_11, _11, _11, _1, _10101, _1111}
   185  
   186  	for i, s := range sqrs {
   187  		p256OrdSqr(x, x, int(s))
   188  		p256OrdMul(x, x, muls[i])
   189  	}
   190  
   191  	// Multiplying by one in the Montgomery domain converts a Montgomery
   192  	// value out of the domain.
   193  	one := []uint64{1, 0, 0, 0}
   194  	p256OrdMul(x, x, one)
   195  
   196  	xOut := make([]byte, 32)
   197  	p256LittleToBig(xOut, x)
   198  	return new(big.Int).SetBytes(xOut)
   199  }
   200  
   201  // fromBig converts a *big.Int into a format used by this code.
   202  func fromBig(out []uint64, big *big.Int) {
   203  	for i := range out {
   204  		out[i] = 0
   205  	}
   206  
   207  	for i, v := range big.Bits() {
   208  		out[i] = uint64(v)
   209  	}
   210  }
   211  
   212  // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
   213  // to out. If the scalar is equal or greater than the order of the group, it's
   214  // reduced modulo that order.
   215  func p256GetScalar(out []uint64, in []byte) {
   216  	n := new(big.Int).SetBytes(in)
   217  
   218  	if n.Cmp(p256.N) >= 0 {
   219  		n.Mod(n, p256.N)
   220  	}
   221  	fromBig(out, n)
   222  }
   223  
   224  // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
   225  // underlying field of the curve. (See initP256 for the value.) Thus rr here is
   226  // R×R mod p. See comment in Inverse about how this is used.
   227  var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
   228  
   229  func maybeReduceModP(in *big.Int) *big.Int {
   230  	if in.Cmp(p256.P) < 0 {
   231  		return in
   232  	}
   233  	return new(big.Int).Mod(in, p256.P)
   234  }
   235  
   236  func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   237  	scalarReversed := make([]uint64, 4)
   238  	var r1, r2 p256Point
   239  	p256GetScalar(scalarReversed, baseScalar)
   240  	r1IsInfinity := scalarIsZero(scalarReversed)
   241  	r1.p256BaseMult(scalarReversed)
   242  
   243  	p256GetScalar(scalarReversed, scalar)
   244  	r2IsInfinity := scalarIsZero(scalarReversed)
   245  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   246  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   247  	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
   248  	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
   249  
   250  	// This sets r2's Z value to 1, in the Montgomery domain.
   251  	r2.xyz[8] = 0x0000000000000001
   252  	r2.xyz[9] = 0xffffffff00000000
   253  	r2.xyz[10] = 0xffffffffffffffff
   254  	r2.xyz[11] = 0x00000000fffffffe
   255  
   256  	r2.p256ScalarMult(scalarReversed)
   257  
   258  	var sum, double p256Point
   259  	pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
   260  	p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
   261  	sum.CopyConditional(&double, pointsEqual)
   262  	sum.CopyConditional(&r1, r2IsInfinity)
   263  	sum.CopyConditional(&r2, r1IsInfinity)
   264  
   265  	return sum.p256PointToAffine()
   266  }
   267  
   268  func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   269  	scalarReversed := make([]uint64, 4)
   270  	p256GetScalar(scalarReversed, scalar)
   271  
   272  	var r p256Point
   273  	r.p256BaseMult(scalarReversed)
   274  	return r.p256PointToAffine()
   275  }
   276  
   277  func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   278  	scalarReversed := make([]uint64, 4)
   279  	p256GetScalar(scalarReversed, scalar)
   280  
   281  	var r p256Point
   282  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   283  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   284  	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
   285  	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
   286  	// This sets r2's Z value to 1, in the Montgomery domain.
   287  	r.xyz[8] = 0x0000000000000001
   288  	r.xyz[9] = 0xffffffff00000000
   289  	r.xyz[10] = 0xffffffffffffffff
   290  	r.xyz[11] = 0x00000000fffffffe
   291  
   292  	r.p256ScalarMult(scalarReversed)
   293  	return r.p256PointToAffine()
   294  }
   295  
   296  // uint64IsZero returns 1 if x is zero and zero otherwise.
   297  func uint64IsZero(x uint64) int {
   298  	x = ^x
   299  	x &= x >> 32
   300  	x &= x >> 16
   301  	x &= x >> 8
   302  	x &= x >> 4
   303  	x &= x >> 2
   304  	x &= x >> 1
   305  	return int(x & 1)
   306  }
   307  
   308  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   309  // otherwise.
   310  func scalarIsZero(scalar []uint64) int {
   311  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   312  }
   313  
   314  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   315  	zInv := make([]uint64, 4)
   316  	zInvSq := make([]uint64, 4)
   317  	p256Inverse(zInv, p.xyz[8:12])
   318  	p256Sqr(zInvSq, zInv, 1)
   319  	p256Mul(zInv, zInv, zInvSq)
   320  
   321  	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
   322  	p256Mul(zInv, p.xyz[4:8], zInv)
   323  
   324  	p256FromMont(zInvSq, zInvSq)
   325  	p256FromMont(zInv, zInv)
   326  
   327  	xOut := make([]byte, 32)
   328  	yOut := make([]byte, 32)
   329  	p256LittleToBig(xOut, zInvSq)
   330  	p256LittleToBig(yOut, zInv)
   331  
   332  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   333  }
   334  
   335  // CopyConditional copies overwrites p with src if v == 1, and leaves p
   336  // unchanged if v == 0.
   337  func (p *p256Point) CopyConditional(src *p256Point, v int) {
   338  	pMask := uint64(v) - 1
   339  	srcMask := ^pMask
   340  
   341  	for i, n := range p.xyz {
   342  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   343  	}
   344  }
   345  
   346  // p256Inverse sets out to in^-1 mod p.
   347  func p256Inverse(out, in []uint64) {
   348  	var stack [6 * 4]uint64
   349  	p2 := stack[4*0 : 4*0+4]
   350  	p4 := stack[4*1 : 4*1+4]
   351  	p8 := stack[4*2 : 4*2+4]
   352  	p16 := stack[4*3 : 4*3+4]
   353  	p32 := stack[4*4 : 4*4+4]
   354  
   355  	p256Sqr(out, in, 1)
   356  	p256Mul(p2, out, in) // 3*p
   357  
   358  	p256Sqr(out, p2, 2)
   359  	p256Mul(p4, out, p2) // f*p
   360  
   361  	p256Sqr(out, p4, 4)
   362  	p256Mul(p8, out, p4) // ff*p
   363  
   364  	p256Sqr(out, p8, 8)
   365  	p256Mul(p16, out, p8) // ffff*p
   366  
   367  	p256Sqr(out, p16, 16)
   368  	p256Mul(p32, out, p16) // ffffffff*p
   369  
   370  	p256Sqr(out, p32, 32)
   371  	p256Mul(out, out, in)
   372  
   373  	p256Sqr(out, out, 128)
   374  	p256Mul(out, out, p32)
   375  
   376  	p256Sqr(out, out, 32)
   377  	p256Mul(out, out, p32)
   378  
   379  	p256Sqr(out, out, 16)
   380  	p256Mul(out, out, p16)
   381  
   382  	p256Sqr(out, out, 8)
   383  	p256Mul(out, out, p8)
   384  
   385  	p256Sqr(out, out, 4)
   386  	p256Mul(out, out, p4)
   387  
   388  	p256Sqr(out, out, 2)
   389  	p256Mul(out, out, p2)
   390  
   391  	p256Sqr(out, out, 2)
   392  	p256Mul(out, out, in)
   393  }
   394  
   395  func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
   396  	copy(r[index*12:], p.xyz[:])
   397  }
   398  
   399  func boothW5(in uint) (int, int) {
   400  	var s uint = ^((in >> 5) - 1)
   401  	var d uint = (1 << 6) - in - 1
   402  	d = (d & s) | (in & (^s))
   403  	d = (d >> 1) + (d & 1)
   404  	return int(d), int(s & 1)
   405  }
   406  
   407  func boothW6(in uint) (int, int) {
   408  	var s uint = ^((in >> 6) - 1)
   409  	var d uint = (1 << 7) - in - 1
   410  	d = (d & s) | (in & (^s))
   411  	d = (d >> 1) + (d & 1)
   412  	return int(d), int(s & 1)
   413  }
   414  
   415  func initTable() {
   416  	p256Precomputed = new([43][32 * 8]uint64)
   417  
   418  	basePoint := []uint64{
   419  		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
   420  		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
   421  		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
   422  	}
   423  	t1 := make([]uint64, 12)
   424  	t2 := make([]uint64, 12)
   425  	copy(t2, basePoint)
   426  
   427  	zInv := make([]uint64, 4)
   428  	zInvSq := make([]uint64, 4)
   429  	for j := 0; j < 32; j++ {
   430  		copy(t1, t2)
   431  		for i := 0; i < 43; i++ {
   432  			// The window size is 6 so we need to double 6 times.
   433  			if i != 0 {
   434  				for k := 0; k < 6; k++ {
   435  					p256PointDoubleAsm(t1, t1)
   436  				}
   437  			}
   438  			// Convert the point to affine form. (Its values are
   439  			// still in Montgomery form however.)
   440  			p256Inverse(zInv, t1[8:12])
   441  			p256Sqr(zInvSq, zInv, 1)
   442  			p256Mul(zInv, zInv, zInvSq)
   443  
   444  			p256Mul(t1[:4], t1[:4], zInvSq)
   445  			p256Mul(t1[4:8], t1[4:8], zInv)
   446  
   447  			copy(t1[8:12], basePoint[8:12])
   448  			// Update the table entry
   449  			copy(p256Precomputed[i][j*8:], t1[:8])
   450  		}
   451  		if j == 0 {
   452  			p256PointDoubleAsm(t2, basePoint)
   453  		} else {
   454  			p256PointAddAsm(t2, t2, basePoint)
   455  		}
   456  	}
   457  }
   458  
   459  func (p *p256Point) p256BaseMult(scalar []uint64) {
   460  	precomputeOnce.Do(initTable)
   461  
   462  	wvalue := (scalar[0] << 1) & 0x7f
   463  	sel, sign := boothW6(uint(wvalue))
   464  	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
   465  	p256NegCond(p.xyz[4:8], sign)
   466  
   467  	// (This is one, in the Montgomery domain.)
   468  	p.xyz[8] = 0x0000000000000001
   469  	p.xyz[9] = 0xffffffff00000000
   470  	p.xyz[10] = 0xffffffffffffffff
   471  	p.xyz[11] = 0x00000000fffffffe
   472  
   473  	var t0 p256Point
   474  	// (This is one, in the Montgomery domain.)
   475  	t0.xyz[8] = 0x0000000000000001
   476  	t0.xyz[9] = 0xffffffff00000000
   477  	t0.xyz[10] = 0xffffffffffffffff
   478  	t0.xyz[11] = 0x00000000fffffffe
   479  
   480  	index := uint(5)
   481  	zero := sel
   482  
   483  	for i := 1; i < 43; i++ {
   484  		if index < 192 {
   485  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
   486  		} else {
   487  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
   488  		}
   489  		index += 6
   490  		sel, sign = boothW6(uint(wvalue))
   491  		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
   492  		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
   493  		zero |= sel
   494  	}
   495  }
   496  
   497  func (p *p256Point) p256ScalarMult(scalar []uint64) {
   498  	// precomp is a table of precomputed points that stores powers of p
   499  	// from p^1 to p^16.
   500  	var precomp [16 * 4 * 3]uint64
   501  	var t0, t1, t2, t3 p256Point
   502  
   503  	// Prepare the table
   504  	p.p256StorePoint(&precomp, 0) // 1
   505  
   506  	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
   507  	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
   508  	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
   509  	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
   510  	t0.p256StorePoint(&precomp, 1)  // 2
   511  	t1.p256StorePoint(&precomp, 3)  // 4
   512  	t2.p256StorePoint(&precomp, 7)  // 8
   513  	t3.p256StorePoint(&precomp, 15) // 16
   514  
   515  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   516  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   517  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   518  	t0.p256StorePoint(&precomp, 2) // 3
   519  	t1.p256StorePoint(&precomp, 4) // 5
   520  	t2.p256StorePoint(&precomp, 8) // 9
   521  
   522  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   523  	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
   524  	t0.p256StorePoint(&precomp, 5) // 6
   525  	t1.p256StorePoint(&precomp, 9) // 10
   526  
   527  	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
   528  	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
   529  	t2.p256StorePoint(&precomp, 6)  // 7
   530  	t1.p256StorePoint(&precomp, 10) // 11
   531  
   532  	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
   533  	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
   534  	t0.p256StorePoint(&precomp, 11) // 12
   535  	t2.p256StorePoint(&precomp, 13) // 14
   536  
   537  	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
   538  	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
   539  	t0.p256StorePoint(&precomp, 12) // 13
   540  	t2.p256StorePoint(&precomp, 14) // 15
   541  
   542  	// Start scanning the window from top bit
   543  	index := uint(254)
   544  	var sel, sign int
   545  
   546  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   547  	sel, _ = boothW5(uint(wvalue))
   548  
   549  	p256Select(p.xyz[0:12], precomp[0:], sel)
   550  	zero := sel
   551  
   552  	for index > 4 {
   553  		index -= 5
   554  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   555  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   556  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   557  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   558  		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   559  
   560  		if index < 192 {
   561  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   562  		} else {
   563  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   564  		}
   565  
   566  		sel, sign = boothW5(uint(wvalue))
   567  
   568  		p256Select(t0.xyz[0:], precomp[0:], sel)
   569  		p256NegCond(t0.xyz[4:8], sign)
   570  		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   571  		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   572  		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   573  		zero |= sel
   574  	}
   575  
   576  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   577  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   578  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   579  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   580  	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
   581  
   582  	wvalue = (scalar[0] << 1) & 0x3f
   583  	sel, sign = boothW5(uint(wvalue))
   584  
   585  	p256Select(t0.xyz[0:], precomp[0:], sel)
   586  	p256NegCond(t0.xyz[4:8], sign)
   587  	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
   588  	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
   589  	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
   590  }