github.com/hellobchain/newcryptosm@v0.0.0-20221019060107-edb949a317e9/sm2/sm2_amd64.go

github.com/hellobchain/newcryptosm@v0.0.0-20221019060107-edb949a317e9/sm2/sm2_amd64.go (about)

     1  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     2  // implementation of SM2. The optimizations performed here are described in
     3  // detail in:
     4  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     5  //                          256-bit primes"
     6  // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
     7  // https://eprint.iacr.org/2013/816.pdf
     8  
     9  //go:build amd64
    10  // +build amd64
    11  
    12  package sm2
    13  
    14  import (
    15  	"crypto/elliptic"
    16  	"math/big"
    17  	"sync"
    18  )
    19  
type (
	// sm2Curve is the assembly-backed SM2 curve. It embeds the generic
	// curve parameters and overrides the arithmetic methods defined in this
	// file.
	sm2Curve struct {
		*elliptic.CurveParams
	}

	// sm2Point is a curve point stored as three 4-word field elements:
	// xyz[0:4] is X, xyz[4:8] is Y and xyz[8:12] is Z. The coordinates are
	// kept in the Montgomery domain; sm2PointToAffine recovers the affine
	// point as (X×Z^-2, Y×Z^-3).
	sm2Point struct {
		xyz [12]uint64
	}
)
    29  
var (
	// sm2 is the package-level curve value; its parameters are filled in
	// by initSM2.
	sm2            sm2Curve
	// sm2Precomputed is the base-point table built by sm2InitTable: 43
	// windows, each holding 32 affine points of 8 words (x then y).
	sm2Precomputed *[43][32 * 8]uint64
	// sm2precomputeOnce makes sm2BaseMult build sm2Precomputed only once.
	sm2precomputeOnce sync.Once
)
    35  
    36  func initSM2() {
    37  	sm2.CurveParams = &elliptic.CurveParams{Name: "SM2"}
    38  	sm2.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
    39  	sm2.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
    40  	sm2.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16)
    41  	sm2.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
    42  	sm2.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
    43  	sm2.BitSize = 256
    44  	return
    45  }
    46  
// Params returns the curve's domain parameters, i.e. the embedded
// elliptic.CurveParams.
func (curve sm2Curve) Params() *elliptic.CurveParams {
	return curve.CurveParams
}
    50  
// Functions implemented in sm2_asm_amd64.s

// sm2Mul is Montgomery multiplication modulo the SM2 field prime: res is set
// to the Montgomery product of in1 and in2. Callers in this file pass
// four-word operands and frequently alias res with one of the inputs.
//go:noescape
func sm2Mul(res, in1, in2 []uint64)
    55  
// sm2Sqr is Montgomery squaring modulo the SM2 field prime, repeated n times
// (n >= 1): res receives in squared n times in succession. Callers in this
// file alias res with in.
//go:noescape
func sm2Sqr(res, in []uint64, n int)
    59  
// sm2FromMont is Montgomery multiplication by 1. sm2PointToAffine uses it as
// the last step before serialising the affine coordinates, i.e. to take a
// field element out of the Montgomery domain.
//go:noescape
func sm2FromMont(res, in []uint64)
    63  
// sm2NegCond replaces val with -val iff cond == 1 and leaves it unchanged
// otherwise. Callers apply it to the Y coordinate of a selected table point
// using the sign returned by the Booth recoding.
// NOTE(review): the negation is presumably modulo the field prime — confirm
// against the assembly.
//go:noescape
func sm2NegCond(val []uint64, cond int)
    67  
// sm2MovCond is a conditional move: if cond == 0 res is set to b, otherwise
// res is set to a. Callers pass a Booth digit as cond, so any non-zero digit
// selects a. Callers in this file pass full 12-word points and alias res
// with a.
//go:noescape
func sm2MovCond(res, a, b []uint64, cond int)
    71  
// sm2BigToLittle performs an endianness swap from a byte slice into 64-bit
// words. It is not called from this file.
// NOTE(review): presumably the inverse of sm2LittleToBig (32 big-endian
// bytes into four little-endian words) — confirm against the assembly.
//go:noescape
func sm2BigToLittle(res []uint64, in []byte)
    75  
// sm2LittleToBig performs an endianness swap from 64-bit words into a byte
// slice. Callers pass four words and a 32-byte buffer, and hand the result
// to big.Int.SetBytes, which expects big-endian bytes.
//go:noescape
func sm2LittleToBig(res []byte, in []uint64)
    78  
// sm2Select is a constant-time table access: it loads the entry chosen by
// idx from table into point. sm2ScalarMult calls it with a 12-word point
// and a table whose slot i-1 holds i times the input point.
// NOTE(review): idx == 0 presumably selects no entry; callers track that
// case separately through their zero flag — confirm against the assembly.
//go:noescape
func sm2Select(point, table []uint64, idx int)
    82  
// sm2SelectBase is the constant-time table access used for the precomputed
// base-point table: sm2BaseMult calls it with an 8-word affine point and one
// window of sm2Precomputed.
// NOTE(review): idx == 0 presumably selects no entry — confirm against the
// assembly.
//go:noescape
func sm2SelectBase(point, table []uint64, idx int)
    85  
// sm2OrdMul is Montgomery multiplication modulo Ord(G), the order of the
// base point. Inverse passes slices longer than four words as well; it
// relies on only the first four words of each operand being used.
//go:noescape
func sm2OrdMul(res, in1, in2 []uint64)
    89  
// sm2OrdSqr is Montgomery squaring modulo Ord(G), the order of the base
// point, repeated n times. Callers in this file alias res with in.
//go:noescape
func sm2OrdSqr(res, in []uint64, n int)
    93  
// sm2PointAddAffineAsm adds the affine point in2 to in1 and stores the sum
// in res.
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// if zero == 0 -> res = in2
// sm2BaseMult passes the Booth digit as sel and the OR of all earlier digits
// as zero, so the special cases cover "nothing to add" and "accumulator is
// still empty".
//go:noescape
func sm2PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
   100  
// sm2PointAddAsm adds the points in1 and in2 and stores the sum in res. It
// returns one if the two input points were equal and zero otherwise. (Note
// that, due to the way that the equations work out, some representations of
// ∞ are considered equal to everything by this function.) CombinedMult
// substitutes a doubling for the sum when one is returned.
//go:noescape
func sm2PointAddAsm(res, in1, in2 []uint64) int
   106  
// sm2PointDoubleAsm doubles the point in and stores the result in res.
// Callers in this file frequently alias res with in.
//go:noescape
func sm2PointDoubleAsm(res, in []uint64)
   110  
   111  func (curve sm2Curve) Inverse(k *big.Int) *big.Int {
   112  	if k.Sign() < 0 {
   113  		// This should never happen.
   114  		k = new(big.Int).Neg(k)
   115  	}
   116  
   117  	if k.Cmp(sm2.N) >= 0 {
   118  		// This should never happen.
   119  		k = new(big.Int).Mod(k, sm2.N)
   120  	}
   121  
   122  	// table will store precomputed powers of x. The four words at index
   123  	// 4×i store x^(i+1).
   124  	var table [4 * 15]uint64
   125  
   126  	x := make([]uint64, 4)
   127  	fromBig(x[:], k)
   128  	// This code operates in the Montgomery domain where R = 2^256 mod n
   129  	// and n is the order of the scalar field. (See initSM2 for the
   130  	// value.) Elements in the Montgomery domain take the form a×R and
   131  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   132  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   133  	// i.e. converts x into the Montgomery domain.
   134  	RR := []uint64{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b}
   135  	sm2OrdMul(table[:4], x, RR)
   136  
   137  	// Prepare the table, no need in constant time access, because the
   138  	// power is not a secret. (Entry 0 is never used.)
   139  	for i := 2; i < 16; i += 2 {
   140  		sm2OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
   141  		sm2OrdMul(table[4*i:], table[4*(i-1):], table[:4])
   142  	}
   143  
   144  	x[0] = table[4*14+0] // f
   145  	x[1] = table[4*14+1]
   146  	x[2] = table[4*14+2]
   147  	x[3] = table[4*14+3]
   148  
   149  	sm2OrdSqr(x, x, 4)
   150  	sm2OrdMul(x, x, table[4*14:4*14+4]) // ff
   151  	t := make([]uint64, 4, 4)
   152  	t[0] = x[0]
   153  	t[1] = x[1]
   154  	t[2] = x[2]
   155  	t[3] = x[3]
   156  
   157  	sm2OrdSqr(x, x, 8)
   158  	sm2OrdMul(x, x, t) // ffff
   159  
   160  	sm2OrdSqr(x, x, 8)
   161  	sm2OrdMul(x, x, t) // ffffff
   162  	t[0] = x[0]
   163  	t[1] = x[1]
   164  	t[2] = x[2]
   165  	t[3] = x[3]
   166  
   167  	sm2OrdSqr(x, x, 4)
   168  	sm2OrdMul(x, x, table[4*14:4*14+4]) // fffffff
   169  
   170  	sm2OrdSqr(x, x, 4)
   171  	sm2OrdMul(x, x, table[4*13:4*13+4]) // fffffffe
   172  
   173  	for i := 0; i < 4; i++ {
   174  		sm2OrdSqr(x, x, 24)
   175  		sm2OrdMul(x, x, t) // fffffffe_ffffff fffffffe_ffffffff_ffff fffffffe_ffffffff_ffffffff_ff fffffffe_ffffffff_ffffffff_ffffffff
   176  	}
   177  
   178  	// Remaining 32 windows
   179  	expLo := [32]byte{0x7, 0x2, 0x0, 0x3, 0xd, 0xf, 0x6, 0xb, 0x2, 0x1, 0xc, 0x6, 0x0, 0x5, 0x2, 0xb, 0x5, 0x3, 0xb, 0xb, 0xf, 0x4, 0x0, 0x9, 0x3, 0x9, 0xd, 0x5, 0x4, 0x1, 0x2, 0x1}
   180  	for i := 0; i < 32; i++ {
   181  		sm2OrdSqr(x, x, 4)
   182  		if expLo[i] != 0 {
   183  			sm2OrdMul(x, x, table[4*(expLo[i]-1):])
   184  		}
   185  	}
   186  
   187  	// Multiplying by one in the Montgomery domain converts a Montgomery
   188  	// value out of the domain.
   189  	one := []uint64{1, 0, 0, 0}
   190  	sm2OrdMul(x, x, one)
   191  
   192  	xOut := make([]byte, 32)
   193  	sm2LittleToBig(xOut, x)
   194  	return new(big.Int).SetBytes(xOut)
   195  }
   196  
   197  // fromBig converts a *big.Int into a format used by this code.
   198  func fromBig(out []uint64, big *big.Int) {
   199  	for i := range out {
   200  		out[i] = 0
   201  	}
   202  
   203  	for i, v := range big.Bits() {
   204  		out[i] = uint64(v)
   205  	}
   206  }
   207  
   208  // sm2GetScalar endian-swaps the big-endian scalar value from in and writes it
   209  // to out. If the scalar is equal or greater than the order of the group, it's
   210  // reduced modulo that order.
   211  func sm2GetScalar(out []uint64, in []byte) {
   212  	n := new(big.Int).SetBytes(in)
   213  
   214  	if n.Cmp(sm2.N) >= 0 {
   215  		n.Mod(n, sm2.N)
   216  	}
   217  	fromBig(out, n)
   218  }
   219  
// sm2rr is R×R mod p, stored as four 64-bit words, least significant first.
// sm2Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initSM2 for the value.) Multiplying a
// field element by sm2rr with sm2Mul therefore converts it into the
// Montgomery domain. See the comment in Inverse about how this is used.
var sm2rr = []uint64{0x0000000200000003, 0x00000002ffffffff, 0x0000000100000001, 0x0000000400000002}
   224  
   225  func maybeReduceModP(in *big.Int) *big.Int {
   226  	if in.Cmp(sm2.P) < 0 {
   227  		return in
   228  	}
   229  	return new(big.Int).Mod(in, sm2.P)
   230  }
   231  
   232  func (curve sm2Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   233  	scalarReversed := make([]uint64, 4)
   234  	var r1, r2 sm2Point
   235  	sm2GetScalar(scalarReversed, baseScalar)
   236  	r1IsInfinity := scalarIsZero(scalarReversed)
   237  	r1.sm2BaseMult(scalarReversed)
   238  
   239  	sm2GetScalar(scalarReversed, scalar)
   240  	r2IsInfinity := scalarIsZero(scalarReversed)
   241  	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
   242  	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
   243  	sm2Mul(r2.xyz[0:4], r2.xyz[0:4], sm2rr[:])
   244  	sm2Mul(r2.xyz[4:8], r2.xyz[4:8], sm2rr[:])
   245  
   246  	// This sets r2's Z value to 1, in the Montgomery domain.
   247  	r2.xyz[8] = 0x0000000000000001
   248  	r2.xyz[9] = 0x00000000ffffffff
   249  	r2.xyz[10] = 0x0000000000000000
   250  	r2.xyz[11] = 0x0000000100000000
   251  
   252  	r2.sm2ScalarMult(scalarReversed)
   253  
   254  	var sum, double sm2Point
   255  	pointsEqual := sm2PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
   256  	sm2PointDoubleAsm(double.xyz[:], r1.xyz[:])
   257  	sum.CopyConditional(&double, pointsEqual)
   258  	sum.CopyConditional(&r1, r2IsInfinity)
   259  	sum.CopyConditional(&r2, r1IsInfinity)
   260  
   261  	return sum.sm2PointToAffine()
   262  }
   263  
   264  func (curve sm2Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   265  	scalarReversed := make([]uint64, 4)
   266  	sm2GetScalar(scalarReversed, scalar)
   267  
   268  	var r sm2Point
   269  	r.sm2BaseMult(scalarReversed)
   270  	return r.sm2PointToAffine()
   271  }
   272  
   273  func (curve sm2Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   274  	scalarReversed := make([]uint64, 4)
   275  	sm2GetScalar(scalarReversed, scalar)
   276  
   277  	var r sm2Point
   278  	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
   279  	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
   280  	sm2Mul(r.xyz[0:4], r.xyz[0:4], sm2rr[:])
   281  	sm2Mul(r.xyz[4:8], r.xyz[4:8], sm2rr[:])
   282  	// This sets r2's Z value to 1, in the Montgomery domain.
   283  	r.xyz[8] = 0x0000000000000001
   284  	r.xyz[9] = 0x00000000ffffffff
   285  	r.xyz[10] = 0x0000000000000000
   286  	r.xyz[11] = 0x0000000100000000
   287  
   288  	r.sm2ScalarMult(scalarReversed)
   289  	return r.sm2PointToAffine()
   290  }
   291  
   292  // uint64IsZero returns 1 if x is zero and zero otherwise.
   293  func uint64IsZero(x uint64) int {
   294  	x = ^x
   295  	x &= x >> 32
   296  	x &= x >> 16
   297  	x &= x >> 8
   298  	x &= x >> 4
   299  	x &= x >> 2
   300  	x &= x >> 1
   301  	return int(x & 1)
   302  }
   303  
   304  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   305  // otherwise.
   306  func scalarIsZero(scalar []uint64) int {
   307  	return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
   308  }
   309  
   310  func (p *sm2Point) sm2PointToAffine() (x, y *big.Int) {
   311  	zInv := make([]uint64, 4)
   312  	zInvSq := make([]uint64, 4)
   313  	sm2Inverse(zInv, p.xyz[8:12])
   314  	sm2Sqr(zInvSq, zInv, 1)
   315  	sm2Mul(zInv, zInv, zInvSq)
   316  
   317  	sm2Mul(zInvSq, p.xyz[0:4], zInvSq)
   318  	sm2Mul(zInv, p.xyz[4:8], zInv)
   319  
   320  	sm2FromMont(zInvSq, zInvSq)
   321  	sm2FromMont(zInv, zInv)
   322  
   323  	xOut := make([]byte, 32)
   324  	yOut := make([]byte, 32)
   325  	sm2LittleToBig(xOut, zInvSq)
   326  	sm2LittleToBig(yOut, zInv)
   327  
   328  	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
   329  }
   330  
   331  // CopyConditional copies overwrites p with src if v == 1, and leaves p
   332  // unchanged if v == 0.
   333  func (p *sm2Point) CopyConditional(src *sm2Point, v int) {
   334  	pMask := uint64(v) - 1
   335  	srcMask := ^pMask
   336  
   337  	for i, n := range p.xyz {
   338  		p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
   339  	}
   340  }
   341  
// sm2Inverse sets out to in^-1 mod p, computed as in^(p-2) with a fixed
// square-and-multiply chain. Both in and out are in the Montgomery domain
// used by sm2Mul and sm2Sqr. out must not alias in: in is read again after
// out has been written.
//
// The trailing comments give, in hex, the exponent of in held by the
// destination after that step.
func sm2Inverse(out, in []uint64) {
	// Scratch space for the intermediate powers; pN holds in^(2^N - 1).
	var stack [6 * 4]uint64
	p2 := stack[4*0 : 4*0+4]
	p4 := stack[4*1 : 4*1+4]
	p8 := stack[4*2 : 4*2+4]
	p16 := stack[4*3 : 4*3+4]
	p32 := stack[4*4 : 4*4+4]

	sm2Sqr(out, in, 1)
	sm2Mul(p2, out, in) // 3

	sm2Sqr(out, p2, 2)
	sm2Mul(p4, out, p2) // f

	sm2Sqr(out, p4, 4)
	sm2Mul(p8, out, p4) // ff

	sm2Sqr(out, p8, 8)
	sm2Mul(p16, out, p8) // ffff

	sm2Sqr(out, p16, 8)
	sm2Mul(out, out, p8) // ffffff

	sm2Sqr(out, out, 4)
	sm2Mul(out, out, p4) // fffffff

	sm2Sqr(out, out, 2)
	sm2Mul(out, out, p2) // 3fffffff

	sm2Sqr(out, out, 1)
	sm2Mul(out, out, in) // 7fffffff

	sm2Sqr(out, out, 1) // fffffffe

	sm2Mul(p32, out, in) // ffffffff

	// Append four 32-bit blocks of ones:
	// fffffffe_ffffffff_ffffffff_ffffffff_ffffffff.
	for j := 0; j < 4; j++ {
		sm2Sqr(out, out, 32)
		sm2Mul(out, out, p32)
	}

	// Append 00000000_ffffffff.
	sm2Sqr(out, out, 64)
	sm2Mul(out, out, p32)

	// Append the final 32 bits, fffffffd, as ffff | ff | f | 11b | 01b.
	sm2Sqr(out, out, 16)
	sm2Mul(out, out, p16)

	sm2Sqr(out, out, 8)
	sm2Mul(out, out, p8)

	sm2Sqr(out, out, 4)
	sm2Mul(out, out, p4)

	sm2Sqr(out, out, 2)
	sm2Mul(out, out, p2)

	sm2Sqr(out, out, 2)
	sm2Mul(out, out, in)
}
   402  
   403  func (p *sm2Point) sm2StorePoint(r *[16 * 4 * 3]uint64, index int) {
   404  	copy(r[index*12:], p.xyz[:])
   405  }
   406  
   407  func boothW5(in uint) (int, int) {
   408  	var s uint = ^((in >> 5) - 1)
   409  	var d uint = (1 << 6) - in - 1
   410  	d = (d & s) | (in & (^s))
   411  	d = (d >> 1) + (d & 1)
   412  	return int(d), int(s & 1)
   413  }
   414  func boothW6(in uint) (int, int) {
   415  	var s uint = ^((in >> 6) - 1)
   416  	var d uint = (1 << 7) - in - 1
   417  	d = (d & s) | (in & (^s))
   418  	d = (d >> 1) + (d & 1)
   419  	return int(d), int(s & 1)
   420  }
   421  
   422  func boothW7(in uint) (int, int) {
   423  	var s uint = ^((in >> 7) - 1)
   424  	var d uint = (1 << 8) - in - 1
   425  	d = (d & s) | (in & (^s))
   426  	d = (d >> 1) + (d & 1)
   427  	return int(d), int(s & 1)
   428  }
   429  
   430  func sm2InitTable() {
   431  	sm2Precomputed = new([43][32 * 8]uint64)
   432  
   433  	basePoint := []uint64{
   434  		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
   435  		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
   436  		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
   437  	}
   438  	t1 := make([]uint64, 12)
   439  	t2 := make([]uint64, 12)
   440  	copy(t2, basePoint)
   441  
   442  	zInv := make([]uint64, 4)
   443  	zInvSq := make([]uint64, 4)
   444  	for j := 0; j < 32; j++ {
   445  		copy(t1, t2)
   446  		for i := 0; i < 43; i++ {
   447  			// The window size is 6 so we need to double 6 times.
   448  			if i != 0 {
   449  				for k := 0; k < 6; k++ {
   450  					sm2PointDoubleAsm(t1, t1)
   451  				}
   452  			}
   453  			// Convert the point to affine form. (Its values are
   454  			// still in Montgomery form however.)
   455  			sm2Inverse(zInv, t1[8:12])
   456  			sm2Sqr(zInvSq, zInv, 1)
   457  			sm2Mul(zInv, zInv, zInvSq)
   458  
   459  			sm2Mul(t1[:4], t1[:4], zInvSq)
   460  			sm2Mul(t1[4:8], t1[4:8], zInv)
   461  
   462  			copy(t1[8:12], basePoint[8:12])
   463  			// Update the table entry
   464  			copy(sm2Precomputed[i][j*8:], t1[:8])
   465  		}
   466  		if j == 0 {
   467  			sm2PointDoubleAsm(t2, basePoint)
   468  		} else {
   469  			sm2PointAddAsm(t2, t2, basePoint)
   470  		}
   471  	}
   472  }
   473  
   474  func (p *sm2Point) sm2BaseMult(scalar []uint64) {
   475  	sm2precomputeOnce.Do(sm2InitTable)
   476  
   477  	wvalue := (scalar[0] << 1) & 0x7f
   478  	sel, sign := boothW6(uint(wvalue))
   479  	sm2SelectBase(p.xyz[0:8], sm2Precomputed[0][0:], sel)
   480  	sm2NegCond(p.xyz[4:8], sign)
   481  
   482  	// (This is one, in the Montgomery domain.)
   483  	p.xyz[8] = 0x0000000000000001
   484  	p.xyz[9] = 0x00000000ffffffff
   485  	p.xyz[10] = 0x0000000000000000
   486  	p.xyz[11] = 0x0000000100000000
   487  
   488  	var t0 sm2Point
   489  	// (This is one, in the Montgomery domain.)
   490  	t0.xyz[8] = 0x0000000000000001
   491  	t0.xyz[9] = 0x00000000ffffffff
   492  	t0.xyz[10] = 0x0000000000000000
   493  	t0.xyz[11] = 0x0000000100000000
   494  
   495  	index := uint(5)
   496  	zero := sel
   497  
   498  	for i := 1; i < 43; i++ {
   499  		if index < 192 {
   500  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
   501  		} else {
   502  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
   503  		}
   504  		index += 6
   505  		sel, sign = boothW6(uint(wvalue))
   506  		sm2SelectBase(t0.xyz[0:8], sm2Precomputed[i][0:], sel)
   507  		sm2PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
   508  		zero |= sel
   509  	}
   510  }
   511  
// sm2ScalarMult replaces p with scalar×p, where scalar is a four-word
// little-endian value. It builds a table of the first 16 multiples of p and
// then processes the scalar from the top in Booth-recoded 5-bit windows,
// doubling five times between windows.
func (p *sm2Point) sm2ScalarMult(scalar []uint64) {
	// precomp is a table of precomputed points that stores the multiples
	// of p from 1×p to 16×p; slot i holds (i+1)×p.
	var precomp [16 * 4 * 3]uint64
	var t0, t1, t2, t3 sm2Point

	// Prepare the table. The trailing comments give the multiple stored.
	p.sm2StorePoint(&precomp, 0) // 1

	sm2PointDoubleAsm(t0.xyz[:], p.xyz[:])
	sm2PointDoubleAsm(t1.xyz[:], t0.xyz[:])
	sm2PointDoubleAsm(t2.xyz[:], t1.xyz[:])
	sm2PointDoubleAsm(t3.xyz[:], t2.xyz[:])
	t0.sm2StorePoint(&precomp, 1)  // 2
	t1.sm2StorePoint(&precomp, 3)  // 4
	t2.sm2StorePoint(&precomp, 7)  // 8
	t3.sm2StorePoint(&precomp, 15) // 16

	sm2PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	sm2PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	sm2PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.sm2StorePoint(&precomp, 2) // 3
	t1.sm2StorePoint(&precomp, 4) // 5
	t2.sm2StorePoint(&precomp, 8) // 9

	sm2PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	sm2PointDoubleAsm(t1.xyz[:], t1.xyz[:])
	t0.sm2StorePoint(&precomp, 5) // 6
	t1.sm2StorePoint(&precomp, 9) // 10

	sm2PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
	sm2PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
	t2.sm2StorePoint(&precomp, 6)  // 7
	t1.sm2StorePoint(&precomp, 10) // 11

	sm2PointDoubleAsm(t0.xyz[:], t0.xyz[:])
	sm2PointDoubleAsm(t2.xyz[:], t2.xyz[:])
	t0.sm2StorePoint(&precomp, 11) // 12
	t2.sm2StorePoint(&precomp, 13) // 14

	sm2PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
	sm2PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
	t0.sm2StorePoint(&precomp, 12) // 13
	t2.sm2StorePoint(&precomp, 14) // 15

	// Start scanning the window from top bit
	index := uint(254)
	var sel, sign int

	// The topmost window only contains the two highest bits of the scalar;
	// its sign is discarded.
	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
	sel, _ = boothW5(uint(wvalue))

	sm2Select(p.xyz[0:12], precomp[0:], sel)
	// zero stays 0 until the first non-zero digit has been processed.
	zero := sel

	for index > 4 {
		index -= 5
		sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
		sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
		sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
		sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
		sm2PointDoubleAsm(p.xyz[:], p.xyz[:])

		// Extract 6 bits starting at index; they may straddle two words.
		if index < 192 {
			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
		} else {
			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
		}

		sel, sign = boothW5(uint(wvalue))

		sm2Select(t0.xyz[0:], precomp[0:], sel)
		sm2NegCond(t0.xyz[4:8], sign)
		sm2PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
		// If the digit is zero, discard the sum and keep p.
		sm2MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
		// If every earlier digit was zero, take the selected point itself
		// instead of the sum.
		sm2MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
		zero |= sel
	}

	sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
	sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
	sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
	sm2PointDoubleAsm(p.xyz[:], p.xyz[:])
	sm2PointDoubleAsm(p.xyz[:], p.xyz[:])

	// The lowest window has no lower neighbour, so a zero bit is shifted in
	// below it.
	wvalue = (scalar[0] << 1) & 0x3f
	sel, sign = boothW5(uint(wvalue))

	sm2Select(t0.xyz[0:], precomp[0:], sel)
	sm2NegCond(t0.xyz[4:8], sign)
	sm2PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
	sm2MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
	sm2MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}
   606  
// initOnce makes SM2 run initSM2 only once.
var initOnce sync.Once
   608  
// SM2 returns a Curve which implements SM2. The curve parameters are
// initialised on the first call; every call returns the same package-level
// curve value.
// The cryptographic operations are implemented using constant-time algorithms.
func SM2() elliptic.Curve {
	initOnce.Do(initSM2)
	return sm2
}