github.com/emmansun/gmsm@v0.29.1/internal/sm2ec/sm2p256.go

github.com/emmansun/gmsm@v0.29.1/internal/sm2ec/sm2p256.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Code generated by generate.go. DO NOT EDIT.
     6  
     7  //go:build purego || !(amd64 || arm64 || s390x || ppc64le)
     8  
     9  package sm2ec
    10  
    11  import (
    12  	"crypto/subtle"
    13  	"errors"
    14  	"github.com/emmansun/gmsm/internal/sm2ec/fiat"
    15  	"sync"
    16  )
    17  
// sm2p256ElementLength is the length in bytes of an encoded element of the
// base or scalar field of the SM2 P-256 curve.
const sm2p256ElementLength = 32
    21  
// SM2P256Point is a point on the SM2 P-256 curve. The zero value is NOT valid
// (its coordinate pointers are nil); obtain a usable point from
// NewSM2P256Point.
type SM2P256Point struct {
	// The point is represented in projective coordinates (X:Y:Z),
	// where x = X/Z and y = Y/Z. A zero Z marks the point at infinity.
	x, y, z *fiat.SM2P256Element
}
    28  
    29  // NewSM2P256Point returns a new SM2P256Point representing the point at infinity point.
    30  func NewSM2P256Point() *SM2P256Point {
    31  	return &SM2P256Point{
    32  		x: new(fiat.SM2P256Element),
    33  		y: new(fiat.SM2P256Element).One(),
    34  		z: new(fiat.SM2P256Element),
    35  	}
    36  }
    37  
    38  // SetGenerator sets p to the canonical generator and returns p.
    39  func (p *SM2P256Point) SetGenerator() *SM2P256Point {
    40  	p.x.SetBytes([]byte{0x32, 0xc4, 0xae, 0x2c, 0x1f, 0x19, 0x81, 0x19, 0x5f, 0x99, 0x4, 0x46, 0x6a, 0x39, 0xc9, 0x94, 0x8f, 0xe3, 0xb, 0xbf, 0xf2, 0x66, 0xb, 0xe1, 0x71, 0x5a, 0x45, 0x89, 0x33, 0x4c, 0x74, 0xc7})
    41  	p.y.SetBytes([]byte{0xbc, 0x37, 0x36, 0xa2, 0xf4, 0xf6, 0x77, 0x9c, 0x59, 0xbd, 0xce, 0xe3, 0x6b, 0x69, 0x21, 0x53, 0xd0, 0xa9, 0x87, 0x7c, 0xc6, 0x2a, 0x47, 0x40, 0x2, 0xdf, 0x32, 0xe5, 0x21, 0x39, 0xf0, 0xa0})
    42  	p.z.One()
    43  	return p
    44  }
    45  
    46  // Set sets p = q and returns p.
    47  func (p *SM2P256Point) Set(q *SM2P256Point) *SM2P256Point {
    48  	p.x.Set(q.x)
    49  	p.y.Set(q.y)
    50  	p.z.Set(q.z)
    51  	return p
    52  }
    53  
    54  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
    55  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
    56  // the curve, it returns nil and an error, and the receiver is unchanged.
    57  // Otherwise, it returns p.
    58  func (p *SM2P256Point) SetBytes(b []byte) (*SM2P256Point, error) {
    59  	switch {
    60  	// Point at infinity.
    61  	case len(b) == 1 && b[0] == 0:
    62  		return p.Set(NewSM2P256Point()), nil
    63  	// Uncompressed form.
    64  	case len(b) == 1+2*sm2p256ElementLength && b[0] == 4:
    65  		x, err := new(fiat.SM2P256Element).SetBytes(b[1 : 1+sm2p256ElementLength])
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  		y, err := new(fiat.SM2P256Element).SetBytes(b[1+sm2p256ElementLength:])
    70  		if err != nil {
    71  			return nil, err
    72  		}
    73  		if err := sm2p256CheckOnCurve(x, y); err != nil {
    74  			return nil, err
    75  		}
    76  		p.x.Set(x)
    77  		p.y.Set(y)
    78  		p.z.One()
    79  		return p, nil
    80  	// Compressed form.
    81  	case len(b) == 1+sm2p256ElementLength && (b[0] == 2 || b[0] == 3):
    82  		x, err := new(fiat.SM2P256Element).SetBytes(b[1:])
    83  		if err != nil {
    84  			return nil, err
    85  		}
    86  		// y² = x³ - 3x + b
    87  		y := sm2p256Polynomial(new(fiat.SM2P256Element), x)
    88  		if !sm2p256Sqrt(y, y) {
    89  			return nil, errors.New("invalid SM2P256 compressed point encoding")
    90  		}
    91  		// Select the positive or negative root, as indicated by the least
    92  		// significant bit, based on the encoding type byte.
    93  		otherRoot := new(fiat.SM2P256Element)
    94  		otherRoot.Sub(otherRoot, y)
    95  		cond := y.Bytes()[sm2p256ElementLength-1]&1 ^ b[0]&1
    96  		y.Select(otherRoot, y, int(cond))
    97  		p.x.Set(x)
    98  		p.y.Set(y)
    99  		p.z.One()
   100  		return p, nil
   101  	default:
   102  		return nil, errors.New("invalid SM2P256 point encoding")
   103  	}
   104  }
   105  
   106  var _sm2p256B *fiat.SM2P256Element
   107  var _sm2p256BOnce sync.Once
   108  
   109  func sm2p256B() *fiat.SM2P256Element {
   110  	_sm2p256BOnce.Do(func() {
   111  		_sm2p256B, _ = new(fiat.SM2P256Element).SetBytes([]byte{0x28, 0xe9, 0xfa, 0x9e, 0x9d, 0x9f, 0x5e, 0x34, 0x4d, 0x5a, 0x9e, 0x4b, 0xcf, 0x65, 0x9, 0xa7, 0xf3, 0x97, 0x89, 0xf5, 0x15, 0xab, 0x8f, 0x92, 0xdd, 0xbc, 0xbd, 0x41, 0x4d, 0x94, 0xe, 0x93})
   112  	})
   113  	return _sm2p256B
   114  }
   115  
   116  // sm2p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
   117  func sm2p256Polynomial(y2, x *fiat.SM2P256Element) *fiat.SM2P256Element {
   118  	y2.Square(x)
   119  	y2.Mul(y2, x)
   120  
   121  	threeX := new(fiat.SM2P256Element).Add(x, x)
   122  	threeX.Add(threeX, x)
   123  
   124  	y2.Sub(y2, threeX)
   125  
   126  	return y2.Add(y2, sm2p256B())
   127  }
   128  
   129  func sm2p256CheckOnCurve(x, y *fiat.SM2P256Element) error {
   130  	// y² = x³ - 3x + b
   131  	rhs := sm2p256Polynomial(new(fiat.SM2P256Element), x)
   132  	lhs := new(fiat.SM2P256Element).Square(y)
   133  	if rhs.Equal(lhs) != 1 {
   134  		return errors.New("point not on SM2 P256 curve")
   135  	}
   136  	return nil
   137  }
   138  
   139  // Bytes returns the uncompressed or infinity encoding of p, as specified in
   140  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
   141  // infinity is shorter than all other encodings.
   142  func (p *SM2P256Point) Bytes() []byte {
   143  	// This function is outlined to make the allocations inline in the caller
   144  	// rather than happen on the heap.
   145  	var out [1 + 2*sm2p256ElementLength]byte
   146  	return p.bytes(&out)
   147  }
   148  
   149  func (p *SM2P256Point) bytes(out *[1 + 2*sm2p256ElementLength]byte) []byte {
   150  	if p.z.IsZero() == 1 {
   151  		return append(out[:0], 0)
   152  	}
   153  	zinv := new(fiat.SM2P256Element).Invert(p.z)
   154  	x := new(fiat.SM2P256Element).Mul(p.x, zinv)
   155  	y := new(fiat.SM2P256Element).Mul(p.y, zinv)
   156  	buf := append(out[:0], 4)
   157  	buf = append(buf, x.Bytes()...)
   158  	buf = append(buf, y.Bytes()...)
   159  	return buf
   160  }
   161  
   162  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
   163  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
   164  func (p *SM2P256Point) BytesX() ([]byte, error) {
   165  	// This function is outlined to make the allocations inline in the caller
   166  	// rather than happen on the heap.
   167  	var out [sm2p256ElementLength]byte
   168  	return p.bytesX(&out)
   169  }
   170  
   171  func (p *SM2P256Point) bytesX(out *[sm2p256ElementLength]byte) ([]byte, error) {
   172  	if p.z.IsZero() == 1 {
   173  		return nil, errors.New("SM2P256 point is the point at infinity")
   174  	}
   175  	zinv := new(fiat.SM2P256Element).Invert(p.z)
   176  	x := new(fiat.SM2P256Element).Mul(p.x, zinv)
   177  	return append(out[:0], x.Bytes()...), nil
   178  }
   179  
   180  // BytesCompressed returns the compressed or infinity encoding of p, as
   181  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
   182  // point at infinity is shorter than all other encodings.
   183  func (p *SM2P256Point) BytesCompressed() []byte {
   184  	// This function is outlined to make the allocations inline in the caller
   185  	// rather than happen on the heap.
   186  	var out [1 + sm2p256ElementLength]byte
   187  	return p.bytesCompressed(&out)
   188  }
   189  
   190  func (p *SM2P256Point) bytesCompressed(out *[1 + sm2p256ElementLength]byte) []byte {
   191  	if p.z.IsZero() == 1 {
   192  		return append(out[:0], 0)
   193  	}
   194  	zinv := new(fiat.SM2P256Element).Invert(p.z)
   195  	x := new(fiat.SM2P256Element).Mul(p.x, zinv)
   196  	y := new(fiat.SM2P256Element).Mul(p.y, zinv)
   197  	// Encode the sign of the y coordinate (indicated by the least significant
   198  	// bit) as the encoding type (2 or 3).
   199  	buf := append(out[:0], 2)
   200  	buf[0] |= y.Bytes()[sm2p256ElementLength-1] & 1
   201  	buf = append(buf, x.Bytes()...)
   202  	return buf
   203  }
   204  
// Add sets q = p1 + p2, and returns q. The points may overlap.
//
// All intermediate values live in freshly allocated temporaries and the
// result is copied into q only in the final three statements, which is what
// makes it safe for q to alias p1 and/or p2. The trailing comment on each
// line is the corresponding step of the referenced algorithm; the statement
// order must not be changed.
func (q *SM2P256Point) Add(p1, p2 *SM2P256Point) *SM2P256Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	t0 := new(fiat.SM2P256Element).Mul(p1.x, p2.x)     // t0 := X1 * X2
	t1 := new(fiat.SM2P256Element).Mul(p1.y, p2.y)     // t1 := Y1 * Y2
	t2 := new(fiat.SM2P256Element).Mul(p1.z, p2.z)     // t2 := Z1 * Z2
	t3 := new(fiat.SM2P256Element).Add(p1.x, p1.y)     // t3 := X1 + Y1
	t4 := new(fiat.SM2P256Element).Add(p2.x, p2.y)     // t4 := X2 + Y2
	t3.Mul(t3, t4)                                     // t3 := t3 * t4
	t4.Add(t0, t1)                                     // t4 := t0 + t1
	t3.Sub(t3, t4)                                     // t3 := t3 - t4
	t4.Add(p1.y, p1.z)                                 // t4 := Y1 + Z1
	x3 := new(fiat.SM2P256Element).Add(p2.y, p2.z)     // X3 := Y2 + Z2
	t4.Mul(t4, x3)                                     // t4 := t4 * X3
	x3.Add(t1, t2)                                     // X3 := t1 + t2
	t4.Sub(t4, x3)                                     // t4 := t4 - X3
	x3.Add(p1.x, p1.z)                                 // X3 := X1 + Z1
	y3 := new(fiat.SM2P256Element).Add(p2.x, p2.z)     // Y3 := X2 + Z2
	x3.Mul(x3, y3)                                     // X3 := X3 * Y3
	y3.Add(t0, t2)                                     // Y3 := t0 + t2
	y3.Sub(x3, y3)                                     // Y3 := X3 - Y3
	z3 := new(fiat.SM2P256Element).Mul(sm2p256B(), t2) // Z3 := b * t2
	x3.Sub(y3, z3)                                     // X3 := Y3 - Z3
	z3.Add(x3, x3)                                     // Z3 := X3 + X3
	x3.Add(x3, z3)                                     // X3 := X3 + Z3
	z3.Sub(t1, x3)                                     // Z3 := t1 - X3
	x3.Add(t1, x3)                                     // X3 := t1 + X3
	y3.Mul(sm2p256B(), y3)                             // Y3 := b * Y3
	t1.Add(t2, t2)                                     // t1 := t2 + t2
	t2.Add(t1, t2)                                     // t2 := t1 + t2
	y3.Sub(y3, t2)                                     // Y3 := Y3 - t2
	y3.Sub(y3, t0)                                     // Y3 := Y3 - t0
	t1.Add(y3, y3)                                     // t1 := Y3 + Y3
	y3.Add(t1, y3)                                     // Y3 := t1 + Y3
	t1.Add(t0, t0)                                     // t1 := t0 + t0
	t0.Add(t1, t0)                                     // t0 := t1 + t0
	t0.Sub(t0, t2)                                     // t0 := t0 - t2
	t1.Mul(t4, y3)                                     // t1 := t4 * Y3
	t2.Mul(t0, y3)                                     // t2 := t0 * Y3
	y3.Mul(x3, z3)                                     // Y3 := X3 * Z3
	y3.Add(y3, t2)                                     // Y3 := Y3 + t2
	x3.Mul(t3, x3)                                     // X3 := t3 * X3
	x3.Sub(x3, t1)                                     // X3 := X3 - t1
	z3.Mul(t4, z3)                                     // Z3 := t4 * Z3
	t1.Mul(t3, t0)                                     // t1 := t3 * t0
	z3.Add(z3, t1)                                     // Z3 := Z3 + t1

	// Publish the result only now, so that q may alias an input.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
   258  
// Double sets q = p + p, and returns q. The points may overlap.
//
// All intermediate values live in freshly allocated temporaries and the
// result is copied into q only in the final three statements, which is what
// makes it safe for q to alias p. The trailing comment on each line is the
// corresponding step of the referenced algorithm; the statement order must
// not be changed.
func (q *SM2P256Point) Double(p *SM2P256Point) *SM2P256Point {
	// Complete addition formula for a = -3 from "Complete addition formulas for
	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
	t0 := new(fiat.SM2P256Element).Square(p.x)         // t0 := X ^ 2
	t1 := new(fiat.SM2P256Element).Square(p.y)         // t1 := Y ^ 2
	t2 := new(fiat.SM2P256Element).Square(p.z)         // t2 := Z ^ 2
	t3 := new(fiat.SM2P256Element).Mul(p.x, p.y)       // t3 := X * Y
	t3.Add(t3, t3)                                     // t3 := t3 + t3
	z3 := new(fiat.SM2P256Element).Mul(p.x, p.z)       // Z3 := X * Z
	z3.Add(z3, z3)                                     // Z3 := Z3 + Z3
	y3 := new(fiat.SM2P256Element).Mul(sm2p256B(), t2) // Y3 := b * t2
	y3.Sub(y3, z3)                                     // Y3 := Y3 - Z3
	x3 := new(fiat.SM2P256Element).Add(y3, y3)         // X3 := Y3 + Y3
	y3.Add(x3, y3)                                     // Y3 := X3 + Y3
	x3.Sub(t1, y3)                                     // X3 := t1 - Y3
	y3.Add(t1, y3)                                     // Y3 := t1 + Y3
	y3.Mul(x3, y3)                                     // Y3 := X3 * Y3
	x3.Mul(x3, t3)                                     // X3 := X3 * t3
	t3.Add(t2, t2)                                     // t3 := t2 + t2
	t2.Add(t2, t3)                                     // t2 := t2 + t3
	z3.Mul(sm2p256B(), z3)                             // Z3 := b * Z3
	z3.Sub(z3, t2)                                     // Z3 := Z3 - t2
	z3.Sub(z3, t0)                                     // Z3 := Z3 - t0
	t3.Add(z3, z3)                                     // t3 := Z3 + Z3
	z3.Add(z3, t3)                                     // Z3 := Z3 + t3
	t3.Add(t0, t0)                                     // t3 := t0 + t0
	t0.Add(t3, t0)                                     // t0 := t3 + t0
	t0.Sub(t0, t2)                                     // t0 := t0 - t2
	t0.Mul(t0, z3)                                     // t0 := t0 * Z3
	y3.Add(y3, t0)                                     // Y3 := Y3 + t0
	t0.Mul(p.y, p.z)                                   // t0 := Y * Z
	t0.Add(t0, t0)                                     // t0 := t0 + t0
	z3.Mul(t0, z3)                                     // Z3 := t0 * Z3
	x3.Sub(x3, z3)                                     // X3 := X3 - Z3
	z3.Mul(t0, t1)                                     // Z3 := t0 * t1
	z3.Add(z3, z3)                                     // Z3 := Z3 + Z3
	z3.Add(z3, z3)                                     // Z3 := Z3 + Z3

	// Publish the result only now, so that q may alias p.
	q.x.Set(x3)
	q.y.Set(y3)
	q.z.Set(z3)
	return q
}
   303  
   304  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
   305  func (q *SM2P256Point) Select(p1, p2 *SM2P256Point, cond int) *SM2P256Point {
   306  	q.x.Select(p1.x, p2.x, cond)
   307  	q.y.Select(p1.y, p2.y, cond)
   308  	q.z.Select(p1.z, p2.z, cond)
   309  	return q
   310  }
   311  
   312  // A sm2p256Table holds the first 15 multiples of a point at offset -1, so [1]P
   313  // is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
   314  // point.
   315  type sm2p256Table [15]*SM2P256Point
   316  
   317  // Select selects the n-th multiple of the table base point into p. It works in
   318  // constant time by iterating over every entry of the table. n must be in [0, 15].
   319  func (table *sm2p256Table) Select(p *SM2P256Point, n uint8) {
   320  	if n >= 16 {
   321  		panic("sm2ec: internal error: sm2p256Table called with out-of-bounds value")
   322  	}
   323  	p.Set(NewSM2P256Point())
   324  	for i, f := range table {
   325  		cond := subtle.ConstantTimeByteEq(uint8(i+1), n)
   326  		p.Select(f, p, cond)
   327  	}
   328  }
   329  
   330  // ScalarMult sets p = scalar * q, and returns p.
   331  func (p *SM2P256Point) ScalarMult(q *SM2P256Point, scalar []byte) (*SM2P256Point, error) {
   332  	// Compute a sm2p256Table for the base point q. The explicit NewSM2P256Point
   333  	// calls get inlined, letting the allocations live on the stack.
   334  	var table = sm2p256Table{NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(),
   335  		NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(),
   336  		NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(),
   337  		NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point(), NewSM2P256Point()}
   338  	table[0].Set(q)
   339  	for i := 1; i < 15; i += 2 {
   340  		table[i].Double(table[i/2])
   341  		table[i+1].Add(table[i], q)
   342  	}
   343  
   344  	// Instead of doing the classic double-and-add chain, we do it with a
   345  	// four-bit window: we double four times, and then add [0-15]P.
   346  	t := NewSM2P256Point()
   347  	p.Set(NewSM2P256Point())
   348  	for i, byte := range scalar {
   349  		// No need to double on the first iteration, as p is the identity at
   350  		// this point, and [N]∞ = ∞.
   351  		if i != 0 {
   352  			p.Double(p)
   353  			p.Double(p)
   354  			p.Double(p)
   355  			p.Double(p)
   356  		}
   357  
   358  		windowValue := byte >> 4
   359  		table.Select(t, windowValue)
   360  		p.Add(p, t)
   361  
   362  		p.Double(p)
   363  		p.Double(p)
   364  		p.Double(p)
   365  		p.Double(p)
   366  
   367  		windowValue = byte & 0b1111
   368  		table.Select(t, windowValue)
   369  		p.Add(p, t)
   370  	}
   371  
   372  	return p, nil
   373  }
   374  
   375  var sm2p256GeneratorTable *[sm2p256ElementLength * 2]sm2p256Table
   376  var sm2p256GeneratorTableOnce sync.Once
   377  
   378  // generatorTable returns a sequence of sm2p256Tables. The first table contains
   379  // multiples of G. Each successive table is the previous table doubled four
   380  // times.
   381  func (p *SM2P256Point) generatorTable() *[sm2p256ElementLength * 2]sm2p256Table {
   382  	sm2p256GeneratorTableOnce.Do(func() {
   383  		sm2p256GeneratorTable = new([sm2p256ElementLength * 2]sm2p256Table)
   384  		base := NewSM2P256Point().SetGenerator()
   385  		for i := 0; i < sm2p256ElementLength*2; i++ {
   386  			sm2p256GeneratorTable[i][0] = NewSM2P256Point().Set(base)
   387  			for j := 1; j < 15; j++ {
   388  				sm2p256GeneratorTable[i][j] = NewSM2P256Point().Add(sm2p256GeneratorTable[i][j-1], base)
   389  			}
   390  			base.Double(base)
   391  			base.Double(base)
   392  			base.Double(base)
   393  			base.Double(base)
   394  		}
   395  	})
   396  	return sm2p256GeneratorTable
   397  }
   398  
   399  // ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
   400  // returns p.
   401  func (p *SM2P256Point) ScalarBaseMult(scalar []byte) (*SM2P256Point, error) {
   402  	if len(scalar) != sm2p256ElementLength {
   403  		return nil, errors.New("invalid scalar length")
   404  	}
   405  	tables := p.generatorTable()
   406  
   407  	// This is also a scalar multiplication with a four-bit window like in
   408  	// ScalarMult, but in this case the doublings are precomputed. The value
   409  	// [windowValue]G added at iteration k would normally get doubled
   410  	// (totIterations-k)×4 times, but with a larger precomputation we can
   411  	// instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
   412  	// doublings between iterations.
   413  	t := NewSM2P256Point()
   414  	p.Set(NewSM2P256Point())
   415  	tableIndex := len(tables) - 1
   416  	for _, byte := range scalar {
   417  		windowValue := byte >> 4
   418  		tables[tableIndex].Select(t, windowValue)
   419  		p.Add(p, t)
   420  		tableIndex--
   421  
   422  		windowValue = byte & 0b1111
   423  		tables[tableIndex].Select(t, windowValue)
   424  		p.Add(p, t)
   425  		tableIndex--
   426  	}
   427  
   428  	return p, nil
   429  }
   430  
   431  // sm2p256Sqrt sets e to a square root of x. If x is not a square, sm2p256Sqrt returns
   432  // false and e is unchanged. e and x can overlap.
   433  func sm2p256Sqrt(e, x *fiat.SM2P256Element) (isSquare bool) {
   434  	candidate := new(fiat.SM2P256Element)
   435  	sm2p256SqrtCandidate(candidate, x)
   436  	square := new(fiat.SM2P256Element).Square(candidate)
   437  	if square.Equal(x) != 1 {
   438  		return false
   439  	}
   440  	e.Set(candidate)
   441  	return true
   442  }
   443  
// sm2p256SqrtCandidate sets z to a square root candidate for x. z and x must not overlap.
//
// The result is only a candidate: the caller has to square it and compare
// against x to find out whether x is a square at all.
func sm2p256SqrtCandidate(z, x *fiat.SM2P256Element) {
	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
	//
	// The sequence of 13 multiplications and 253 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
	//
	//	_10      = 2*1
	//	_11      = 1 + _10
	//	_110     = 2*_11
	//	_111     = 1 + _110
	//	_1110    = 2*_111
	//	_1111    = 1 + _1110
	//	_11110   = 2*_1111
	//	_111100  = 2*_11110
	//	_1111000 = 2*_111100
	//	i19      = (_1111000 << 3 + _111100) << 5 + _1111000
	//	x31      = (i19 << 2 + _11110) << 14 + i19 + _111
	//	i42      = x31 << 4
	//	i73      = i42 << 31
	//	i74      = i42 + i73
	//	i171     = (i73 << 32 + i74) << 62 + i74 + _1111
	//	return     (i171 << 32 + 1) << 62
	//
	// In the step comments below, a name from the chain stands for x raised to
	// that exponent: "+" is a multiplication, "<< n" is n squarings.
	var t0 = new(fiat.SM2P256Element)
	var t1 = new(fiat.SM2P256Element)
	var t2 = new(fiat.SM2P256Element)
	var t3 = new(fiat.SM2P256Element)
	var t4 = new(fiat.SM2P256Element)

	z.Square(x)   // z  = _10
	z.Mul(x, z)   // z  = _11
	z.Square(z)   // z  = _110
	t0.Mul(x, z)  // t0 = _111
	z.Square(t0)  // z  = _1110
	z.Mul(x, z)   // z  = _1111
	t2.Square(z)  // t2 = _11110
	t3.Square(t2) // t3 = _111100
	t1.Square(t3) // t1 = _1111000
	// t4 = _1111000 << 3 (one squaring here, two in the loop).
	t4.Square(t1)
	for s := 1; s < 3; s++ {
		t4.Square(t4)
	}
	t3.Mul(t3, t4) // t3 = _1111000 << 3 + _111100
	for s := 0; s < 5; s++ {
		t3.Square(t3)
	}
	t1.Mul(t1, t3) // t1 = i19
	// t3 = i19 << 2 (one squaring here, one in the loop).
	t3.Square(t1)
	for s := 1; s < 2; s++ {
		t3.Square(t3)
	}
	t2.Mul(t2, t3) // t2 = i19 << 2 + _11110
	for s := 0; s < 14; s++ {
		t2.Square(t2)
	}
	t1.Mul(t1, t2) // t1 = (i19 << 2 + _11110) << 14 + i19
	t0.Mul(t0, t1) // t0 = x31
	for s := 0; s < 4; s++ {
		t0.Square(t0)
	}
	// t0 = i42; t1 = i42 << 31 = i73 (one squaring here, 30 in the loop).
	t1.Square(t0)
	for s := 1; s < 31; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1) // t0 = i74
	for s := 0; s < 32; s++ {
		t1.Square(t1)
	}
	t1.Mul(t0, t1) // t1 = i73 << 32 + i74
	for s := 0; s < 62; s++ {
		t1.Square(t1)
	}
	t0.Mul(t0, t1) // t0 = (i73 << 32 + i74) << 62 + i74
	z.Mul(z, t0)   // z  = i171
	for s := 0; s < 32; s++ {
		z.Square(z)
	}
	z.Mul(x, z) // z = i171 << 32 + 1
	for s := 0; s < 62; s++ {
		z.Square(z)
	}
}