github.com/mangodowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/crypto/sm2/p256.go

     1  /*
     2  Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
     3  Licensed under the Apache License, Version 2.0 (the "License");
     4  you may not use this file except in compliance with the License.
     5  You may obtain a copy of the License at
     6  
     7  	http://www.apache.org/licenses/LICENSE-2.0
     8  
     9  Unless required by applicable law or agreed to in writing, software
    10  distributed under the License is distributed on an "AS IS" BASIS,
    11  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  See the License for the specific language governing permissions and
    13  limitations under the License.
    14  */
    15  
    16  package sm2
    17  
    18  import (
    19  	"crypto/elliptic"
    20  	"math/big"
    21  	"sync"
    22  )
    23  
/*
 * A fast SM2 implementation modelled on the optimizations used by the
 * standard library's P-256 code. The standard library's p256.go is fairly
 * hard to follow, and this SM2 version is no easier; feel free to dig into it
 * if you are interested.
 */
    48  
    49  type sm2P256Curve struct {
    50  	RInverse *big.Int
    51  	*elliptic.CurveParams
    52  	a, b, gx, gy sm2P256FieldElement
    53  }
    54  
    55  var initonce sync.Once
    56  var sm2P256 sm2P256Curve
    57  
// A sm2P256FieldElement is a field element in a little-endian, 9-limb
// representation whose limbs alternate between 29 and 28 bits
// (29+28+...+29 = 257 bits in total), mirroring the layout used by the
// standard library's P-256 code.
type sm2P256FieldElement [9]uint32

// A sm2P256LargeFieldElement holds the 17 64-bit limbs of an unreduced
// product of two field elements; sm2P256ReduceDegree reduces it back to a
// sm2P256FieldElement.
type sm2P256LargeFieldElement [17]uint64
    60  
    61  const (
    62  	bottom28Bits = 0xFFFFFFF
    63  	bottom29Bits = 0x1FFFFFFF
    64  )
    65  
    66  func initP256Sm2() {
    67  	sm2P256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} // sm2
	// Recommended curve parameters for the SM2 elliptic curve public key
	// cryptographic algorithm.
	A, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFC", 16)
    70  	sm2P256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
    71  	sm2P256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
    72  	sm2P256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16)
    73  	sm2P256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
    74  	sm2P256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
    75  	sm2P256.RInverse, _ = new(big.Int).SetString("7ffffffd80000002fffffffe000000017ffffffe800000037ffffffc80000002", 16)
    76  	sm2P256.BitSize = 256
    77  	sm2P256FromBig(&sm2P256.a, A)
    78  	sm2P256FromBig(&sm2P256.gx, sm2P256.Gx)
    79  	sm2P256FromBig(&sm2P256.gy, sm2P256.Gy)
    80  	sm2P256FromBig(&sm2P256.b, sm2P256.B)
    81  }
    82  
    83  func P256Sm2() elliptic.Curve {
    84  	initonce.Do(initP256Sm2)
    85  	return sm2P256
    86  }
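// Usage sketch (not part of the original file): because sm2P256Curve
// implements elliptic.Curve, the value returned by P256Sm2 works with the
// standard library's generic helpers, for example key generation via
// crypto/elliptic and crypto/rand. The helper name below is hypothetical and
// only illustrates the call pattern.
//
//	func exampleGenerateKey() {
//		curve := P256Sm2()
//		priv, x, y, err := elliptic.GenerateKey(curve, rand.Reader)
//		if err != nil {
//			panic(err)
//		}
//		fmt.Printf("d=%x x=%s y=%s\n", priv, x.Text(16), y.Text(16))
//	}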
    87  
    88  func (curve sm2P256Curve) Params() *elliptic.CurveParams {
    89  	return sm2P256.CurveParams
    90  }
    91  
    92  // y^2 = x^3 + ax + b
    93  func (curve sm2P256Curve) IsOnCurve(X, Y *big.Int) bool {
    94  	var a, x, y, y2, x3 sm2P256FieldElement
    95  
    96  	sm2P256FromBig(&x, X)
    97  	sm2P256FromBig(&y, Y)
    98  
    99  	sm2P256Square(&x3, &x)       // x3 = x ^ 2
   100  	sm2P256Mul(&x3, &x3, &x)     // x3 = x ^ 2 * x
   101  	sm2P256Mul(&a, &curve.a, &x) // a = a * x
   102  	sm2P256Add(&x3, &x3, &a)
   103  	sm2P256Add(&x3, &x3, &curve.b)
   104  
   105  	sm2P256Square(&y2, &y) // y2 = y ^ 2
   106  	return sm2P256ToBig(&x3).Cmp(sm2P256ToBig(&y2)) == 0
   107  }
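// Sanity-check sketch (not part of the original file): the recommended base
// point must satisfy the curve equation.
//
//	curve := P256Sm2()
//	ok := curve.IsOnCurve(curve.Params().Gx, curve.Params().Gy) // expected: true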
   108  
// zForAffine returns a Jacobian Z value for the affine point (x, y). It
// returns 0 for (0, 0), the conventional encoding of the point at infinity,
// and 1 otherwise.
func zForAffine(x, y *big.Int) *big.Int {
   110  	z := new(big.Int)
   111  	if x.Sign() != 0 || y.Sign() != 0 {
   112  		z.SetInt64(1)
   113  	}
   114  	return z
   115  }
   116  
   117  func (curve sm2P256Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
   118  	var X1, Y1, Z1, X2, Y2, Z2, X3, Y3, Z3 sm2P256FieldElement
   119  
   120  	z1 := zForAffine(x1, y1)
   121  	z2 := zForAffine(x2, y2)
   122  	sm2P256FromBig(&X1, x1)
   123  	sm2P256FromBig(&Y1, y1)
   124  	sm2P256FromBig(&Z1, z1)
   125  	sm2P256FromBig(&X2, x2)
   126  	sm2P256FromBig(&Y2, y2)
   127  	sm2P256FromBig(&Z2, z2)
   128  	sm2P256PointAdd(&X1, &Y1, &Z1, &X2, &Y2, &Z2, &X3, &Y3, &Z3)
   129  	return sm2P256ToAffine(&X3, &Y3, &Z3)
   130  }
   131  
   132  func (curve sm2P256Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
   133  	var X1, Y1, Z1 sm2P256FieldElement
   134  
   135  	z1 := zForAffine(x1, y1)
   136  	sm2P256FromBig(&X1, x1)
   137  	sm2P256FromBig(&Y1, y1)
   138  	sm2P256FromBig(&Z1, z1)
   139  	sm2P256PointDouble(&X1, &Y1, &Z1, &X1, &Y1, &Z1)
   140  	return sm2P256ToAffine(&X1, &Y1, &Z1)
   141  }
   142  
   143  func (curve sm2P256Curve) ScalarMult(x1, y1 *big.Int, k []byte) (*big.Int, *big.Int) {
   144  	var scalarReversed [32]byte
   145  	var X, Y, Z, X1, Y1 sm2P256FieldElement
   146  
   147  	sm2P256FromBig(&X1, x1)
   148  	sm2P256FromBig(&Y1, y1)
   149  	sm2P256GetScalar(&scalarReversed, k)
   150  	sm2P256ScalarMult(&X, &Y, &Z, &X1, &Y1, &scalarReversed)
   151  	return sm2P256ToAffine(&X, &Y, &Z)
   152  }
   153  
   154  func (curve sm2P256Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
   155  	var scalarReversed [32]byte
   156  	var X, Y, Z sm2P256FieldElement
   157  
   158  	sm2P256GetScalar(&scalarReversed, k)
   159  	sm2P256ScalarBaseMult(&X, &Y, &Z, &scalarReversed)
   160  	return sm2P256ToAffine(&X, &Y, &Z)
   161  }
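// Consistency sketch (not part of the original file): multiplying the base
// point by a scalar through either entry point should give the same result.
//
//	curve := P256Sm2()
//	k := []byte{0x01, 0x02, 0x03} // any value smaller than curve.Params().N
//	x1, y1 := curve.ScalarBaseMult(k)
//	x2, y2 := curve.ScalarMult(curve.Params().Gx, curve.Params().Gy, k)
//	// expected: x1.Cmp(x2) == 0 && y1.Cmp(y2) == 0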
   162  
// sm2P256Precomputed holds precomputed multiples of the base point for
// sm2P256ScalarBaseMult. It consists of two equal-length tables laid end to
// end; each table stores 15 affine points (indices 1 through 15) as x,y pairs
// of 9-limb field elements in the internal representation.
var sm2P256Precomputed = [9 * 2 * 15 * 2]uint32{
   164  	0x830053d, 0x328990f, 0x6c04fe1, 0xc0f72e5, 0x1e19f3c, 0x666b093, 0x175a87b, 0xec38276, 0x222cf4b,
   165  	0x185a1bba, 0x354e593, 0x1295fac1, 0xf2bc469, 0x47c60fa, 0xc19b8a9, 0xf63533e, 0x903ae6b, 0xc79acba,
   166  	0x15b061a4, 0x33e020b, 0xdffb34b, 0xfcf2c8, 0x16582e08, 0x262f203, 0xfb34381, 0xa55452, 0x604f0ff,
   167  	0x41f1f90, 0xd64ced2, 0xee377bf, 0x75f05f0, 0x189467ae, 0xe2244e, 0x1e7700e8, 0x3fbc464, 0x9612d2e,
   168  	0x1341b3b8, 0xee84e23, 0x1edfa5b4, 0x14e6030, 0x19e87be9, 0x92f533c, 0x1665d96c, 0x226653e, 0xa238d3e,
   169  	0xf5c62c, 0x95bb7a, 0x1f0e5a41, 0x28789c3, 0x1f251d23, 0x8726609, 0xe918910, 0x8096848, 0xf63d028,
   170  	0x152296a1, 0x9f561a8, 0x14d376fb, 0x898788a, 0x61a95fb, 0xa59466d, 0x159a003d, 0x1ad1698, 0x93cca08,
   171  	0x1b314662, 0x706e006, 0x11ce1e30, 0x97b710, 0x172fbc0d, 0x8f50158, 0x11c7ffe7, 0xd182cce, 0xc6ad9e8,
   172  	0x12ea31b2, 0xc4e4f38, 0x175b0d96, 0xec06337, 0x75a9c12, 0xb001fdf, 0x93e82f5, 0x34607de, 0xb8035ed,
   173  	0x17f97924, 0x75cf9e6, 0xdceaedd, 0x2529924, 0x1a10c5ff, 0xb1a54dc, 0x19464d8, 0x2d1997, 0xde6a110,
   174  	0x1e276ee5, 0x95c510c, 0x1aca7c7a, 0xfe48aca, 0x121ad4d9, 0xe4132c6, 0x8239b9d, 0x40ea9cd, 0x816c7b,
   175  	0x632d7a4, 0xa679813, 0x5911fcf, 0x82b0f7c, 0x57b0ad5, 0xbef65, 0xd541365, 0x7f9921f, 0xc62e7a,
   176  	0x3f4b32d, 0x58e50e1, 0x6427aed, 0xdcdda67, 0xe8c2d3e, 0x6aa54a4, 0x18df4c35, 0x49a6a8e, 0x3cd3d0c,
   177  	0xd7adf2, 0xcbca97, 0x1bda5f2d, 0x3258579, 0x606b1e6, 0x6fc1b5b, 0x1ac27317, 0x503ca16, 0xa677435,
   178  	0x57bc73, 0x3992a42, 0xbab987b, 0xfab25eb, 0x128912a4, 0x90a1dc4, 0x1402d591, 0x9ffbcfc, 0xaa48856,
   179  	0x7a7c2dc, 0xcefd08a, 0x1b29bda6, 0xa785641, 0x16462d8c, 0x76241b7, 0x79b6c3b, 0x204ae18, 0xf41212b,
   180  	0x1f567a4d, 0xd6ce6db, 0xedf1784, 0x111df34, 0x85d7955, 0x55fc189, 0x1b7ae265, 0xf9281ac, 0xded7740,
   181  	0xf19468b, 0x83763bb, 0x8ff7234, 0x3da7df8, 0x9590ac3, 0xdc96f2a, 0x16e44896, 0x7931009, 0x99d5acc,
   182  	0x10f7b842, 0xaef5e84, 0xc0310d7, 0xdebac2c, 0x2a7b137, 0x4342344, 0x19633649, 0x3a10624, 0x4b4cb56,
   183  	0x1d809c59, 0xac007f, 0x1f0f4bcd, 0xa1ab06e, 0xc5042cf, 0x82c0c77, 0x76c7563, 0x22c30f3, 0x3bf1568,
   184  	0x7a895be, 0xfcca554, 0x12e90e4c, 0x7b4ab5f, 0x13aeb76b, 0x5887e2c, 0x1d7fe1e3, 0x908c8e3, 0x95800ee,
   185  	0xb36bd54, 0xf08905d, 0x4e73ae8, 0xf5a7e48, 0xa67cb0, 0x50e1067, 0x1b944a0a, 0xf29c83a, 0xb23cfb9,
   186  	0xbe1db1, 0x54de6e8, 0xd4707f2, 0x8ebcc2d, 0x2c77056, 0x1568ce4, 0x15fcc849, 0x4069712, 0xe2ed85f,
   187  	0x2c5ff09, 0x42a6929, 0x628e7ea, 0xbd5b355, 0xaf0bd79, 0xaa03699, 0xdb99816, 0x4379cef, 0x81d57b,
   188  	0x11237f01, 0xe2a820b, 0xfd53b95, 0x6beb5ee, 0x1aeb790c, 0xe470d53, 0x2c2cfee, 0x1c1d8d8, 0xa520fc4,
   189  	0x1518e034, 0xa584dd4, 0x29e572b, 0xd4594fc, 0x141a8f6f, 0x8dfccf3, 0x5d20ba3, 0x2eb60c3, 0x9f16eb0,
   190  	0x11cec356, 0xf039f84, 0x1b0990c1, 0xc91e526, 0x10b65bae, 0xf0616e8, 0x173fa3ff, 0xec8ccf9, 0xbe32790,
   191  	0x11da3e79, 0xe2f35c7, 0x908875c, 0xdacf7bd, 0x538c165, 0x8d1487f, 0x7c31aed, 0x21af228, 0x7e1689d,
   192  	0xdfc23ca, 0x24f15dc, 0x25ef3c4, 0x35248cd, 0x99a0f43, 0xa4b6ecc, 0xd066b3, 0x2481152, 0x37a7688,
   193  	0x15a444b6, 0xb62300c, 0x4b841b, 0xa655e79, 0xd53226d, 0xbeb348a, 0x127f3c2, 0xb989247, 0x71a277d,
   194  	0x19e9dfcb, 0xb8f92d0, 0xe2d226c, 0x390a8b0, 0x183cc462, 0x7bd8167, 0x1f32a552, 0x5e02db4, 0xa146ee9,
   195  	0x1a003957, 0x1c95f61, 0x1eeec155, 0x26f811f, 0xf9596ba, 0x3082bfb, 0x96df083, 0x3e3a289, 0x7e2d8be,
   196  	0x157a63e0, 0x99b8941, 0x1da7d345, 0xcc6cd0, 0x10beed9a, 0x48e83c0, 0x13aa2e25, 0x7cad710, 0x4029988,
   197  	0x13dfa9dd, 0xb94f884, 0x1f4adfef, 0xb88543, 0x16f5f8dc, 0xa6a67f4, 0x14e274e2, 0x5e56cf4, 0x2f24ef,
   198  	0x1e9ef967, 0xfe09bad, 0xfe079b3, 0xcc0ae9e, 0xb3edf6d, 0x3e961bc, 0x130d7831, 0x31043d6, 0xba986f9,
   199  	0x1d28055, 0x65240ca, 0x4971fa3, 0x81b17f8, 0x11ec34a5, 0x8366ddc, 0x1471809, 0xfa5f1c6, 0xc911e15,
   200  	0x8849491, 0xcf4c2e2, 0x14471b91, 0x39f75be, 0x445c21e, 0xf1585e9, 0x72cc11f, 0x4c79f0c, 0xe5522e1,
   201  	0x1874c1ee, 0x4444211, 0x7914884, 0x3d1b133, 0x25ba3c, 0x4194f65, 0x1c0457ef, 0xac4899d, 0xe1fa66c,
   202  	0x130a7918, 0x9b8d312, 0x4b1c5c8, 0x61ccac3, 0x18c8aa6f, 0xe93cb0a, 0xdccb12c, 0xde10825, 0x969737d,
   203  	0xf58c0c3, 0x7cee6a9, 0xc2c329a, 0xc7f9ed9, 0x107b3981, 0x696a40e, 0x152847ff, 0x4d88754, 0xb141f47,
   204  	0x5a16ffe, 0x3a7870a, 0x18667659, 0x3b72b03, 0xb1c9435, 0x9285394, 0xa00005a, 0x37506c, 0x2edc0bb,
   205  	0x19afe392, 0xeb39cac, 0x177ef286, 0xdf87197, 0x19f844ed, 0x31fe8, 0x15f9bfd, 0x80dbec, 0x342e96e,
   206  	0x497aced, 0xe88e909, 0x1f5fa9ba, 0x530a6ee, 0x1ef4e3f1, 0x69ffd12, 0x583006d, 0x2ecc9b1, 0x362db70,
   207  	0x18c7bdc5, 0xf4bb3c5, 0x1c90b957, 0xf067c09, 0x9768f2b, 0xf73566a, 0x1939a900, 0x198c38a, 0x202a2a1,
   208  	0x4bbf5a6, 0x4e265bc, 0x1f44b6e7, 0x185ca49, 0xa39e81b, 0x24aff5b, 0x4acc9c2, 0x638bdd3, 0xb65b2a8,
   209  	0x6def8be, 0xb94537a, 0x10b81dee, 0xe00ec55, 0x2f2cdf7, 0xc20622d, 0x2d20f36, 0xe03c8c9, 0x898ea76,
   210  	0x8e3921b, 0x8905bff, 0x1e94b6c8, 0xee7ad86, 0x154797f2, 0xa620863, 0x3fbd0d9, 0x1f3caab, 0x30c24bd,
   211  	0x19d3892f, 0x59c17a2, 0x1ab4b0ae, 0xf8714ee, 0x90c4098, 0xa9c800d, 0x1910236b, 0xea808d3, 0x9ae2f31,
   212  	0x1a15ad64, 0xa48c8d1, 0x184635a4, 0xb725ef1, 0x11921dcc, 0x3f866df, 0x16c27568, 0xbdf580a, 0xb08f55c,
   213  	0x186ee1c, 0xb1627fa, 0x34e82f6, 0x933837e, 0xf311be5, 0xfedb03b, 0x167f72cd, 0xa5469c0, 0x9c82531,
   214  	0xb92a24b, 0x14fdc8b, 0x141980d1, 0xbdc3a49, 0x7e02bb1, 0xaf4e6dd, 0x106d99e1, 0xd4616fc, 0x93c2717,
   215  	0x1c0a0507, 0xc6d5fed, 0x9a03d8b, 0xa1d22b0, 0x127853e3, 0xc4ac6b8, 0x1a048cf7, 0x9afb72c, 0x65d485d,
   216  	0x72d5998, 0xe9fa744, 0xe49e82c, 0x253cf80, 0x5f777ce, 0xa3799a5, 0x17270cbb, 0xc1d1ef0, 0xdf74977,
   217  	0x114cb859, 0xfa8e037, 0xb8f3fe5, 0xc734cc6, 0x70d3d61, 0xeadac62, 0x12093dd0, 0x9add67d, 0x87200d6,
   218  	0x175bcbb, 0xb29b49f, 0x1806b79c, 0x12fb61f, 0x170b3a10, 0x3aaf1cf, 0xa224085, 0x79d26af, 0x97759e2,
   219  	0x92e19f1, 0xb32714d, 0x1f00d9f1, 0xc728619, 0x9e6f627, 0xe745e24, 0x18ea4ace, 0xfc60a41, 0x125f5b2,
   220  	0xc3cf512, 0x39ed486, 0xf4d15fa, 0xf9167fd, 0x1c1f5dd5, 0xc21a53e, 0x1897930, 0x957a112, 0x21059a0,
   221  	0x1f9e3ddc, 0xa4dfced, 0x8427f6f, 0x726fbe7, 0x1ea658f8, 0x2fdcd4c, 0x17e9b66f, 0xb2e7c2e, 0x39923bf,
   222  	0x1bae104, 0x3973ce5, 0xc6f264c, 0x3511b84, 0x124195d7, 0x11996bd, 0x20be23d, 0xdc437c4, 0x4b4f16b,
   223  	0x11902a0, 0x6c29cc9, 0x1d5ffbe6, 0xdb0b4c7, 0x10144c14, 0x2f2b719, 0x301189, 0x2343336, 0xa0bf2ac,
   224  }
   225  
// sm2P256GetScalar reduces a modulo the group order N if necessary and writes
// the result to b in little-endian byte order.
func sm2P256GetScalar(b *[32]byte, a []byte) {
   227  	var scalarBytes []byte
   228  
   229  	n := new(big.Int).SetBytes(a)
   230  	if n.Cmp(sm2P256.N) >= 0 {
   231  		n.Mod(n, sm2P256.N)
   232  		scalarBytes = n.Bytes()
   233  	} else {
   234  		scalarBytes = a
   235  	}
   236  	for i, v := range scalarBytes {
   237  		b[len(scalarBytes)-(1+i)] = v
   238  	}
   239  }
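// Worked example (not part of the original file): the scalar is copied into b
// in reverse (little-endian) byte order, after reduction mod N when needed.
//
//	var out [32]byte
//	sm2P256GetScalar(&out, []byte{0x01, 0x02, 0x03})
//	// out[0] == 0x03, out[1] == 0x02, out[2] == 0x01; the rest stays zero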
   240  
// sm2P256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}, i.e.
// the second point is in affine form.
func sm2P256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *sm2P256FieldElement) {
   242  	var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp sm2P256FieldElement
   243  
   244  	sm2P256Square(&z1z1, z1)
   245  	sm2P256Add(&tmp, z1, z1)
   246  
   247  	sm2P256Mul(&u2, x2, &z1z1)
   248  	sm2P256Mul(&z1z1z1, z1, &z1z1)
   249  	sm2P256Mul(&s2, y2, &z1z1z1)
   250  	sm2P256Sub(&h, &u2, x1)
   251  	sm2P256Add(&i, &h, &h)
   252  	sm2P256Square(&i, &i)
   253  	sm2P256Mul(&j, &h, &i)
   254  	sm2P256Sub(&r, &s2, y1)
   255  	sm2P256Add(&r, &r, &r)
   256  	sm2P256Mul(&v, x1, &i)
   257  
   258  	sm2P256Mul(zOut, &tmp, &h)
   259  	sm2P256Square(&rr, &r)
   260  	sm2P256Sub(xOut, &rr, &j)
   261  	sm2P256Sub(xOut, xOut, &v)
   262  	sm2P256Sub(xOut, xOut, &v)
   263  
   264  	sm2P256Sub(&tmp, &v, xOut)
   265  	sm2P256Mul(yOut, &tmp, &r)
   266  	sm2P256Mul(&tmp, y1, &j)
   267  	sm2P256Sub(yOut, yOut, &tmp)
   268  	sm2P256Sub(yOut, yOut, &tmp)
   269  }
   270  
   271  // sm2P256CopyConditional sets out=in if mask = 0xffffffff in constant time.
   272  //
   273  // On entry: mask is either 0 or 0xffffffff.
   274  func sm2P256CopyConditional(out, in *sm2P256FieldElement, mask uint32) {
   275  	for i := 0; i < 9; i++ {
   276  		tmp := mask & (in[i] ^ out[i])
   277  		out[i] ^= tmp
   278  	}
   279  }
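// Worked example (not part of the original file):
//
//	var a, b sm2P256FieldElement
//	b[0] = 7
//	sm2P256CopyConditional(&a, &b, ^uint32(0)) // a becomes a copy of b
//	sm2P256CopyConditional(&a, &b, 0)          // a is left unchanged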
   280  
   281  // sm2P256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
   282  // On entry: index < 16, table[0] must be zero.
   283  func sm2P256SelectAffinePoint(xOut, yOut *sm2P256FieldElement, table []uint32, index uint32) {
   284  	for i := range xOut {
   285  		xOut[i] = 0
   286  	}
   287  	for i := range yOut {
   288  		yOut[i] = 0
   289  	}
   290  
   291  	for i := uint32(1); i < 16; i++ {
		// mask becomes all ones exactly when i == index: the xor is zero only
		// for the matching entry, folding its four bits into bit 0 gives 0 or
		// 1, and the final decrement maps 0 to 0xffffffff and 1 to 0.
		mask := i ^ index
		mask |= mask >> 2
		mask |= mask >> 1
		mask &= 1
		mask--
   297  		for j := range xOut {
   298  			xOut[j] |= table[0] & mask
   299  			table = table[1:]
   300  		}
   301  		for j := range yOut {
   302  			yOut[j] |= table[0] & mask
   303  			table = table[1:]
   304  		}
   305  	}
   306  }
   307  
   308  // sm2P256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
   309  // table.
   310  // On entry: index < 16, table[0] must be zero.
   311  func sm2P256SelectJacobianPoint(xOut, yOut, zOut *sm2P256FieldElement, table *[16][3]sm2P256FieldElement, index uint32) {
   312  	for i := range xOut {
   313  		xOut[i] = 0
   314  	}
   315  	for i := range yOut {
   316  		yOut[i] = 0
   317  	}
   318  	for i := range zOut {
   319  		zOut[i] = 0
   320  	}
   321  
   322  	// The implicit value at index 0 is all zero. We don't need to perform that
   323  	// iteration of the loop because we already set out_* to zero.
   324  	for i := uint32(1); i < 16; i++ {
   325  		mask := i ^ index
   326  		mask |= mask >> 2
   327  		mask |= mask >> 1
   328  		mask &= 1
   329  		mask--
   330  		for j := range xOut {
   331  			xOut[j] |= table[i][0][j] & mask
   332  		}
   333  		for j := range yOut {
   334  			yOut[j] |= table[i][1][j] & mask
   335  		}
   336  		for j := range zOut {
   337  			zOut[j] |= table[i][2][j] & mask
   338  		}
   339  	}
   340  }
   341  
   342  // sm2P256GetBit returns the bit'th bit of scalar.
   343  func sm2P256GetBit(scalar *[32]uint8, bit uint) uint32 {
   344  	return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
   345  }
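// For example, with scalar[0] = 0x05 and all other bytes zero,
// sm2P256GetBit(&scalar, 0) == 1, sm2P256GetBit(&scalar, 1) == 0 and
// sm2P256GetBit(&scalar, 2) == 1.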
   346  
   347  // sm2P256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
   348  // little-endian number. Note that the value of scalar must be less than the
   349  // order of the group.
   350  func sm2P256ScalarBaseMult(xOut, yOut, zOut *sm2P256FieldElement, scalar *[32]uint8) {
   351  	nIsInfinityMask := ^uint32(0)
   352  	var px, py, tx, ty, tz sm2P256FieldElement
   353  	var pIsNoninfiniteMask, mask, tableOffset uint32
   354  
   355  	for i := range xOut {
   356  		xOut[i] = 0
   357  	}
   358  	for i := range yOut {
   359  		yOut[i] = 0
   360  	}
   361  	for i := range zOut {
   362  		zOut[i] = 0
   363  	}
   364  
	// The loop adds bits at positions 0, 64, 128 and 192, followed by
	// positions 32, 96, 160 and 224, and does this 32 times.
   367  	for i := uint(0); i < 32; i++ {
   368  		if i != 0 {
   369  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
   370  		}
   371  		tableOffset = 0
   372  		for j := uint(0); j <= 32; j += 32 {
   373  			bit0 := sm2P256GetBit(scalar, 31-i+j)
   374  			bit1 := sm2P256GetBit(scalar, 95-i+j)
   375  			bit2 := sm2P256GetBit(scalar, 159-i+j)
   376  			bit3 := sm2P256GetBit(scalar, 223-i+j)
   377  			index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)
   378  
   379  			sm2P256SelectAffinePoint(&px, &py, sm2P256Precomputed[tableOffset:], index)
   380  			tableOffset += 30 * 9
   381  
   382  			// Since scalar is less than the order of the group, we know that
   383  			// {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
   384  			// below.
   385  			sm2P256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
   386  			// The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
   387  			// (a.k.a.  the point at infinity). We handle that situation by
   388  			// copying the point from the table.
   389  			sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
   390  			sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
   391  			sm2P256CopyConditional(zOut, &sm2P256Factor[1], nIsInfinityMask)
   392  
   393  			// Equally, the result is also wrong if the point from the table is
   394  			// zero, which happens when the index is zero. We handle that by
   395  			// only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
   396  			pIsNoninfiniteMask = nonZeroToAllOnes(index)
   397  			mask = pIsNoninfiniteMask & ^nIsInfinityMask
   398  			sm2P256CopyConditional(xOut, &tx, mask)
   399  			sm2P256CopyConditional(yOut, &ty, mask)
   400  			sm2P256CopyConditional(zOut, &tz, mask)
   401  			// If p was not zero, then n is now non-zero.
   402  			nIsInfinityMask &^= pIsNoninfiniteMask
   403  		}
   404  	}
   405  }
   406  
// sm2P256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y} where scalar is a
// little-endian number.
func sm2P256ScalarMult(xOut, yOut, zOut, x, y *sm2P256FieldElement, scalar *[32]uint8) {
   408  	var precomp [16][3]sm2P256FieldElement
   409  	var px, py, pz, tx, ty, tz sm2P256FieldElement
   410  	var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
   411  
	// We precompute 0 through 15 times {x,y}.
   413  	precomp[1][0] = *x
   414  	precomp[1][1] = *y
   415  	precomp[1][2] = sm2P256Factor[1]
   416  
   417  	for i := 2; i < 16; i += 2 {
   418  		sm2P256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
   419  		sm2P256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
   420  	}
   421  
   422  	for i := range xOut {
   423  		xOut[i] = 0
   424  	}
   425  	for i := range yOut {
   426  		yOut[i] = 0
   427  	}
   428  	for i := range zOut {
   429  		zOut[i] = 0
   430  	}
   431  	nIsInfinityMask = ^uint32(0)
   432  
   433  	// We add in a window of four bits each iteration and do this 64 times.
   434  	for i := 0; i < 64; i++ {
   435  		if i != 0 {
   436  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
   437  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
   438  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
   439  			sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
   440  		}
   441  
   442  		index = uint32(scalar[31-i/2])
   443  		if (i & 1) == 1 {
   444  			index &= 15
   445  		} else {
   446  			index >>= 4
   447  		}
   448  
		// See the comments in sm2P256ScalarBaseMult about handling infinities.
   450  		sm2P256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
   451  		sm2P256PointAdd(xOut, yOut, zOut, &px, &py, &pz, &tx, &ty, &tz)
   452  		sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
   453  		sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
   454  		sm2P256CopyConditional(zOut, &pz, nIsInfinityMask)
   455  
   456  		pIsNoninfiniteMask = nonZeroToAllOnes(index)
   457  		mask = pIsNoninfiniteMask & ^nIsInfinityMask
   458  		sm2P256CopyConditional(xOut, &tx, mask)
   459  		sm2P256CopyConditional(yOut, &ty, mask)
   460  		sm2P256CopyConditional(zOut, &tz, mask)
   461  		nIsInfinityMask &^= pIsNoninfiniteMask
   462  	}
   463  }
   464  
// sm2P256PointToAffine converts the Jacobian point (x, y, z) to affine
// coordinates: xOut = x/z^2, yOut = y/z^3.
func sm2P256PointToAffine(xOut, yOut, x, y, z *sm2P256FieldElement) {
   466  	var zInv, zInvSq sm2P256FieldElement
   467  
   468  	zz := sm2P256ToBig(z)
   469  	zz.ModInverse(zz, sm2P256.P)
   470  	sm2P256FromBig(&zInv, zz)
   471  
   472  	sm2P256Square(&zInvSq, &zInv)
   473  	sm2P256Mul(xOut, x, &zInvSq)
   474  	sm2P256Mul(&zInv, &zInv, &zInvSq)
   475  	sm2P256Mul(yOut, y, &zInv)
   476  }
   477  
   478  func sm2P256ToAffine(x, y, z *sm2P256FieldElement) (xOut, yOut *big.Int) {
   479  	var xx, yy sm2P256FieldElement
   480  
   481  	sm2P256PointToAffine(&xx, &yy, x, y, z)
   482  	return sm2P256ToBig(&xx), sm2P256ToBig(&yy)
   483  }
   484  
// sm2P256Factor[i] is the small integer i in the internal field
// representation; sm2P256Scalar uses it to multiply an element by a small
// constant.
var sm2P256Factor = []sm2P256FieldElement{
   486  	sm2P256FieldElement{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   487  	sm2P256FieldElement{0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0},
   488  	sm2P256FieldElement{0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0},
   489  	sm2P256FieldElement{0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0},
   490  	sm2P256FieldElement{0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0},
   491  	sm2P256FieldElement{0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0},
   492  	sm2P256FieldElement{0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0},
   493  	sm2P256FieldElement{0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0},
   494  	sm2P256FieldElement{0x10, 0x0, 0x1FFFF800, 0x3FFF, 0x0, 0x0, 0x0, 0x0, 0x01},
   495  }
   496  
// sm2P256Scalar sets b = a * b, where a is a small integer (at most 8, the
// largest index in sm2P256Factor).
func sm2P256Scalar(b *sm2P256FieldElement, a int) {
   498  	sm2P256Mul(b, b, &sm2P256Factor[a])
   499  }
   500  
   501  // (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2)
   502  func sm2P256PointAdd(x1, y1, z1, x2, y2, z2, x3, y3, z3 *sm2P256FieldElement) {
   503  	var u1, u2, z22, z12, z23, z13, s1, s2, h, h2, r, r2, tm sm2P256FieldElement
   504  
   505  	if sm2P256ToBig(z1).Sign() == 0 {
   506  		sm2P256Dup(x3, x2)
   507  		sm2P256Dup(y3, y2)
   508  		sm2P256Dup(z3, z2)
   509  		return
   510  	}
   511  
   512  	if sm2P256ToBig(z2).Sign() == 0 {
   513  		sm2P256Dup(x3, x1)
   514  		sm2P256Dup(y3, y1)
   515  		sm2P256Dup(z3, z1)
   516  		return
   517  	}
   518  
   519  	sm2P256Square(&z12, z1) // z12 = z1 ^ 2
   520  	sm2P256Square(&z22, z2) // z22 = z2 ^ 2
   521  
   522  	sm2P256Mul(&z13, &z12, z1) // z13 = z1 ^ 3
   523  	sm2P256Mul(&z23, &z22, z2) // z23 = z2 ^ 3
   524  
   525  	sm2P256Mul(&u1, x1, &z22) // u1 = x1 * z2 ^ 2
   526  	sm2P256Mul(&u2, x2, &z12) // u2 = x2 * z1 ^ 2
   527  
   528  	sm2P256Mul(&s1, y1, &z23) // s1 = y1 * z2 ^ 3
   529  	sm2P256Mul(&s2, y2, &z13) // s2 = y2 * z1 ^ 3
   530  
	if sm2P256ToBig(&u1).Cmp(sm2P256ToBig(&u2)) == 0 &&
		sm2P256ToBig(&s1).Cmp(sm2P256ToBig(&s2)) == 0 {
		// The two points are equal, so the general addition formula below
		// would yield h = r = 0; double the point instead and return.
		sm2P256PointDouble(x3, y3, z3, x1, y1, z1)
		return
	}
   535  
   536  	sm2P256Sub(&h, &u2, &u1) // h = u2 - u1
   537  	sm2P256Sub(&r, &s2, &s1) // r = s2 - s1
   538  
   539  	sm2P256Square(&r2, &r) // r2 = r ^ 2
   540  	sm2P256Square(&h2, &h) // h2 = h ^ 2
   541  
   542  	sm2P256Mul(&tm, &h2, &h) // tm = h ^ 3
   543  	sm2P256Sub(x3, &r2, &tm)
   544  	sm2P256Mul(&tm, &u1, &h2)
   545  	sm2P256Scalar(&tm, 2)   // tm = 2 * (u1 * h ^ 2)
   546  	sm2P256Sub(x3, x3, &tm) // x3 = r ^ 2 - h ^ 3 - 2 * u1 * h ^ 2
   547  
   548  	sm2P256Mul(&tm, &u1, &h2) // tm = u1 * h ^ 2
   549  	sm2P256Sub(&tm, &tm, x3)  // tm = u1 * h ^ 2 - x3
   550  	sm2P256Mul(y3, &r, &tm)
   551  	sm2P256Mul(&tm, &h2, &h)  // tm = h ^ 3
   552  	sm2P256Mul(&tm, &tm, &s1) // tm = s1 * h ^ 3
   553  	sm2P256Sub(y3, y3, &tm)   // y3 = r * (u1 * h ^ 2 - x3) - s1 * h ^ 3
   554  
   555  	sm2P256Mul(z3, z1, z2)
	sm2P256Mul(z3, z3, &h) // z3 = z1 * z2 * h
   557  }
   558  
// sm2P256PointDouble sets (x3, y3, z3) = 2 * (x, y, z).
func sm2P256PointDouble(x3, y3, z3, x, y, z *sm2P256FieldElement) {
   560  	var s, m, m2, x2, y2, z2, z4, y4, az4 sm2P256FieldElement
   561  
   562  	sm2P256Square(&x2, x) // x2 = x ^ 2
   563  	sm2P256Square(&y2, y) // y2 = y ^ 2
   564  	sm2P256Square(&z2, z) // z2 = z ^ 2
   565  
   566  	sm2P256Square(&z4, z)   // z4 = z ^ 2
   567  	sm2P256Mul(&z4, &z4, z) // z4 = z ^ 3
   568  	sm2P256Mul(&z4, &z4, z) // z4 = z ^ 4
   569  
   570  	sm2P256Square(&y4, y)   // y4 = y ^ 2
   571  	sm2P256Mul(&y4, &y4, y) // y4 = y ^ 3
   572  	sm2P256Mul(&y4, &y4, y) // y4 = y ^ 4
   573  	sm2P256Scalar(&y4, 8)   // y4 = 8 * y ^ 4
   574  
   575  	sm2P256Mul(&s, x, &y2)
   576  	sm2P256Scalar(&s, 4) // s = 4 * x * y ^ 2
   577  
   578  	sm2P256Dup(&m, &x2)
   579  	sm2P256Scalar(&m, 3)
   580  	sm2P256Mul(&az4, &sm2P256.a, &z4)
   581  	sm2P256Add(&m, &m, &az4) // m = 3 * x ^ 2 + a * z ^ 4
   582  
   583  	sm2P256Square(&m2, &m) // m2 = m ^ 2
   584  
   585  	sm2P256Add(z3, y, z)
   586  	sm2P256Square(z3, z3)
   587  	sm2P256Sub(z3, z3, &z2)
   588  	sm2P256Sub(z3, z3, &y2) // z' = (y + z) ^2 - z ^ 2 - y ^ 2
   589  
   590  	sm2P256Sub(x3, &m2, &s)
   591  	sm2P256Sub(x3, x3, &s) // x' = m2 - 2 * s
   592  
   593  	sm2P256Sub(y3, &s, x3)
   594  	sm2P256Mul(y3, y3, &m)
   595  	sm2P256Sub(y3, y3, &y4) // y' = m * (s - x') - 8 * y ^ 4
   596  }
   597  
// sm2P256Zero31 is 0 mod p.
   599  var sm2P256Zero31 = sm2P256FieldElement{0x7FFFFFF8, 0x3FFFFFFC, 0x800003FC, 0x3FFFDFFC, 0x7FFFFFFC, 0x3FFFFFFC, 0x7FFFFFFC, 0x37FFFFFC, 0x7FFFFFFC}
   600  
   601  // c = a + b
   602  func sm2P256Add(c, a, b *sm2P256FieldElement) {
   603  	carry := uint32(0)
   604  	for i := 0; ; i++ {
   605  		c[i] = a[i] + b[i]
   606  		c[i] += carry
   607  		carry = c[i] >> 29
   608  		c[i] &= bottom29Bits
   609  		i++
   610  		if i == 9 {
   611  			break
   612  		}
   613  		c[i] = a[i] + b[i]
   614  		c[i] += carry
   615  		carry = c[i] >> 28
   616  		c[i] &= bottom28Bits
   617  	}
   618  	sm2P256ReduceCarry(c, carry)
   619  }
   620  
   621  // c = a - b
   622  func sm2P256Sub(c, a, b *sm2P256FieldElement) {
   623  	var carry uint32
   624  
   625  	for i := 0; ; i++ {
   626  		c[i] = a[i] - b[i]
   627  		c[i] += sm2P256Zero31[i]
   628  		c[i] += carry
   629  		carry = c[i] >> 29
   630  		c[i] &= bottom29Bits
   631  		i++
   632  		if i == 9 {
   633  			break
   634  		}
   635  		c[i] = a[i] - b[i]
   636  		c[i] += sm2P256Zero31[i]
   637  		c[i] += carry
   638  		carry = c[i] >> 28
   639  		c[i] &= bottom28Bits
   640  	}
   641  	sm2P256ReduceCarry(c, carry)
   642  }
   643  
   644  // c = a * b
   645  func sm2P256Mul(c, a, b *sm2P256FieldElement) {
   646  	var tmp sm2P256LargeFieldElement
   647  
   648  	tmp[0] = uint64(a[0]) * uint64(b[0])
   649  	tmp[1] = uint64(a[0])*(uint64(b[1])<<0) +
   650  		uint64(a[1])*(uint64(b[0])<<0)
   651  	tmp[2] = uint64(a[0])*(uint64(b[2])<<0) +
   652  		uint64(a[1])*(uint64(b[1])<<1) +
   653  		uint64(a[2])*(uint64(b[0])<<0)
   654  	tmp[3] = uint64(a[0])*(uint64(b[3])<<0) +
   655  		uint64(a[1])*(uint64(b[2])<<0) +
   656  		uint64(a[2])*(uint64(b[1])<<0) +
   657  		uint64(a[3])*(uint64(b[0])<<0)
   658  	tmp[4] = uint64(a[0])*(uint64(b[4])<<0) +
   659  		uint64(a[1])*(uint64(b[3])<<1) +
   660  		uint64(a[2])*(uint64(b[2])<<0) +
   661  		uint64(a[3])*(uint64(b[1])<<1) +
   662  		uint64(a[4])*(uint64(b[0])<<0)
   663  	tmp[5] = uint64(a[0])*(uint64(b[5])<<0) +
   664  		uint64(a[1])*(uint64(b[4])<<0) +
   665  		uint64(a[2])*(uint64(b[3])<<0) +
   666  		uint64(a[3])*(uint64(b[2])<<0) +
   667  		uint64(a[4])*(uint64(b[1])<<0) +
   668  		uint64(a[5])*(uint64(b[0])<<0)
   669  	tmp[6] = uint64(a[0])*(uint64(b[6])<<0) +
   670  		uint64(a[1])*(uint64(b[5])<<1) +
   671  		uint64(a[2])*(uint64(b[4])<<0) +
   672  		uint64(a[3])*(uint64(b[3])<<1) +
   673  		uint64(a[4])*(uint64(b[2])<<0) +
   674  		uint64(a[5])*(uint64(b[1])<<1) +
   675  		uint64(a[6])*(uint64(b[0])<<0)
   676  	tmp[7] = uint64(a[0])*(uint64(b[7])<<0) +
   677  		uint64(a[1])*(uint64(b[6])<<0) +
   678  		uint64(a[2])*(uint64(b[5])<<0) +
   679  		uint64(a[3])*(uint64(b[4])<<0) +
   680  		uint64(a[4])*(uint64(b[3])<<0) +
   681  		uint64(a[5])*(uint64(b[2])<<0) +
   682  		uint64(a[6])*(uint64(b[1])<<0) +
   683  		uint64(a[7])*(uint64(b[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// sm2P256Square.
   686  	tmp[8] = uint64(a[0])*(uint64(b[8])<<0) +
   687  		uint64(a[1])*(uint64(b[7])<<1) +
   688  		uint64(a[2])*(uint64(b[6])<<0) +
   689  		uint64(a[3])*(uint64(b[5])<<1) +
   690  		uint64(a[4])*(uint64(b[4])<<0) +
   691  		uint64(a[5])*(uint64(b[3])<<1) +
   692  		uint64(a[6])*(uint64(b[2])<<0) +
   693  		uint64(a[7])*(uint64(b[1])<<1) +
   694  		uint64(a[8])*(uint64(b[0])<<0)
   695  	tmp[9] = uint64(a[1])*(uint64(b[8])<<0) +
   696  		uint64(a[2])*(uint64(b[7])<<0) +
   697  		uint64(a[3])*(uint64(b[6])<<0) +
   698  		uint64(a[4])*(uint64(b[5])<<0) +
   699  		uint64(a[5])*(uint64(b[4])<<0) +
   700  		uint64(a[6])*(uint64(b[3])<<0) +
   701  		uint64(a[7])*(uint64(b[2])<<0) +
   702  		uint64(a[8])*(uint64(b[1])<<0)
   703  	tmp[10] = uint64(a[2])*(uint64(b[8])<<0) +
   704  		uint64(a[3])*(uint64(b[7])<<1) +
   705  		uint64(a[4])*(uint64(b[6])<<0) +
   706  		uint64(a[5])*(uint64(b[5])<<1) +
   707  		uint64(a[6])*(uint64(b[4])<<0) +
   708  		uint64(a[7])*(uint64(b[3])<<1) +
   709  		uint64(a[8])*(uint64(b[2])<<0)
   710  	tmp[11] = uint64(a[3])*(uint64(b[8])<<0) +
   711  		uint64(a[4])*(uint64(b[7])<<0) +
   712  		uint64(a[5])*(uint64(b[6])<<0) +
   713  		uint64(a[6])*(uint64(b[5])<<0) +
   714  		uint64(a[7])*(uint64(b[4])<<0) +
   715  		uint64(a[8])*(uint64(b[3])<<0)
   716  	tmp[12] = uint64(a[4])*(uint64(b[8])<<0) +
   717  		uint64(a[5])*(uint64(b[7])<<1) +
   718  		uint64(a[6])*(uint64(b[6])<<0) +
   719  		uint64(a[7])*(uint64(b[5])<<1) +
   720  		uint64(a[8])*(uint64(b[4])<<0)
   721  	tmp[13] = uint64(a[5])*(uint64(b[8])<<0) +
   722  		uint64(a[6])*(uint64(b[7])<<0) +
   723  		uint64(a[7])*(uint64(b[6])<<0) +
   724  		uint64(a[8])*(uint64(b[5])<<0)
   725  	tmp[14] = uint64(a[6])*(uint64(b[8])<<0) +
   726  		uint64(a[7])*(uint64(b[7])<<1) +
   727  		uint64(a[8])*(uint64(b[6])<<0)
   728  	tmp[15] = uint64(a[7])*(uint64(b[8])<<0) +
   729  		uint64(a[8])*(uint64(b[7])<<0)
   730  	tmp[16] = uint64(a[8]) * (uint64(b[8]) << 0)
   731  	sm2P256ReduceDegree(c, &tmp)
   732  }
   733  
   734  // b = a * a
   735  func sm2P256Square(b, a *sm2P256FieldElement) {
   736  	var tmp sm2P256LargeFieldElement
   737  
   738  	tmp[0] = uint64(a[0]) * uint64(a[0])
   739  	tmp[1] = uint64(a[0]) * (uint64(a[1]) << 1)
   740  	tmp[2] = uint64(a[0])*(uint64(a[2])<<1) +
   741  		uint64(a[1])*(uint64(a[1])<<1)
   742  	tmp[3] = uint64(a[0])*(uint64(a[3])<<1) +
   743  		uint64(a[1])*(uint64(a[2])<<1)
   744  	tmp[4] = uint64(a[0])*(uint64(a[4])<<1) +
   745  		uint64(a[1])*(uint64(a[3])<<2) +
   746  		uint64(a[2])*uint64(a[2])
   747  	tmp[5] = uint64(a[0])*(uint64(a[5])<<1) +
   748  		uint64(a[1])*(uint64(a[4])<<1) +
   749  		uint64(a[2])*(uint64(a[3])<<1)
   750  	tmp[6] = uint64(a[0])*(uint64(a[6])<<1) +
   751  		uint64(a[1])*(uint64(a[5])<<2) +
   752  		uint64(a[2])*(uint64(a[4])<<1) +
   753  		uint64(a[3])*(uint64(a[3])<<1)
   754  	tmp[7] = uint64(a[0])*(uint64(a[7])<<1) +
   755  		uint64(a[1])*(uint64(a[6])<<1) +
   756  		uint64(a[2])*(uint64(a[5])<<1) +
   757  		uint64(a[3])*(uint64(a[4])<<1)
   758  	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
   759  	// which is < 2**64 as required.
   760  	tmp[8] = uint64(a[0])*(uint64(a[8])<<1) +
   761  		uint64(a[1])*(uint64(a[7])<<2) +
   762  		uint64(a[2])*(uint64(a[6])<<1) +
   763  		uint64(a[3])*(uint64(a[5])<<2) +
   764  		uint64(a[4])*uint64(a[4])
   765  	tmp[9] = uint64(a[1])*(uint64(a[8])<<1) +
   766  		uint64(a[2])*(uint64(a[7])<<1) +
   767  		uint64(a[3])*(uint64(a[6])<<1) +
   768  		uint64(a[4])*(uint64(a[5])<<1)
   769  	tmp[10] = uint64(a[2])*(uint64(a[8])<<1) +
   770  		uint64(a[3])*(uint64(a[7])<<2) +
   771  		uint64(a[4])*(uint64(a[6])<<1) +
   772  		uint64(a[5])*(uint64(a[5])<<1)
   773  	tmp[11] = uint64(a[3])*(uint64(a[8])<<1) +
   774  		uint64(a[4])*(uint64(a[7])<<1) +
   775  		uint64(a[5])*(uint64(a[6])<<1)
   776  	tmp[12] = uint64(a[4])*(uint64(a[8])<<1) +
   777  		uint64(a[5])*(uint64(a[7])<<2) +
   778  		uint64(a[6])*uint64(a[6])
   779  	tmp[13] = uint64(a[5])*(uint64(a[8])<<1) +
   780  		uint64(a[6])*(uint64(a[7])<<1)
   781  	tmp[14] = uint64(a[6])*(uint64(a[8])<<1) +
   782  		uint64(a[7])*(uint64(a[7])<<1)
   783  	tmp[15] = uint64(a[7]) * (uint64(a[8]) << 1)
   784  	tmp[16] = uint64(a[8]) * uint64(a[8])
   785  	sm2P256ReduceDegree(b, &tmp)
   786  }
   787  
   788  // nonZeroToAllOnes returns:
   789  //   0xffffffff for 0 < x <= 2**31
   790  //   0 for x == 0 or x > 2**31.
   791  func nonZeroToAllOnes(x uint32) uint32 {
   792  	return ((x - 1) >> 31) - 1
   793  }
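// Worked example: nonZeroToAllOnes(0) evaluates ((0-1)>>31)-1 = (0xffffffff>>31)-1
// = 1-1 = 0, while for 0 < x <= 2**31 the subtraction cannot reach bit 31, so
// ((x-1)>>31)-1 = 0-1 = 0xffffffff.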
   794  
// sm2P256Carry[9*i+j] is limb j of the field element i·2^257 mod p.
// sm2P256ReduceCarry adds the row selected by the carry to fold the overflow
// back into the element.
var sm2P256Carry = [8 * 9]uint32{
   796  	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
   797  	0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0,
   798  	0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0,
   799  	0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0,
   800  	0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0,
   801  	0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0,
   802  	0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0,
   803  	0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0,
   804  }
   805  
// sm2P256ReduceCarry folds a carry out of the top limb (carry < 2^3) back into
// a. Only limbs 0, 2, 3 and 7 are updated because every other limb of the
// relevant sm2P256Carry row is zero.
func sm2P256ReduceCarry(a *sm2P256FieldElement, carry uint32) {
   808  	a[0] += sm2P256Carry[carry*9+0]
   809  	a[2] += sm2P256Carry[carry*9+2]
   810  	a[3] += sm2P256Carry[carry*9+3]
   811  	a[7] += sm2P256Carry[carry*9+7]
   812  }
   813  
// sm2P256ReduceDegree reduces the 17-limb intermediate value in b to a 9-limb
// field element in a. (Original author's note, translated: this code really is
// ugly; you had better not touch it, or you will regret it.)
   816  func sm2P256ReduceDegree(a *sm2P256FieldElement, b *sm2P256LargeFieldElement) {
   817  	var tmp [18]uint32
   818  	var carry, x, xMask uint32
   819  
   820  	// tmp
   821  	// 0  | 1  | 2  | 3  | 4  | 5  | 6  | 7  | 8  |  9 | 10 ...
   822  	// 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 ...
   823  	tmp[0] = uint32(b[0]) & bottom29Bits
   824  	tmp[1] = uint32(b[0]) >> 29
   825  	tmp[1] |= (uint32(b[0]>>32) << 3) & bottom28Bits
   826  	tmp[1] += uint32(b[1]) & bottom28Bits
   827  	carry = tmp[1] >> 28
   828  	tmp[1] &= bottom28Bits
   829  	for i := 2; i < 17; i++ {
   830  		tmp[i] = (uint32(b[i-2] >> 32)) >> 25
   831  		tmp[i] += (uint32(b[i-1])) >> 28
   832  		tmp[i] += (uint32(b[i-1]>>32) << 4) & bottom29Bits
   833  		tmp[i] += uint32(b[i]) & bottom29Bits
   834  		tmp[i] += carry
   835  		carry = tmp[i] >> 29
   836  		tmp[i] &= bottom29Bits
   837  
   838  		i++
   839  		if i == 17 {
   840  			break
   841  		}
   842  		tmp[i] = uint32(b[i-2]>>32) >> 25
   843  		tmp[i] += uint32(b[i-1]) >> 29
   844  		tmp[i] += ((uint32(b[i-1] >> 32)) << 3) & bottom28Bits
   845  		tmp[i] += uint32(b[i]) & bottom28Bits
   846  		tmp[i] += carry
   847  		carry = tmp[i] >> 28
   848  		tmp[i] &= bottom28Bits
   849  	}
   850  	tmp[17] = uint32(b[15]>>32) >> 25
   851  	tmp[17] += uint32(b[16]) >> 29
   852  	tmp[17] += uint32(b[16]>>32) << 3
   853  	tmp[17] += carry
   854  
   855  	for i := 0; ; i += 2 {
   856  
   857  		tmp[i+1] += tmp[i] >> 29
   858  		x = tmp[i] & bottom29Bits
   859  		tmp[i] = 0
   860  		if x > 0 {
   861  			set4 := uint32(0)
   862  			set7 := uint32(0)
   863  			xMask = nonZeroToAllOnes(x)
   864  			tmp[i+2] += (x << 7) & bottom29Bits
   865  			tmp[i+3] += x >> 22
   866  			if tmp[i+3] < 0x10000000 {
   867  				set4 = 1
   868  				tmp[i+3] += 0x10000000 & xMask
   869  				tmp[i+3] -= (x << 10) & bottom28Bits
   870  			} else {
   871  				tmp[i+3] -= (x << 10) & bottom28Bits
   872  			}
   873  			if tmp[i+4] < 0x20000000 {
   874  				tmp[i+4] += 0x20000000 & xMask
				tmp[i+4] -= set4 // borrow
   876  				tmp[i+4] -= x >> 18
   877  				if tmp[i+5] < 0x10000000 {
   878  					tmp[i+5] += 0x10000000 & xMask
					tmp[i+5] -= 1 // borrow
   880  					if tmp[i+6] < 0x20000000 {
   881  						set7 = 1
   882  						tmp[i+6] += 0x20000000 & xMask
						tmp[i+6] -= 1 // borrow
   884  					} else {
						tmp[i+6] -= 1 // borrow
   886  					}
   887  				} else {
   888  					tmp[i+5] -= 1
   889  				}
   890  			} else {
				tmp[i+4] -= set4 // borrow
   892  				tmp[i+4] -= x >> 18
   893  			}
   894  			if tmp[i+7] < 0x10000000 {
   895  				tmp[i+7] += 0x10000000 & xMask
   896  				tmp[i+7] -= set7
   897  				tmp[i+7] -= (x << 24) & bottom28Bits
   898  				tmp[i+8] += (x << 28) & bottom29Bits
   899  				if tmp[i+8] < 0x20000000 {
   900  					tmp[i+8] += 0x20000000 & xMask
   901  					tmp[i+8] -= 1
   902  					tmp[i+8] -= x >> 4
   903  					tmp[i+9] += ((x >> 1) - 1) & xMask
   904  				} else {
   905  					tmp[i+8] -= 1
   906  					tmp[i+8] -= x >> 4
   907  					tmp[i+9] += (x >> 1) & xMask
   908  				}
   909  			} else {
				tmp[i+7] -= set7 // borrow
   911  				tmp[i+7] -= (x << 24) & bottom28Bits
   912  				tmp[i+8] += (x << 28) & bottom29Bits
   913  				if tmp[i+8] < 0x20000000 {
   914  					tmp[i+8] += 0x20000000 & xMask
   915  					tmp[i+8] -= x >> 4
   916  					tmp[i+9] += ((x >> 1) - 1) & xMask
   917  				} else {
   918  					tmp[i+8] -= x >> 4
   919  					tmp[i+9] += (x >> 1) & xMask
   920  				}
   921  			}
   922  
   923  		}
   924  
   925  		if i+1 == 9 {
   926  			break
   927  		}
   928  
   929  		tmp[i+2] += tmp[i+1] >> 28
   930  		x = tmp[i+1] & bottom28Bits
   931  		tmp[i+1] = 0
   932  		if x > 0 {
   933  			set5 := uint32(0)
   934  			set8 := uint32(0)
   935  			set9 := uint32(0)
   936  			xMask = nonZeroToAllOnes(x)
   937  			tmp[i+3] += (x << 7) & bottom28Bits
   938  			tmp[i+4] += x >> 21
   939  			if tmp[i+4] < 0x20000000 {
   940  				set5 = 1
   941  				tmp[i+4] += 0x20000000 & xMask
   942  				tmp[i+4] -= (x << 11) & bottom29Bits
   943  			} else {
   944  				tmp[i+4] -= (x << 11) & bottom29Bits
   945  			}
   946  			if tmp[i+5] < 0x10000000 {
   947  				tmp[i+5] += 0x10000000 & xMask
				tmp[i+5] -= set5 // borrow
   949  				tmp[i+5] -= x >> 18
   950  				if tmp[i+6] < 0x20000000 {
   951  					tmp[i+6] += 0x20000000 & xMask
					tmp[i+6] -= 1 // borrow
   953  					if tmp[i+7] < 0x10000000 {
   954  						set8 = 1
   955  						tmp[i+7] += 0x10000000 & xMask
						tmp[i+7] -= 1 // borrow
   957  					} else {
						tmp[i+7] -= 1 // borrow
   959  					}
   960  				} else {
					tmp[i+6] -= 1 // borrow
   962  				}
   963  			} else {
				tmp[i+5] -= set5 // borrow
   965  				tmp[i+5] -= x >> 18
   966  			}
   967  			if tmp[i+8] < 0x20000000 {
   968  				set9 = 1
   969  				tmp[i+8] += 0x20000000 & xMask
   970  				tmp[i+8] -= set8
   971  				tmp[i+8] -= (x << 25) & bottom29Bits
   972  			} else {
   973  				tmp[i+8] -= set8
   974  				tmp[i+8] -= (x << 25) & bottom29Bits
   975  			}
   976  			if tmp[i+9] < 0x10000000 {
   977  				tmp[i+9] += 0x10000000 & xMask
				tmp[i+9] -= set9 // borrow
   979  				tmp[i+9] -= x >> 4
   980  				tmp[i+10] += (x - 1) & xMask
   981  			} else {
				tmp[i+9] -= set9 // borrow
   983  				tmp[i+9] -= x >> 4
   984  				tmp[i+10] += x & xMask
   985  			}
   986  		}
   987  	}
   988  
   989  	carry = uint32(0)
   990  	for i := 0; i < 8; i++ {
   991  		a[i] = tmp[i+9]
   992  		a[i] += carry
   993  		a[i] += (tmp[i+10] << 28) & bottom29Bits
   994  		carry = a[i] >> 29
   995  		a[i] &= bottom29Bits
   996  
   997  		i++
   998  		a[i] = tmp[i+9] >> 1
   999  		a[i] += carry
  1000  		carry = a[i] >> 28
  1001  		a[i] &= bottom28Bits
  1002  	}
  1003  	a[8] = tmp[17]
  1004  	a[8] += carry
  1005  	carry = a[8] >> 29
  1006  	a[8] &= bottom29Bits
  1007  	sm2P256ReduceCarry(a, carry)
  1008  }
  1009  
  1010  // b = a
  1011  func sm2P256Dup(b, a *sm2P256FieldElement) {
  1012  	*b = *a
  1013  }
  1014  
// sm2P256FromBig sets X = a * R mod P, where R = 2^257, splitting the result
// into alternating 29- and 28-bit limbs.
func sm2P256FromBig(X *sm2P256FieldElement, a *big.Int) {
  1017  	x := new(big.Int).Lsh(a, 257)
  1018  	x.Mod(x, sm2P256.P)
  1019  	for i := 0; i < 9; i++ {
  1020  		if bits := x.Bits(); len(bits) > 0 {
  1021  			X[i] = uint32(bits[0]) & bottom29Bits
  1022  		} else {
  1023  			X[i] = 0
  1024  		}
  1025  		x.Rsh(x, 29)
  1026  		i++
  1027  		if i == 9 {
  1028  			break
  1029  		}
  1030  		if bits := x.Bits(); len(bits) > 0 {
  1031  			X[i] = uint32(bits[0]) & bottom28Bits
  1032  		} else {
  1033  			X[i] = 0
  1034  		}
  1035  		x.Rsh(x, 28)
  1036  	}
  1037  }
  1038  
// sm2P256ToBig returns the value held in X. X stores r * R mod P, so the
// limbs are reassembled and multiplied by RInverse to recover r = X * R' mod P.
func sm2P256ToBig(X *sm2P256FieldElement) *big.Int {
  1042  	r, tm := new(big.Int), new(big.Int)
  1043  	r.SetInt64(int64(X[8]))
  1044  	for i := 7; i >= 0; i-- {
  1045  		if (i & 1) == 0 {
  1046  			r.Lsh(r, 29)
  1047  		} else {
  1048  			r.Lsh(r, 28)
  1049  		}
  1050  		tm.SetInt64(int64(X[i]))
  1051  		r.Add(r, tm)
  1052  	}
  1053  	r.Mul(r, sm2P256.RInverse)
  1054  	r.Mod(r, sm2P256.P)
  1055  	return r
  1056  }
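// Round-trip sketch (not part of the original file): sm2P256FromBig stores
// a * 2^257 mod P and sm2P256ToBig multiplies by RInverse (the precomputed
// inverse of 2^257 mod P), so converting a value already reduced mod P back
// and forth is the identity.
//
//	var fe sm2P256FieldElement
//	v := big.NewInt(123456789)
//	sm2P256FromBig(&fe, v)
//	restored := sm2P256ToBig(&fe) // expected: restored.Cmp(v) == 0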