github.com/emmansun/gmsm@v0.29.1/internal/sm2ec/p256_asm_ord.go (about)

     1  //go:build (amd64 || arm64 || s390x || ppc64le) && !purego
     2  
     3  package sm2ec
     4  
     5  import "errors"
     6  
// p256OrdMul performs a Montgomery multiplication modulo ord(G) (the modulus
// written n elsewhere in this file). It sets res = in1 * in2 * R⁻¹.
//
// The function is declared without a body, so its implementation is supplied
// outside this file. Callers in this file pass the same pointer as res and as
// an input (for example p256OrdMul(x, x, RR)).
//
//go:noescape
func p256OrdMul(res, in1, in2 *p256OrdElement)
    11  
// p256OrdSqr performs a Montgomery squaring modulo ord(G), repeated n times
// (n >= 1): res is set to in squared n times in succession.
//
// The function is declared without a body, so its implementation is supplied
// outside this file. Callers in this file pass the same pointer as res and in
// (for example p256OrdSqr(t8, t8, 13)).
//
//go:noescape
func p256OrdSqr(res, in *p256OrdElement, n int)
    16  
// This code operates in the Montgomery domain where R = 2²⁵⁶ mod n and n is
// the order of the scalar field. Elements in the Montgomery domain take the
// form a×R and p256OrdMul calculates (a × b × R⁻¹) mod n.
//
// RR is R expressed in the Montgomery domain, i.e. R×R mod n, so
// p256OrdMul(x, x, RR) yields x×R and thereby converts x into the Montgomery
// domain. It is a shared package-level pointer that this file only ever
// passes as an input operand; it should be treated as read-only.
var RR = &p256OrdElement{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b}
    23  
// P256OrdInverse returns k⁻¹ mod ord(G) as a 32-byte slice. If k is zero, the
// result is zero.
//
// k must be exactly 32 bytes long; otherwise a nil slice and an
// "invalid scalar length" error are returned.
// NOTE(review): the helper name p256OrdBigToLittle suggests k is a big-endian
// encoding — confirm against the helper.
//
// The inverse is computed as k raised to the power n-2, where n = ord(G).
// n-2 =
// 1111111111111111111111111111111011111111111111111111111111111111
// 1111111111111111111111111111111111111111111111111111111111111111
// 0111001000000011110111110110101100100001110001100000010100101011
// 0101001110111011111101000000100100111001110101010100000100100001
func P256OrdInverse(k []byte) ([]byte, error) {
	if len(k) != 32 {
		return nil, errors.New("invalid scalar length")
	}
	x := new(p256OrdElement)
	p256OrdBigToLittle(x, (*[32]byte)(k))
	// Move x into the Montgomery domain (x×R) so that the Montgomery
	// multiply/square primitives below can be chained.
	p256OrdMul(x, x, RR)
	// Inversion is implemented as exponentiation with exponent n − 2.
	// The sequence of 41 multiplications and 253 squarings is derived from the
	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
	//
	// In the chain, a name such as _1101 denotes x raised to the exponent with
	// that binary representation, "a + b" is a multiplication of the two
	// powers, "2*a" is one squaring and "a << s" is s successive squarings.
	//
	//	_10      = 2*1
	//	_11      = 1 + _10
	//	_100     = 1 + _11
	//	_101     = 1 + _100
	//	_111     = _10 + _101
	//	_1001    = _10 + _111
	//	_1101    = _100 + _1001
	//	_1111    = _10 + _1101
	//	_11110   = 2*_1111
	//	_11111   = 1 + _11110
	//	_111110  = 2*_11111
	//	_111111  = 1 + _111110
	//	_1111110 = 2*_111111
	//	i20      = _1111110 << 6 + _1111110
	//	x18      = i20 << 5 + _111111
	//	x31      = x18 << 13 + i20 + 1
	//	i42      = 2*x31
	//	i44      = i42 << 2
	//	i140     = ((i44 << 32 + i44) << 29 + i42) << 33
	//	i150     = ((i44 + i140 + _111) << 4 + _111) << 3
	//	i170     = ((1 + i150) << 11 + _1111) << 6 + _11111
	//	i183     = ((i170 << 5 + _1101) << 3 + _11) << 3
	//	i198     = ((1 + i183) << 7 + _111) << 5 + _11
	//	i219     = ((i198 << 9 + _101) << 5 + _101) << 5
	//	i231     = ((_1101 + i219) << 5 + _1001) << 4 + _1101
	//	i244     = ((i231 << 2 + _11) << 7 + _111111) << 2
	//	i262     = ((1 + i244) << 10 + _1001) << 5 + _111
	//	i277     = ((i262 << 5 + _111) << 4 + _101) << 4
	//	return     ((_101 + i277) << 9 + _1001) << 5 + 1
	//
	// z and t0..t9 are scratch registers for the chain; each one is reused
	// for later chain values once its earlier value is no longer needed.
	var z = new(p256OrdElement)
	var t0 = new(p256OrdElement)
	var t1 = new(p256OrdElement)
	var t2 = new(p256OrdElement)
	var t3 = new(p256OrdElement)
	var t4 = new(p256OrdElement)
	var t5 = new(p256OrdElement)
	var t6 = new(p256OrdElement)
	var t7 = new(p256OrdElement)
	var t8 = new(p256OrdElement)
	var t9 = new(p256OrdElement)

	// Small building blocks: t3 = _10, z = _11, t4 = _100, t1 = _101,
	// t2 = _111, t0 = _1001, t4 = _1101, t6 = _1111.
	p256OrdSqr(t3, x, 1)
	p256OrdMul(z, x, t3)
	p256OrdMul(t4, x, z)
	p256OrdMul(t1, x, t4)
	p256OrdMul(t2, t3, t1)
	p256OrdMul(t0, t3, t2)
	p256OrdMul(t4, t4, t0)
	p256OrdMul(t6, t3, t4)
	// t5 = _11111, t3 = _111111, t7 = _1111110.
	p256OrdSqr(t3, t6, 1)
	p256OrdMul(t5, x, t3)
	p256OrdSqr(t3, t5, 1)
	p256OrdMul(t3, x, t3)
	p256OrdSqr(t7, t3, 1)
	// t7 = i20.
	p256OrdSqr(t8, t7, 6)
	p256OrdMul(t7, t7, t8)
	// t8 = x18.
	p256OrdSqr(t8, t7, 5)
	p256OrdMul(t8, t3, t8)
	// t7 = x31.
	p256OrdSqr(t8, t8, 13)
	p256OrdMul(t7, t7, t8)
	p256OrdMul(t7, x, t7)
	// t8 = i42, t7 = i44.
	p256OrdSqr(t8, t7, 1)
	p256OrdSqr(t7, t8, 2)
	// t8 = i140.
	p256OrdSqr(t9, t7, 32)
	p256OrdMul(t9, t7, t9)
	p256OrdSqr(t9, t9, 29)
	p256OrdMul(t8, t8, t9)
	p256OrdSqr(t8, t8, 33)
	// t7 = i150.
	p256OrdMul(t7, t7, t8)
	p256OrdMul(t7, t2, t7)
	p256OrdSqr(t7, t7, 4)
	p256OrdMul(t7, t2, t7)
	p256OrdSqr(t7, t7, 3)
	// t5 = i170.
	p256OrdMul(t7, x, t7)
	p256OrdSqr(t7, t7, 11)
	p256OrdMul(t6, t6, t7)
	p256OrdSqr(t6, t6, 6)
	p256OrdMul(t5, t5, t6)
	// t5 = i183.
	p256OrdSqr(t5, t5, 5)
	p256OrdMul(t5, t4, t5)
	p256OrdSqr(t5, t5, 3)
	p256OrdMul(t5, z, t5)
	p256OrdSqr(t5, t5, 3)
	// t5 = i198.
	p256OrdMul(t5, x, t5)
	p256OrdSqr(t5, t5, 7)
	p256OrdMul(t5, t2, t5)
	p256OrdSqr(t5, t5, 5)
	p256OrdMul(t5, z, t5)
	// t5 = i219.
	p256OrdSqr(t5, t5, 9)
	p256OrdMul(t5, t1, t5)
	p256OrdSqr(t5, t5, 5)
	p256OrdMul(t5, t1, t5)
	p256OrdSqr(t5, t5, 5)
	// t4 = i231.
	p256OrdMul(t5, t4, t5)
	p256OrdSqr(t5, t5, 5)
	p256OrdMul(t5, t0, t5)
	p256OrdSqr(t5, t5, 4)
	p256OrdMul(t4, t4, t5)
	// t3 = i244.
	p256OrdSqr(t4, t4, 2)
	p256OrdMul(t4, z, t4)
	p256OrdSqr(t4, t4, 7)
	p256OrdMul(t3, t3, t4)
	p256OrdSqr(t3, t3, 2)
	// t3 = i262.
	p256OrdMul(t3, x, t3)
	p256OrdSqr(t3, t3, 10)
	p256OrdMul(t3, t0, t3)
	p256OrdSqr(t3, t3, 5)
	p256OrdMul(t3, t2, t3)
	// t2 = i277.
	p256OrdSqr(t3, t3, 5)
	p256OrdMul(t2, t2, t3)
	p256OrdSqr(t2, t2, 4)
	p256OrdMul(t2, t1, t2)
	p256OrdSqr(t2, t2, 4)
	// Final step of the chain: z = ((_101 + i277) << 9 + _1001) << 5 + 1.
	p256OrdMul(t1, t1, t2)
	p256OrdSqr(t1, t1, 9)
	p256OrdMul(t0, t0, t1)
	p256OrdSqr(t0, t0, 5)
	p256OrdMul(z, x, t0)
	// z is still in the Montgomery domain; convert it back and serialize it.
	return p256OrderFromMont(z), nil
}
   162  
   163  // P256OrdMul multiplication modulo org(G).
   164  func P256OrdMul(in1, in2 []byte) ([]byte, error) {
   165  	if len(in1) != 32 || len(in2) != 32 {
   166  		return nil, errors.New("invalid scalar length")
   167  	}
   168  	x1 := new(p256OrdElement)
   169  	p256OrdBigToLittle(x1, (*[32]byte)(in1))
   170  	p256OrdMul(x1, x1, RR)
   171  
   172  	x2 := new(p256OrdElement)
   173  	p256OrdBigToLittle(x2, (*[32]byte)(in2))
   174  	p256OrdMul(x2, x2, RR)
   175  
   176  	res := new(p256OrdElement)
   177  	p256OrdMul(res, x1, x2)
   178  
   179  	return p256OrderFromMont(res), nil
   180  }
   181  
   182  func p256OrderFromMont(in *p256OrdElement) []byte {
   183  	// Montgomery multiplication by R⁻¹, or 1 outside the domain as R⁻¹×R = 1,
   184  	// converts a Montgomery value out of the domain.
   185  	one := &p256OrdElement{1}
   186  	p256OrdMul(in, in, one)
   187  
   188  	var xOut [32]byte
   189  	p256OrdLittleToBig(&xOut, in)
   190  	return xOut[:]
   191  }