github.com/emmansun/gmsm@v0.29.1/internal/sm2ec/p256_asm_ord.go (about) 1 //go:build (amd64 || arm64 || s390x || ppc64le) && !purego 2 3 package sm2ec 4 5 import "errors" 6 7 // Montgomery multiplication modulo org(G). Sets res = in1 * in2 * R⁻¹. 8 // 9 //go:noescape 10 func p256OrdMul(res, in1, in2 *p256OrdElement) 11 12 // Montgomery square modulo org(G), repeated n times (n >= 1). 13 // 14 //go:noescape 15 func p256OrdSqr(res, in *p256OrdElement, n int) 16 17 // This code operates in the Montgomery domain where R = 2²⁵⁶ mod n and n is 18 // the order of the scalar field. Elements in the Montgomery domain take the 19 // form a×R and p256OrdMul calculates (a × b × R⁻¹) mod n. RR is R in the 20 // domain, or R×R mod n, thus p256OrdMul(x, RR) gives x×R, i.e. converts x 21 // into the Montgomery domain. 22 var RR = &p256OrdElement{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b} 23 24 // P256OrdInverse, sets out to in⁻¹ mod org(G). If in is zero, out will be zero. 25 // n-2 = 26 // 1111111111111111111111111111111011111111111111111111111111111111 27 // 1111111111111111111111111111111111111111111111111111111111111111 28 // 0111001000000011110111110110101100100001110001100000010100101011 29 // 0101001110111011111101000000100100111001110101010100000100100001 30 func P256OrdInverse(k []byte) ([]byte, error) { 31 if len(k) != 32 { 32 return nil, errors.New("invalid scalar length") 33 } 34 x := new(p256OrdElement) 35 p256OrdBigToLittle(x, (*[32]byte)(k)) 36 p256OrdMul(x, x, RR) 37 // Inversion is implemented as exponentiation with exponent p − 2. 38 // The sequence of 41 multiplications and 253 squarings is derived from the 39 // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. 40 // 41 // _10 = 2*1 42 // _11 = 1 + _10 43 // _100 = 1 + _11 44 // _101 = 1 + _100 45 // _111 = _10 + _101 46 // _1001 = _10 + _111 47 // _1101 = _100 + _1001 48 // _1111 = _10 + _1101 49 // _11110 = 2*_1111 50 // _11111 = 1 + _11110 51 // _111110 = 2*_11111 52 // _111111 = 1 + _111110 53 // _1111110 = 2*_111111 54 // i20 = _1111110 << 6 + _1111110 55 // x18 = i20 << 5 + _111111 56 // x31 = x18 << 13 + i20 + 1 57 // i42 = 2*x31 58 // i44 = i42 << 2 59 // i140 = ((i44 << 32 + i44) << 29 + i42) << 33 60 // i150 = ((i44 + i140 + _111) << 4 + _111) << 3 61 // i170 = ((1 + i150) << 11 + _1111) << 6 + _11111 62 // i183 = ((i170 << 5 + _1101) << 3 + _11) << 3 63 // i198 = ((1 + i183) << 7 + _111) << 5 + _11 64 // i219 = ((i198 << 9 + _101) << 5 + _101) << 5 65 // i231 = ((_1101 + i219) << 5 + _1001) << 4 + _1101 66 // i244 = ((i231 << 2 + _11) << 7 + _111111) << 2 67 // i262 = ((1 + i244) << 10 + _1001) << 5 + _111 68 // i277 = ((i262 << 5 + _111) << 4 + _101) << 4 69 // return ((_101 + i277) << 9 + _1001) << 5 + 1 70 // 71 var z = new(p256OrdElement) 72 var t0 = new(p256OrdElement) 73 var t1 = new(p256OrdElement) 74 var t2 = new(p256OrdElement) 75 var t3 = new(p256OrdElement) 76 var t4 = new(p256OrdElement) 77 var t5 = new(p256OrdElement) 78 var t6 = new(p256OrdElement) 79 var t7 = new(p256OrdElement) 80 var t8 = new(p256OrdElement) 81 var t9 = new(p256OrdElement) 82 83 p256OrdSqr(t3, x, 1) 84 p256OrdMul(z, x, t3) 85 p256OrdMul(t4, x, z) 86 p256OrdMul(t1, x, t4) 87 p256OrdMul(t2, t3, t1) 88 p256OrdMul(t0, t3, t2) 89 p256OrdMul(t4, t4, t0) 90 p256OrdMul(t6, t3, t4) 91 p256OrdSqr(t3, t6, 1) 92 p256OrdMul(t5, x, t3) 93 p256OrdSqr(t3, t5, 1) 94 p256OrdMul(t3, x, t3) 95 p256OrdSqr(t7, t3, 1) 96 p256OrdSqr(t8, t7, 6) 97 p256OrdMul(t7, t7, t8) 98 p256OrdSqr(t8, t7, 5) 99 p256OrdMul(t8, t3, t8) 100 p256OrdSqr(t8, t8, 13) 101 p256OrdMul(t7, t7, t8) 102 p256OrdMul(t7, x, t7) 103 p256OrdSqr(t8, t7, 1) 104 p256OrdSqr(t7, t8, 2) 105 p256OrdSqr(t9, t7, 32) 106 p256OrdMul(t9, t7, t9) 107 p256OrdSqr(t9, t9, 29) 108 p256OrdMul(t8, t8, t9) 109 p256OrdSqr(t8, t8, 33) 110 p256OrdMul(t7, t7, t8) 111 p256OrdMul(t7, t2, t7) 112 p256OrdSqr(t7, t7, 4) 113 p256OrdMul(t7, t2, t7) 114 p256OrdSqr(t7, t7, 3) 115 p256OrdMul(t7, x, t7) 116 p256OrdSqr(t7, t7, 11) 117 p256OrdMul(t6, t6, t7) 118 p256OrdSqr(t6, t6, 6) 119 p256OrdMul(t5, t5, t6) 120 p256OrdSqr(t5, t5, 5) 121 p256OrdMul(t5, t4, t5) 122 p256OrdSqr(t5, t5, 3) 123 p256OrdMul(t5, z, t5) 124 p256OrdSqr(t5, t5, 3) 125 p256OrdMul(t5, x, t5) 126 p256OrdSqr(t5, t5, 7) 127 p256OrdMul(t5, t2, t5) 128 p256OrdSqr(t5, t5, 5) 129 p256OrdMul(t5, z, t5) 130 p256OrdSqr(t5, t5, 9) 131 p256OrdMul(t5, t1, t5) 132 p256OrdSqr(t5, t5, 5) 133 p256OrdMul(t5, t1, t5) 134 p256OrdSqr(t5, t5, 5) 135 p256OrdMul(t5, t4, t5) 136 p256OrdSqr(t5, t5, 5) 137 p256OrdMul(t5, t0, t5) 138 p256OrdSqr(t5, t5, 4) 139 p256OrdMul(t4, t4, t5) 140 p256OrdSqr(t4, t4, 2) 141 p256OrdMul(t4, z, t4) 142 p256OrdSqr(t4, t4, 7) 143 p256OrdMul(t3, t3, t4) 144 p256OrdSqr(t3, t3, 2) 145 p256OrdMul(t3, x, t3) 146 p256OrdSqr(t3, t3, 10) 147 p256OrdMul(t3, t0, t3) 148 p256OrdSqr(t3, t3, 5) 149 p256OrdMul(t3, t2, t3) 150 p256OrdSqr(t3, t3, 5) 151 p256OrdMul(t2, t2, t3) 152 p256OrdSqr(t2, t2, 4) 153 p256OrdMul(t2, t1, t2) 154 p256OrdSqr(t2, t2, 4) 155 p256OrdMul(t1, t1, t2) 156 p256OrdSqr(t1, t1, 9) 157 p256OrdMul(t0, t0, t1) 158 p256OrdSqr(t0, t0, 5) 159 p256OrdMul(z, x, t0) 160 return p256OrderFromMont(z), nil 161 } 162 163 // P256OrdMul multiplication modulo org(G). 164 func P256OrdMul(in1, in2 []byte) ([]byte, error) { 165 if len(in1) != 32 || len(in2) != 32 { 166 return nil, errors.New("invalid scalar length") 167 } 168 x1 := new(p256OrdElement) 169 p256OrdBigToLittle(x1, (*[32]byte)(in1)) 170 p256OrdMul(x1, x1, RR) 171 172 x2 := new(p256OrdElement) 173 p256OrdBigToLittle(x2, (*[32]byte)(in2)) 174 p256OrdMul(x2, x2, RR) 175 176 res := new(p256OrdElement) 177 p256OrdMul(res, x1, x2) 178 179 return p256OrderFromMont(res), nil 180 } 181 182 func p256OrderFromMont(in *p256OrdElement) []byte { 183 // Montgomery multiplication by R⁻¹, or 1 outside the domain as R⁻¹×R = 1, 184 // converts a Montgomery value out of the domain. 185 one := &p256OrdElement{1} 186 p256OrdMul(in, in, one) 187 188 var xOut [32]byte 189 p256OrdLittleToBig(&xOut, in) 190 return xOut[:] 191 }