github.com/AESNooper/go/src@v0.0.0-20220218095104-b56a4ab1bbbb/crypto/elliptic/p256.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !amd64 && !arm64

package elliptic

// This file contains a constant-time, 32-bit implementation of P256.

import (
	"math/big"
)

type p256Curve struct {
	*CurveParams
}

var (
	p256Params *CurveParams

	// p256RInverse contains 1/R mod p - the inverse of the Montgomery constant
	// (2**257).
	p256RInverse *big.Int
)

func initP256() {
	// See FIPS 186-3, section D.2.3
	p256Params = &CurveParams{Name: "P-256"}
	p256Params.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
	p256Params.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
	p256Params.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
	p256Params.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
	p256Params.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
	p256Params.BitSize = 256

	p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe8000000100000000ffffffff0000000180000000", 16)

	// Arch-specific initialization, i.e. let a platform dynamically pick a
	// P256 implementation.
	initP256Arch()
}

func (curve p256Curve) Params() *CurveParams {
	return curve.CurveParams
}

// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
// to out. If the scalar is equal to or greater than the order of the group,
// it's reduced modulo that order.
func p256GetScalar(out *[32]byte, in []byte) {
	n := new(big.Int).SetBytes(in)
	var scalarBytes []byte

	if n.Cmp(p256Params.N) >= 0 {
		n.Mod(n, p256Params.N)
		scalarBytes = n.Bytes()
	} else {
		scalarBytes = in
	}

	for i, v := range scalarBytes {
		out[len(scalarBytes)-(1+i)] = v
	}
}

func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
	var scalarReversed [32]byte
	p256GetScalar(&scalarReversed, scalar)

	var x1, y1, z1 [p256Limbs]uint32
	p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed)
	return p256ToAffine(&x1, &y1, &z1)
}

func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
	var scalarReversed [32]byte
	p256GetScalar(&scalarReversed, scalar)

	var px, py, x1, y1, z1 [p256Limbs]uint32
	p256FromBig(&px, bigX)
	p256FromBig(&py, bigY)
	p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed)
	return p256ToAffine(&x1, &y1, &z1)
}

// Field elements are represented as nine unsigned 32-bit words.
//
// The value of a field element is:
//   x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
//
// That is, each limb is alternately 29 or 28 bits wide in little-endian
// order.
//
// This means that a field element hits 2**257, rather than 2**256 as we would
// like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes
// problems when multiplying as terms end up one bit short of a limb which
// would require much bit-shifting to correct.
//
// Finally, the values stored in a field element are in Montgomery form. So the
// value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is
// 2**257.
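//
// As a worked check of the layout: the limb widths alternate
// 29, 28, 29, 28, 29, 28, 29, 28, 29 bits, which sum to 257, so limb i starts
// at bit 0, 29, 57, 86, 114, 143, 171, 200 or 228 (the same start positions
// tabulated in p256ReduceDegree below). A minimal sketch, not part of the
// original code, of decoding a field element x back into a *big.Int under
// this layout; the real conversion is p256ToBig at the end of this file:
//
//	v := new(big.Int)
//	for i, shift := 0, uint(0); i < p256Limbs; i++ {
//		limb := new(big.Int).SetUint64(uint64(x[i]))
//		v.Add(v, limb.Lsh(limb, shift))
//		if i%2 == 0 {
//			shift += 29
//		} else {
//			shift += 28
//		}
//	}
//	// v now holds the Montgomery form (value*R) mod p, possibly not fully
//	// reduced; multiplying by p256RInverse and reducing mod p recovers the
//	// plain value.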

const (
	p256Limbs    = 9
	bottom29Bits = 0x1fffffff
)

var (
	// p256One is the number 1 as a field element.
	p256One  = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0}
	p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0}
	// p256P is the prime modulus as a field element.
	p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0, 0x200000, 0xf000000, 0xfffffff}
	// p2562P is twice the prime modulus as a field element.
	p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff}
)

// p256Precomputed contains precomputed values to aid the calculation of scalar
// multiples of the base point, G. It's actually two equal-length tables
// concatenated.
//
// The first table contains (x,y) field element pairs for 16 multiples of the
// base point, G.
//
//   Index | Index (binary) | Value
//       0 | 0000           | 0G (all zeros, omitted)
//       1 | 0001           | G
//       2 | 0010           | 2**64G
//       3 | 0011           | 2**64G + G
//       4 | 0100           | 2**128G
//       5 | 0101           | 2**128G + G
//       6 | 0110           | 2**128G + 2**64G
//       7 | 0111           | 2**128G + 2**64G + G
//       8 | 1000           | 2**192G
//       9 | 1001           | 2**192G + G
//      10 | 1010           | 2**192G + 2**64G
//      11 | 1011           | 2**192G + 2**64G + G
//      12 | 1100           | 2**192G + 2**128G
//      13 | 1101           | 2**192G + 2**128G + G
//      14 | 1110           | 2**192G + 2**128G + 2**64G
//      15 | 1111           | 2**192G + 2**128G + 2**64G + G
//
// The second table follows the same style, but the terms are 2**32G,
// 2**96G, 2**160G, 2**224G.
//
// This is ~2KB of data.
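//
// As a size check: each table entry is an (x, y) pair of field elements, i.e.
// 2*p256Limbs = 18 uint32 words, and each table stores 15 entries because the
// all-zero entry for index 0 is omitted. Two tables therefore hold
// 2*15*18 = 540 words, or 2160 bytes, matching both the array length
// p256Limbs*2*15*2 below and the "~2KB" figure above.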
var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
	0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
	0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
	0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
	0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
	0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
	0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
	0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
	0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
	0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
	0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
	0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
	0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
	0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
	0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
	0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
	0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
	0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
	0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
	0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
	0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
	0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
	0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
	0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
	0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
	0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
	0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
	0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
	0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
	0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
	0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
	0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
	0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
	0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
	0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
	0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
	0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
	0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
	0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
	0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
	0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
	0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
	0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
	0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
	0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
	0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
	0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
	0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
	0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
	0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
	0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
	0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
	0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
	0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
	0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
	0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
	0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
	0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
	0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
	0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
	0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
}

// Field element operations:

const bottom28Bits = 0xfffffff

// nonZeroToAllOnes returns:
//   0xffffffff for 0 < x <= 2**31
//   0 for x == 0 or x > 2**31.
func nonZeroToAllOnes(x uint32) uint32 {
	return ((x - 1) >> 31) - 1
}
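
// As a worked example of the trick above: for x == 0, x-1 wraps to 0xffffffff,
// so ((x-1)>>31)-1 == 1-1 == 0; for any 0 < x <= 2**31, x-1 stays below 2**31,
// so the shift gives 0 and the final decrement wraps to 0xffffffff. This
// yields a branch-free mask that the code below uses to keep operations
// constant time.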

// p256ReduceCarry adds a multiple of p in order to cancel |carry|,
// which is a term at 2**257.
//
// On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
// On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) {
	carry_mask := nonZeroToAllOnes(carry)

	inout[0] += carry << 1
	inout[3] += 0x10000000 & carry_mask
	// carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
	// previous line therefore this doesn't underflow.
	inout[3] -= carry << 11
	inout[4] += (0x20000000 - 1) & carry_mask
	inout[5] += (0x10000000 - 1) & carry_mask
	inout[6] += (0x20000000 - 1) & carry_mask
	inout[6] -= carry << 22
	// This may underflow if carry is non-zero but, if so, we'll fix it in the
	// next line.
	inout[7] -= 1 & carry_mask
	inout[7] += carry << 25
}

// p256Sum sets out = in+in2.
//
// On entry, in[i]+in2[i] must not overflow a 32-bit word.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
func p256Sum(out, in, in2 *[p256Limbs]uint32) {
	carry := uint32(0)
	for i := 0; ; i++ {
		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

const (
	two30m2    = 1<<30 - 1<<2
	two30p13m2 = 1<<30 + 1<<13 - 1<<2
	two31m2    = 1<<31 - 1<<2
	two31m3    = 1<<31 - 1<<3
	two31p24m2 = 1<<31 + 1<<24 - 1<<2
	two30m27m2 = 1<<30 - 1<<27 - 1<<2
)

// p256Zero31 is 0 mod p.
var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2}

// p256Diff sets out = in-in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Diff(out, in, in2 *[p256Limbs]uint32) {
	var carry uint32

	for i := 0; ; i++ {
		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with
// the same 29,28,... bit positions as a field element.
//
// The values in field elements are in Montgomery form: x*R mod p where R =
// 2**257. Since we just multiplied two Montgomery values together, the result
// is x*y*R*R mod p. We wish to divide by R in order for the result also to be
// in Montgomery form.
//
// On entry: tmp[i] < 2**64
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
	// The following table may be helpful when reading this code:
	//
	// Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
	// Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
	// Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
	//   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
	var tmp2 [18]uint32
	var carry, x, xMask uint32

	// tmp contains 64-bit words with the same 29,28,29-bit positions as a
	// field element. So the top of an element of tmp might overlap with
	// another element two positions down. The following loop eliminates
	// this overlap.
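	//
	// For example, tmp[0] can be up to 64 bits wide while limb 0 only holds
	// 29 bits: bits 0..28 stay in tmp2[0], bits 29..56 land in tmp2[1]
	// (which starts at bit 29) and bits 57..63 land in tmp2[2] (which starts
	// at bit 57), exactly as the first few assignments below compute.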
	tmp2[0] = uint32(tmp[0]) & bottom29Bits

	tmp2[1] = uint32(tmp[0]) >> 29
	tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
	tmp2[1] += uint32(tmp[1]) & bottom28Bits
	carry = tmp2[1] >> 28
	tmp2[1] &= bottom28Bits

	for i := 2; i < 17; i++ {
		tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
		tmp2[i] += (uint32(tmp[i-1])) >> 28
		tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
		tmp2[i] += uint32(tmp[i]) & bottom29Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 29
		tmp2[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		tmp2[i] = uint32(tmp[i-2]>>32) >> 25
		tmp2[i] += uint32(tmp[i-1]) >> 29
		tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
		tmp2[i] += uint32(tmp[i]) & bottom28Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 28
		tmp2[i] &= bottom28Bits
	}

	tmp2[17] = uint32(tmp[15]>>32) >> 25
	tmp2[17] += uint32(tmp[16]) >> 29
	tmp2[17] += uint32(tmp[16]>>32) << 3
	tmp2[17] += carry

	// Montgomery elimination of terms:
	//
	// Since R is 2**257, we can divide by R with a bitwise shift if we can
	// ensure that the right-most 257 bits are all zero. We can make that true
	// by adding multiples of p without affecting the value.
	//
	// So we eliminate limbs from right to left. Since the bottom 29 bits of p
	// are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
	// We can do that for 8 further limbs and then right shift to eliminate the
	// extra factor of R.
	for i := 0; ; i += 2 {
		tmp2[i+1] += tmp2[i] >> 29
		x = tmp2[i] & bottom29Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i] = 0

		// The bounds calculations for this loop are tricky. Each iteration of
		// the loop eliminates two words by adding values to words to their
		// right.
		//
		// The following table contains the amounts added to each word (as an
		// offset from the value of i at the top of the loop). The amounts are
		// accounted for from the first and second half of the loop separately
		// and are written as, for example, 28 to mean a value <2**28.
		//
		// Word:                  3   4   5   6   7   8   9  10
		// Added in top half:    28  11      29  21  29  28
		//                                       28  29
		//                                           29
		// Added in bottom half:     29  10      28  21  28  28
		//                                           29
		//
		// The value that is currently offset 7 will be offset 5 for the next
		// iteration and then offset 3 for the iteration after that. Therefore
		// the total value added will be the values added at 7, 5 and 3.
		//
		// The following table accumulates these values. The sums at the bottom
		// are written as, for example, 29+28, to mean a value < 2**29+2**28.
		//
		// Word:                  3   4   5   6   7   8   9  10  11  12  13
		//                       28  11  10  29  21  29  28  28  28  28  28
		//                           29  28  11  28  29  28  29  28  29  28
		//                                   29  28  21  21  29  21  29  21
		//                                       10  29  28  21  28  21  28
		//                                       28  29  28  29  28  29  28
		//                                           11  10  29  10  29  10
		//                                           29  28  11  28  11
		//                                                   29      29
		//                       ------------------------------------------
		//                                       30+ 31+ 30+ 31+ 30+
		//                                       28+ 29+ 28+ 29+ 21+
		//                                       21+ 28+ 21+ 28+ 10
		//                                        10 21+  10 21+
		//                                            11      11
		//
		// So the greatest amount is added to tmp2[10] and tmp2[12]. If
		// tmp2[10/12] has an initial value of <2**29, then the maximum value
		// will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
		// as required.
		tmp2[i+3] += (x << 10) & bottom28Bits
		tmp2[i+4] += (x >> 18)

		tmp2[i+6] += (x << 21) & bottom29Bits
		tmp2[i+7] += x >> 8

		// At position 200, which is the starting bit position for word 7, we
		// have a factor of 0xf000000 = 2**28 - 2**24.
		tmp2[i+7] += 0x10000000 & xMask
		tmp2[i+8] += (x - 1) & xMask
		tmp2[i+7] -= (x << 24) & bottom28Bits
		tmp2[i+8] -= x >> 4

		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+8] -= x
		tmp2[i+8] += (x << 28) & bottom29Bits
		tmp2[i+9] += ((x >> 1) - 1) & xMask

		if i+1 == p256Limbs {
			break
		}
		tmp2[i+2] += tmp2[i+1] >> 28
		x = tmp2[i+1] & bottom28Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i+1] = 0

		tmp2[i+4] += (x << 11) & bottom29Bits
		tmp2[i+5] += (x >> 18)

		tmp2[i+7] += (x << 21) & bottom28Bits
		tmp2[i+8] += x >> 7

		// At position 199, which is the starting bit of the 8th word when
		// dealing with a context starting on an odd word, we have a factor of
		// 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
		// word from i+1 is i+8.
		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+9] += (x - 1) & xMask
		tmp2[i+8] -= (x << 25) & bottom29Bits
		tmp2[i+9] -= x >> 4

		tmp2[i+9] += 0x10000000 & xMask
		tmp2[i+9] -= x
		tmp2[i+10] += (x - 1) & xMask
	}

	// We merge the right shift with a carry chain. The words above 2**257 have
	// widths of 28,29,... which we need to correct when copying them down.
	carry = 0
	for i := 0; i < 8; i++ {
		// The maximum value of tmp2[i + 9] occurs on the first iteration and
		// is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
		// therefore safe.
		out[i] = tmp2[i+9]
		out[i] += carry
		out[i] += (tmp2[i+10] << 28) & bottom29Bits
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		out[i] = tmp2[i+9] >> 1
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	out[8] = tmp2[17]
	out[8] += carry
	carry = out[8] >> 29
	out[8] &= bottom29Bits

	p256ReduceCarry(out, carry)
}
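
// A note on the shift factors in p256Square and p256Mul below: the product of
// limbs i and j naturally lands at limb position i+j, but because limb widths
// alternate 29 and 28 bits, the start bits only line up exactly when at most
// one of i and j is odd. When both are odd (for example limbs 1 and 3 start at
// bits 29 and 86, which sum to 115, one more than limb 4's start bit of 114),
// the product carries one extra factor of two. Cross terms with i != j also
// appear twice in the schoolbook expansion. That is why, below, even/even and
// even/odd cross terms are shifted by <<1, odd/odd cross terms by <<2, odd
// squares by <<1 and even squares not at all.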

// p256Square sets out=in*in.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Square(out, in *[p256Limbs]uint32) {
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in[0])
	tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
	tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
		uint64(in[1])*(uint64(in[1])<<1)
	tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
		uint64(in[1])*(uint64(in[2])<<1)
	tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
		uint64(in[1])*(uint64(in[3])<<2) +
		uint64(in[2])*uint64(in[2])
	tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
		uint64(in[1])*(uint64(in[4])<<1) +
		uint64(in[2])*(uint64(in[3])<<1)
	tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
		uint64(in[1])*(uint64(in[5])<<2) +
		uint64(in[2])*(uint64(in[4])<<1) +
		uint64(in[3])*(uint64(in[3])<<1)
	tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
		uint64(in[1])*(uint64(in[6])<<1) +
		uint64(in[2])*(uint64(in[5])<<1) +
		uint64(in[3])*(uint64(in[4])<<1)
	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
	// which is < 2**64 as required.
	tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
		uint64(in[1])*(uint64(in[7])<<2) +
		uint64(in[2])*(uint64(in[6])<<1) +
		uint64(in[3])*(uint64(in[5])<<2) +
		uint64(in[4])*uint64(in[4])
	tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
		uint64(in[2])*(uint64(in[7])<<1) +
		uint64(in[3])*(uint64(in[6])<<1) +
		uint64(in[4])*(uint64(in[5])<<1)
	tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
		uint64(in[3])*(uint64(in[7])<<2) +
		uint64(in[4])*(uint64(in[6])<<1) +
		uint64(in[5])*(uint64(in[5])<<1)
	tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
		uint64(in[4])*(uint64(in[7])<<1) +
		uint64(in[5])*(uint64(in[6])<<1)
	tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
		uint64(in[5])*(uint64(in[7])<<2) +
		uint64(in[6])*uint64(in[6])
	tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
		uint64(in[6])*(uint64(in[7])<<1)
	tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
		uint64(in[7])*(uint64(in[7])<<1)
	tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
	tmp[16] = uint64(in[8]) * uint64(in[8])

	p256ReduceDegree(out, tmp)
}

// p256Mul sets out=in*in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Mul(out, in, in2 *[p256Limbs]uint32) {
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in2[0])
	tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) +
		uint64(in[1])*(uint64(in2[0])<<0)
	tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) +
		uint64(in[1])*(uint64(in2[1])<<1) +
		uint64(in[2])*(uint64(in2[0])<<0)
	tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) +
		uint64(in[1])*(uint64(in2[2])<<0) +
		uint64(in[2])*(uint64(in2[1])<<0) +
		uint64(in[3])*(uint64(in2[0])<<0)
	tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) +
		uint64(in[1])*(uint64(in2[3])<<1) +
		uint64(in[2])*(uint64(in2[2])<<0) +
		uint64(in[3])*(uint64(in2[1])<<1) +
		uint64(in[4])*(uint64(in2[0])<<0)
	tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) +
		uint64(in[1])*(uint64(in2[4])<<0) +
		uint64(in[2])*(uint64(in2[3])<<0) +
		uint64(in[3])*(uint64(in2[2])<<0) +
		uint64(in[4])*(uint64(in2[1])<<0) +
		uint64(in[5])*(uint64(in2[0])<<0)
	tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) +
		uint64(in[1])*(uint64(in2[5])<<1) +
		uint64(in[2])*(uint64(in2[4])<<0) +
		uint64(in[3])*(uint64(in2[3])<<1) +
		uint64(in[4])*(uint64(in2[2])<<0) +
		uint64(in[5])*(uint64(in2[1])<<1) +
		uint64(in[6])*(uint64(in2[0])<<0)
	tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) +
		uint64(in[1])*(uint64(in2[6])<<0) +
		uint64(in[2])*(uint64(in2[5])<<0) +
		uint64(in[3])*(uint64(in2[4])<<0) +
		uint64(in[4])*(uint64(in2[3])<<0) +
		uint64(in[5])*(uint64(in2[2])<<0) +
		uint64(in[6])*(uint64(in2[1])<<0) +
		uint64(in[7])*(uint64(in2[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// p256Square.
	tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) +
		uint64(in[1])*(uint64(in2[7])<<1) +
		uint64(in[2])*(uint64(in2[6])<<0) +
		uint64(in[3])*(uint64(in2[5])<<1) +
		uint64(in[4])*(uint64(in2[4])<<0) +
		uint64(in[5])*(uint64(in2[3])<<1) +
		uint64(in[6])*(uint64(in2[2])<<0) +
		uint64(in[7])*(uint64(in2[1])<<1) +
		uint64(in[8])*(uint64(in2[0])<<0)
	tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) +
		uint64(in[2])*(uint64(in2[7])<<0) +
		uint64(in[3])*(uint64(in2[6])<<0) +
		uint64(in[4])*(uint64(in2[5])<<0) +
		uint64(in[5])*(uint64(in2[4])<<0) +
		uint64(in[6])*(uint64(in2[3])<<0) +
		uint64(in[7])*(uint64(in2[2])<<0) +
		uint64(in[8])*(uint64(in2[1])<<0)
	tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) +
		uint64(in[3])*(uint64(in2[7])<<1) +
		uint64(in[4])*(uint64(in2[6])<<0) +
		uint64(in[5])*(uint64(in2[5])<<1) +
		uint64(in[6])*(uint64(in2[4])<<0) +
		uint64(in[7])*(uint64(in2[3])<<1) +
		uint64(in[8])*(uint64(in2[2])<<0)
	tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) +
		uint64(in[4])*(uint64(in2[7])<<0) +
		uint64(in[5])*(uint64(in2[6])<<0) +
		uint64(in[6])*(uint64(in2[5])<<0) +
		uint64(in[7])*(uint64(in2[4])<<0) +
		uint64(in[8])*(uint64(in2[3])<<0)
	tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) +
		uint64(in[5])*(uint64(in2[7])<<1) +
		uint64(in[6])*(uint64(in2[6])<<0) +
		uint64(in[7])*(uint64(in2[5])<<1) +
		uint64(in[8])*(uint64(in2[4])<<0)
	tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) +
		uint64(in[6])*(uint64(in2[7])<<0) +
		uint64(in[7])*(uint64(in2[6])<<0) +
		uint64(in[8])*(uint64(in2[5])<<0)
	tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) +
		uint64(in[7])*(uint64(in2[7])<<1) +
		uint64(in[8])*(uint64(in2[6])<<0)
	tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) +
		uint64(in[8])*(uint64(in2[7])<<0)
	tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0)

	p256ReduceDegree(out, tmp)
}

func p256Assign(out, in *[p256Limbs]uint32) {
	*out = *in
}
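
// A note on the exponent used by p256Invert below: the P-256 prime is
// p = 2**256 - 2**224 + 2**192 + 2**96 - 1, so by Fermat's little theorem the
// inverse is in**(p-2) where p-2 = 2**256 - 2**224 + 2**192 + 2**96 - 3,
// which is exactly the exponent built up by the addition chain in the
// function (see the running comments on each step).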

// p256Invert calculates |out| = |in|^{-1}
//
// Based on Fermat's Little Theorem:
//   a^p = a (mod p)
//   a^{p-1} = 1 (mod p)
//   a^{p-2} = a^{-1} (mod p)
func p256Invert(out, in *[p256Limbs]uint32) {
	var ftmp, ftmp2 [p256Limbs]uint32

	// each e_I will hold |in|^{2^I - 1}
	var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32

	p256Square(&ftmp, in)     // 2^1
	p256Mul(&ftmp, in, &ftmp) // 2^2 - 2^0
	p256Assign(&e2, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^3 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^4 - 2^2
	p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0
	p256Assign(&e4, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^5 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^6 - 2^2
	p256Square(&ftmp, &ftmp)   // 2^7 - 2^3
	p256Square(&ftmp, &ftmp)   // 2^8 - 2^4
	p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
	p256Assign(&e8, &ftmp)
	for i := 0; i < 8; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^16 - 2^8
	p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0
	p256Assign(&e16, &ftmp)
	for i := 0; i < 16; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^32 - 2^16
	p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
	p256Assign(&e32, &ftmp)
	for i := 0; i < 32; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^64 - 2^32
	p256Assign(&e64, &ftmp)
	p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0
	for i := 0; i < 192; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^256 - 2^224 + 2^192

	p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0
	for i := 0; i < 16; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^80 - 2^16
	p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0
	for i := 0; i < 8; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^88 - 2^8
	p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0
	for i := 0; i < 4; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^92 - 2^4
	p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^93 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^94 - 2^2
	p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^95 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^96 - 2^2
	p256Mul(&ftmp2, &ftmp2, in)  // 2^96 - 3

	p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3
}

// p256Scalar3 sets out=3*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar3(out *[p256Limbs]uint32) {
	var carry uint32

	for i := 0; ; i++ {
		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar4 sets out=4*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar4(out *[p256Limbs]uint32) {
	var carry, nextCarry uint32

	for i := 0; ; i++ {
		nextCarry = out[i] >> 27
		out[i] <<= 2
		out[i] &= bottom29Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 29)
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}
		nextCarry = out[i] >> 26
		out[i] <<= 2
		out[i] &= bottom28Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 28)
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar8 sets out=8*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar8(out *[p256Limbs]uint32) {
	var carry, nextCarry uint32

	for i := 0; ; i++ {
		nextCarry = out[i] >> 26
		out[i] <<= 3
		out[i] &= bottom29Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 29)
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}
		nextCarry = out[i] >> 25
		out[i] <<= 3
		out[i] &= bottom28Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 28)
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// Group operations:
//
// Elements of the elliptic curve group are represented in Jacobian
// coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
// Jacobian form.
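//
// The point at infinity has no Jacobian representation with z != 0; in this
// file it appears as the all-zero triple (the initial value of the
// accumulators below), and the scalar multiplication routines handle it with
// explicit masks because the addition formulas below do not cover it.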

// p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}.
//
// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) {
	var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32

	p256Square(&delta, z)
	p256Square(&gamma, y)
	p256Mul(&beta, x, &gamma)

	p256Sum(&tmp, x, &delta)
	p256Diff(&tmp2, x, &delta)
	p256Mul(&alpha, &tmp, &tmp2)
	p256Scalar3(&alpha)

	p256Sum(&tmp, y, z)
	p256Square(&tmp, &tmp)
	p256Diff(&tmp, &tmp, &gamma)
	p256Diff(zOut, &tmp, &delta)

	p256Scalar4(&beta)
	p256Square(xOut, &alpha)
	p256Diff(xOut, xOut, &beta)
	p256Diff(xOut, xOut, &beta)

	p256Diff(&tmp, &beta, xOut)
	p256Mul(&tmp, &alpha, &tmp)
	p256Square(&tmp2, &gamma)
	p256Scalar8(&tmp2)
	p256Diff(yOut, &tmp, &tmp2)
}

// p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}.
// (i.e. the second point is affine.)
//
// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
//
// Note that this function does not handle P+P, infinity+P nor P+infinity
// correctly.
func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) {
	var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32

	p256Square(&z1z1, z1)
	p256Sum(&tmp, z1, z1)

	p256Mul(&u2, x2, &z1z1)
	p256Mul(&z1z1z1, z1, &z1z1)
	p256Mul(&s2, y2, &z1z1z1)
	p256Diff(&h, &u2, x1)
	p256Sum(&i, &h, &h)
	p256Square(&i, &i)
	p256Mul(&j, &h, &i)
	p256Diff(&r, &s2, y1)
	p256Sum(&r, &r, &r)
	p256Mul(&v, x1, &i)

	p256Mul(zOut, &tmp, &h)
	p256Square(&rr, &r)
	p256Diff(xOut, &rr, &j)
	p256Diff(xOut, xOut, &v)
	p256Diff(xOut, xOut, &v)

	p256Diff(&tmp, &v, xOut)
	p256Mul(yOut, &tmp, &r)
	p256Mul(&tmp, y1, &j)
	p256Diff(yOut, yOut, &tmp)
	p256Diff(yOut, yOut, &tmp)
}

// p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}.
//
// See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
//
// Note that this function does not handle P+P, infinity+P nor P+infinity
// correctly.
func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) {
	var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32

	p256Square(&z1z1, z1)
	p256Square(&z2z2, z2)
	p256Mul(&u1, x1, &z2z2)

	p256Sum(&tmp, z1, z2)
	p256Square(&tmp, &tmp)
	p256Diff(&tmp, &tmp, &z1z1)
	p256Diff(&tmp, &tmp, &z2z2)

	p256Mul(&z2z2z2, z2, &z2z2)
	p256Mul(&s1, y1, &z2z2z2)

	p256Mul(&u2, x2, &z1z1)
	p256Mul(&z1z1z1, z1, &z1z1)
	p256Mul(&s2, y2, &z1z1z1)
	p256Diff(&h, &u2, &u1)
	p256Sum(&i, &h, &h)
	p256Square(&i, &i)
	p256Mul(&j, &h, &i)
	p256Diff(&r, &s2, &s1)
	p256Sum(&r, &r, &r)
	p256Mul(&v, &u1, &i)

	p256Mul(zOut, &tmp, &h)
	p256Square(&rr, &r)
	p256Diff(xOut, &rr, &j)
	p256Diff(xOut, xOut, &v)
	p256Diff(xOut, xOut, &v)

	p256Diff(&tmp, &v, xOut)
	p256Mul(yOut, &tmp, &r)
	p256Mul(&tmp, &s1, &j)
	p256Diff(yOut, yOut, &tmp)
	p256Diff(yOut, yOut, &tmp)
}
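
// Why the addition functions above exclude P+P and the point at infinity: when
// both inputs represent the same point, u1 == u2 and s1 == s2, so h and r are
// zero and the formulas yield a result with zOut == 0 (the infinity encoding)
// rather than 2P; an all-zero "infinity" input collapses the intermediates in
// the same way. The callers below arrange, via the scalar being less than the
// group order and via the nIsInfinityMask/pIsNoninfiniteMask logic, never to
// rely on those cases.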

// p256CopyConditional sets out=in if mask = 0xffffffff in constant time.
//
// On entry: mask is either 0 or 0xffffffff.
func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) {
	for i := 0; i < p256Limbs; i++ {
		tmp := mask & (in[i] ^ out[i])
		out[i] ^= tmp
	}
}

// p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
// On entry: index < 16, table[0] must be zero.
func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) {
	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}

	for i := uint32(1); i < 16; i++ {
		mask := i ^ index
		mask |= mask >> 2
		mask |= mask >> 1
		mask &= 1
		mask--
		for j := range xOut {
			xOut[j] |= table[0] & mask
			table = table[1:]
		}
		for j := range yOut {
			yOut[j] |= table[0] & mask
			table = table[1:]
		}
	}
}

// p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
// table.
// On entry: index < 16, table[0] must be zero.
func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) {
	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The implicit value at index 0 is all zero. We don't need to perform that
	// iteration of the loop because we already set out_* to zero.
	for i := uint32(1); i < 16; i++ {
		mask := i ^ index
		mask |= mask >> 2
		mask |= mask >> 1
		mask &= 1
		mask--
		for j := range xOut {
			xOut[j] |= table[i][0][j] & mask
		}
		for j := range yOut {
			yOut[j] |= table[i][1][j] & mask
		}
		for j := range zOut {
			zOut[j] |= table[i][2][j] & mask
		}
	}
}
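
// A worked example of the selection mask used in both functions above: index
// and the loop counter i are 4-bit values, so i^index is zero exactly when
// they match. Folding with mask |= mask>>2, mask |= mask>>1 and mask &= 1
// reduces that to a single bit which is 1 iff i != index, and the final
// decrement turns it into 0 for non-matching entries and 0xffffffff for the
// matching one. Every table entry is read either way, so the lookup leaks
// neither the index nor any timing difference.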

// p256GetBit returns the bit'th bit of scalar.
func p256GetBit(scalar *[32]uint8, bit uint) uint32 {
	return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
}

// p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
// little-endian number. Note that the value of scalar must be less than the
// order of the group.
func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) {
	nIsInfinityMask := ^uint32(0)
	var pIsNoninfiniteMask, mask, tableOffset uint32
	var px, py, tx, ty, tz [p256Limbs]uint32

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The loop adds bits at positions 0, 64, 128 and 192, followed by
	// positions 32, 96, 160 and 224 and does this 32 times.
	for i := uint(0); i < 32; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}
		tableOffset = 0
		for j := uint(0); j <= 32; j += 32 {
			bit0 := p256GetBit(scalar, 31-i+j)
			bit1 := p256GetBit(scalar, 95-i+j)
			bit2 := p256GetBit(scalar, 159-i+j)
			bit3 := p256GetBit(scalar, 223-i+j)
			index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)

			p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index)
			tableOffset += 30 * p256Limbs

			// Since scalar is less than the order of the group, we know that
			// {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we
			// handle below.
			p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
			// The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is
			// zero (a.k.a. the point at infinity). We handle that situation by
			// copying the point from the table.
			p256CopyConditional(xOut, &px, nIsInfinityMask)
			p256CopyConditional(yOut, &py, nIsInfinityMask)
			p256CopyConditional(zOut, &p256One, nIsInfinityMask)

			// Equally, the result is also wrong if the point from the table is
			// zero, which happens when the index is zero. We handle that by
			// only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
			pIsNoninfiniteMask = nonZeroToAllOnes(index)
			mask = pIsNoninfiniteMask & ^nIsInfinityMask
			p256CopyConditional(xOut, &tx, mask)
			p256CopyConditional(yOut, &ty, mask)
			p256CopyConditional(zOut, &tz, mask)
			// If p was not zero, then n is now non-zero.
			nIsInfinityMask &^= pIsNoninfiniteMask
		}
	}
}

// p256PointToAffine converts a Jacobian point to an affine point. If the input
// is the point at infinity then it returns (0, 0) in constant time.
func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) {
	var zInv, zInvSq [p256Limbs]uint32

	p256Invert(&zInv, z)
	p256Square(&zInvSq, &zInv)
	p256Mul(xOut, x, &zInvSq)
	p256Mul(&zInv, &zInv, &zInvSq)
	p256Mul(yOut, y, &zInv)
}

// p256ToAffine returns a pair of *big.Int containing the affine representation
// of {x,y,z}.
func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) {
	var xx, yy [p256Limbs]uint32
	p256PointToAffine(&xx, &yy, x, y, z)
	return p256ToBig(&xx), p256ToBig(&yy)
}

// p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}.
func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) {
	var px, py, pz, tx, ty, tz [p256Limbs]uint32
	var precomp [16][3][p256Limbs]uint32
	var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32

	// We precompute 0,1,2,... times {x,y}.
	precomp[1][0] = *x
	precomp[1][1] = *y
	precomp[1][2] = p256One

	for i := 2; i < 16; i += 2 {
		p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
		p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
	}

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}
	nIsInfinityMask = ^uint32(0)

	// We add in a window of four bits each iteration and do this 64 times.
	for i := 0; i < 64; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}

		index = uint32(scalar[31-i/2])
		if (i & 1) == 1 {
			index &= 15
		} else {
			index >>= 4
		}

		// See the comments in scalarBaseMult about handling infinities.
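		// Briefly: if the accumulator is still at infinity, the freshly
		// selected point (or zero, when index == 0) is copied in directly; if
		// the selected table entry is the implicit zero at index 0, the result
		// of the addition below is discarded; only when both points are
		// non-zero is the sum {tx,ty,tz} kept.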
		p256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
		p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz)
		p256CopyConditional(xOut, &px, nIsInfinityMask)
		p256CopyConditional(yOut, &py, nIsInfinityMask)
		p256CopyConditional(zOut, &pz, nIsInfinityMask)

		pIsNoninfiniteMask = nonZeroToAllOnes(index)
		mask = pIsNoninfiniteMask & ^nIsInfinityMask
		p256CopyConditional(xOut, &tx, mask)
		p256CopyConditional(yOut, &ty, mask)
		p256CopyConditional(zOut, &tz, mask)
		nIsInfinityMask &^= pIsNoninfiniteMask
	}
}

// p256FromBig sets out = R*in.
func p256FromBig(out *[p256Limbs]uint32, in *big.Int) {
	tmp := new(big.Int).Lsh(in, 257)
	tmp.Mod(tmp, p256Params.P)

	for i := 0; i < p256Limbs; i++ {
		if bits := tmp.Bits(); len(bits) > 0 {
			out[i] = uint32(bits[0]) & bottom29Bits
		} else {
			out[i] = 0
		}
		tmp.Rsh(tmp, 29)

		i++
		if i == p256Limbs {
			break
		}

		if bits := tmp.Bits(); len(bits) > 0 {
			out[i] = uint32(bits[0]) & bottom28Bits
		} else {
			out[i] = 0
		}
		tmp.Rsh(tmp, 28)
	}
}

// p256ToBig returns a *big.Int containing the value of in.
func p256ToBig(in *[p256Limbs]uint32) *big.Int {
	result, tmp := new(big.Int), new(big.Int)

	result.SetInt64(int64(in[p256Limbs-1]))
	for i := p256Limbs - 2; i >= 0; i-- {
		if (i & 1) == 0 {
			result.Lsh(result, 29)
		} else {
			result.Lsh(result, 28)
		}
		tmp.SetInt64(int64(in[i]))
		result.Add(result, tmp)
	}

	result.Mul(result, p256RInverse)
	result.Mod(result, p256Params.P)
	return result
}
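
// A note on the two conversion helpers above: p256FromBig multiplies by
// R = 2**257 (the Lsh by 257) before packing into 29/28-bit limbs, and
// p256ToBig multiplies the unpacked value by p256RInverse, so a round trip
// such as the sketch below (fe, x and y are illustrative names, not part of
// the original code) returns the input reduced modulo p:
//
//	var fe [p256Limbs]uint32
//	p256FromBig(&fe, x)
//	y := p256ToBig(&fe) // y == x mod p, since R*RInverse == 1 (mod p)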