github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/crypto/elliptic/p256_elements.go (about)

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64
//##,!arm64

package elliptic

// This file contains a constant-time, 32-bit implementation of P256.

import (
	"math/big"
)

// p256Curve is the receiver type for the P-256 scalar multiplication methods
// defined in this file. It embeds *CurveParams, which Params returns.
type p256Curve struct {
	*CurveParams
}

var (
	// p256Params holds the P-256 domain parameters. It is populated by
	// initP256 and read by p256GetScalar (for the group order N).
	p256Params *CurveParams

	// RInverse contains 1/R mod p - the inverse of the Montgomery constant
	// (2**257).
	p256RInverse *big.Int
)

// initP256 fills in p256Params and p256RInverse and then hands over to
// initP256Arch.
//
// The boolean results of SetString are discarded on purpose: every input is a
// fixed literal in this function.
func initP256() {
	// See FIPS 186-3, section D.2.3
	p256Params = &CurveParams{Name: "P-256"}
	p256Params.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
	p256Params.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
	p256Params.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
	p256Params.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
	p256Params.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
	p256Params.BitSize = 256

	p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe8000000100000000ffffffff0000000180000000", 16)

	// Arch-specific initialization, i.e. let a platform dynamically pick a P256 implementation
	initP256Arch()
}

// Params returns the embedded curve parameters.
func (curve p256Curve) Params() *CurveParams {
	return curve.CurveParams
}

// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
// to out. If the scalar is equal or greater than the order of the group, it's
// reduced modulo that order.
51 func p256GetScalar(out *[32]byte, in []byte) { 52 n := new(big.Int).SetBytes(in) 53 var scalarBytes []byte 54 55 if n.Cmp(p256Params.N) >= 0 { 56 n.Mod(n, p256Params.N) 57 scalarBytes = n.Bytes() 58 } else { 59 scalarBytes = in 60 } 61 62 for i, v := range scalarBytes { 63 out[len(scalarBytes)-(1+i)] = v 64 } 65 } 66 67 func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 68 var scalarReversed [32]byte 69 p256GetScalar(&scalarReversed, scalar) 70 71 var x1, y1, z1 [p256Limbs]uint32 72 p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed) 73 return p256ToAffine(&x1, &y1, &z1) 74 } 75 76 func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 77 var scalarReversed [32]byte 78 p256GetScalar(&scalarReversed, scalar) 79 80 var px, py, x1, y1, z1 [p256Limbs]uint32 81 p256FromBig(&px, bigX) 82 p256FromBig(&py, bigY) 83 p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed) 84 return p256ToAffine(&x1, &y1, &z1) 85 } 86 87 // Field elements are represented as nine, unsigned 32-bit words. 88 // 89 // The value of an field element is: 90 // x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) 91 // 92 // That is, each limb is alternately 29 or 28-bits wide in little-endian 93 // order. 94 // 95 // This means that a field element hits 2**257, rather than 2**256 as we would 96 // like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes 97 // problems when multiplying as terms end up one bit short of a limb which 98 // would require much bit-shifting to correct. 99 // 100 // Finally, the values stored in a field element are in Montgomery form. So the 101 // value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 102 // 2**257. 103 104 const ( 105 p256Limbs = 9 106 bottom29Bits = 0x1fffffff 107 ) 108 109 var ( 110 // p256One is the number 1 as a field element. 
111 p256One = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0} 112 p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0} 113 // p256P is the prime modulus as a field element. 114 p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0, 0x200000, 0xf000000, 0xfffffff} 115 // p2562P is the twice prime modulus as a field element. 116 p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff} 117 ) 118 119 // p256Precomputed contains precomputed values to aid the calculation of scalar 120 // multiples of the base point, G. It's actually two, equal length, tables 121 // concatenated. 122 // 123 // The first table contains (x,y) field element pairs for 16 multiples of the 124 // base point, G. 125 // 126 // Index | Index (binary) | Value 127 // 0 | 0000 | 0G (all zeros, omitted) 128 // 1 | 0001 | G 129 // 2 | 0010 | 2**64G 130 // 3 | 0011 | 2**64G + G 131 // 4 | 0100 | 2**128G 132 // 5 | 0101 | 2**128G + G 133 // 6 | 0110 | 2**128G + 2**64G 134 // 7 | 0111 | 2**128G + 2**64G + G 135 // 8 | 1000 | 2**192G 136 // 9 | 1001 | 2**192G + G 137 // 10 | 1010 | 2**192G + 2**64G 138 // 11 | 1011 | 2**192G + 2**64G + G 139 // 12 | 1100 | 2**192G + 2**128G 140 // 13 | 1101 | 2**192G + 2**128G + G 141 // 14 | 1110 | 2**192G + 2**128G + 2**64G 142 // 15 | 1111 | 2**192G + 2**128G + 2**64G + G 143 // 144 // The second table follows the same style, but the terms are 2**32G, 145 // 2**96G, 2**160G, 2**224G. 146 // 147 // This is ~2KB of data. 
148 var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{ 149 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, 150 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, 151 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, 152 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, 153 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, 154 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, 155 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, 156 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, 157 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, 158 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, 159 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, 160 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, 161 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, 162 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, 163 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, 164 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, 165 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, 166 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, 167 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 
0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b, 168 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, 169 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, 170 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, 171 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, 172 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, 173 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, 174 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, 175 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, 176 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, 177 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, 178 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, 179 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, 180 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f, 181 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, 182 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, 183 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, 184 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, 185 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, 186 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 
0x112ac15f, 0x624e62e, 0xa90ae2f, 187 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, 188 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, 189 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, 190 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, 191 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, 192 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, 193 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, 194 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, 195 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, 196 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, 197 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, 198 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, 199 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, 200 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, 201 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, 202 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, 203 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, 204 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, 205 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 
0x6bdf55a, 0x4e4457d, 206 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4, 207 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, 208 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78, 209 } 210 211 // Field element operations: 212 213 // nonZeroToAllOnes returns: 214 // 0xffffffff for 0 < x <= 2**31 215 // 0 for x == 0 or x > 2**31. 216 func nonZeroToAllOnes(x uint32) uint32 { 217 return ((x - 1) >> 31) - 1 218 } 219 220 // p256ReduceCarry adds a multiple of p in order to cancel |carry|, 221 // which is a term at 2**257. 222 // 223 // On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. 224 // On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. 225 func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) { 226 carry_mask := nonZeroToAllOnes(carry) 227 228 inout[0] += carry << 1 229 inout[3] += 0x10000000 & carry_mask 230 // carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the 231 // previous line therefore this doesn't underflow. 232 inout[3] -= carry << 11 233 inout[4] += (0x20000000 - 1) & carry_mask 234 inout[5] += (0x10000000 - 1) & carry_mask 235 inout[6] += (0x20000000 - 1) & carry_mask 236 inout[6] -= carry << 22 237 // This may underflow if carry is non-zero but, if so, we'll fix it in the 238 // next line. 239 inout[7] -= 1 & carry_mask 240 inout[7] += carry << 25 241 } 242 243 // p256Sum sets out = in+in2. 244 // 245 // On entry, in[i]+in2[i] must not overflow a 32-bit word. 246 // On exit: out[0,2,...] < 2**30, out[1,3,...] 
< 2**29 247 func p256Sum(out, in, in2 *[p256Limbs]uint32) { 248 carry := uint32(0) 249 for i := 0; ; i++ { 250 out[i] = in[i] + in2[i] 251 out[i] += carry 252 carry = out[i] >> 29 253 out[i] &= bottom29Bits 254 255 i++ 256 if i == p256Limbs { 257 break 258 } 259 260 out[i] = in[i] + in2[i] 261 out[i] += carry 262 carry = out[i] >> 28 263 out[i] &= bottom28Bits 264 } 265 266 p256ReduceCarry(out, carry) 267 } 268 269 const ( 270 two30m2 = 1<<30 - 1<<2 271 two30p13m2 = 1<<30 + 1<<13 - 1<<2 272 two31m2 = 1<<31 - 1<<2 273 two31p24m2 = 1<<31 + 1<<24 - 1<<2 274 two30m27m2 = 1<<30 - 1<<27 - 1<<2 275 ) 276 277 // p256Zero31 is 0 mod p. 278 var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2} 279 280 // p256Diff sets out = in-in2. 281 // 282 // On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and 283 // in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. 284 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 285 func p256Diff(out, in, in2 *[p256Limbs]uint32) { 286 var carry uint32 287 288 for i := 0; ; i++ { 289 out[i] = in[i] - in2[i] 290 out[i] += p256Zero31[i] 291 out[i] += carry 292 carry = out[i] >> 29 293 out[i] &= bottom29Bits 294 295 i++ 296 if i == p256Limbs { 297 break 298 } 299 300 out[i] = in[i] - in2[i] 301 out[i] += p256Zero31[i] 302 out[i] += carry 303 carry = out[i] >> 28 304 out[i] &= bottom28Bits 305 } 306 307 p256ReduceCarry(out, carry) 308 } 309 310 // p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with 311 // the same 29,28,... bit positions as an field element. 312 // 313 // The values in field elements are in Montgomery form: x*R mod p where R = 314 // 2**257. Since we just multiplied two Montgomery values together, the result 315 // is x*y*R*R mod p. We wish to divide by R in order for the result also to be 316 // in Montgomery form. 317 // 318 // On entry: tmp[i] < 2**64 319 // On exit: out[0,2,...] < 2**30, out[1,3,...] 
// < 2**29
func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
	// The function works in three stages:
	//   1. split the 17 64-bit columns of tmp into 18 limbs of 29/28 bits
	//      (tmp2), propagating carries;
	//   2. clear the nine lowest limbs by adding multiples of p;
	//   3. copy the upper limbs down (the division by R), re-aligning the
	//      limb widths, and fold the final carry with p256ReduceCarry.
	//
	// The following table may be helpful when reading this code:
	//
	// Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
	// Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
	// Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
	//   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
	var tmp2 [18]uint32
	var carry, x, xMask uint32

	// tmp contains 64-bit words with the same 29,28,29-bit positions as a
	// field element. So the top of an element of tmp might overlap with
	// another element two positions down. The following loop eliminates
	// this overlap.
	tmp2[0] = uint32(tmp[0]) & bottom29Bits

	tmp2[1] = uint32(tmp[0]) >> 29
	tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
	tmp2[1] += uint32(tmp[1]) & bottom28Bits
	carry = tmp2[1] >> 28
	tmp2[1] &= bottom28Bits

	// Each pass handles one even (29-bit) limb and then, after the manual
	// i++, one odd (28-bit) limb.
	for i := 2; i < 17; i++ {
		tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
		tmp2[i] += (uint32(tmp[i-1])) >> 28
		tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
		tmp2[i] += uint32(tmp[i]) & bottom29Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 29
		tmp2[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		tmp2[i] = uint32(tmp[i-2]>>32) >> 25
		tmp2[i] += uint32(tmp[i-1]) >> 29
		tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
		tmp2[i] += uint32(tmp[i]) & bottom28Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 28
		tmp2[i] &= bottom28Bits
	}

	// The last limb only collects what is left of tmp[15] and tmp[16] plus
	// the running carry; it is not masked.
	tmp2[17] = uint32(tmp[15]>>32) >> 25
	tmp2[17] += uint32(tmp[16]) >> 29
	tmp2[17] += uint32(tmp[16]>>32) << 3
	tmp2[17] += carry

	// Montgomery elimination of terms:
	//
	// Since R is 2**257, we can divide by R with a bitwise shift if we can
	// ensure that the right-most 257 bits are all zero. We can make that true
	// by adding multiples of p without affecting the value.
	//
	// So we eliminate limbs from right to left. Since the bottom 29 bits of p
	// are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
	// We can do that for 8 further limbs and then right shift to eliminate the
	// extra factor of R.
	for i := 0; ; i += 2 {
		tmp2[i+1] += tmp2[i] >> 29
		x = tmp2[i] & bottom29Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i] = 0

		// The bounds calculations for this loop are tricky. Each iteration of
		// the loop eliminates two words by adding values to words to their
		// right.
		//
		// The following table contains the amounts added to each word (as an
		// offset from the value of i at the top of the loop). The amounts are
		// accounted for from the first and second half of the loop separately
		// and are written as, for example, 28 to mean a value <2**28.
		//
		// Word:                   3   4   5   6   7   8   9   10
		// Added in top half:     28  11      29  21  29  28
		//                                        28  29
		//                                            29
		// Added in bottom half:      29  10      28  21  28   28
		//                                            29
		//
		// The value that is currently offset 7 will be offset 5 for the next
		// iteration and then offset 3 for the iteration after that. Therefore
		// the total value added will be the values added at 7, 5 and 3.
		//
		// The following table accumulates these values. The sums at the bottom
		// are written as, for example, 29+28, to mean a value < 2**29+2**28.
		//
		// Word:                   3   4   5   6   7   8   9   10  11  12  13
		//                        28  11  10  29  21  29  28  28  28  28  28
		//                            29  28  11  28  29  28  29  28  29  28
		//                                    29  28  21  21  29  21  29  21
		//                                        10  29  28  21  28  21  28
		//                                        28  29  28  29  28  29  28
		//                                            11  10  29  10  29  10
		//                                            29  28  11  28  11
		//                                                    29      29
		//                        --------------------------------------------
		//                                                30+ 31+ 30+ 31+ 30+
		//                                                28+ 29+ 28+ 29+ 21+
		//                                                21+ 28+ 21+ 28+ 10
		//                                                10  21+ 10  21+
		//                                                    11      11
		//
		// So the greatest amount is added to tmp2[10] and tmp2[12]. If
		// tmp2[10/12] has an initial value of <2**29, then the maximum value
		// will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
		// as required.
		tmp2[i+3] += (x << 10) & bottom28Bits
		tmp2[i+4] += (x >> 18)

		tmp2[i+6] += (x << 21) & bottom29Bits
		tmp2[i+7] += x >> 8

		// At position 200, which is the starting bit position for word 7, we
		// have a factor of 0xf000000 = 2**28 - 2**24.
		tmp2[i+7] += 0x10000000 & xMask
		tmp2[i+8] += (x - 1) & xMask
		tmp2[i+7] -= (x << 24) & bottom28Bits
		tmp2[i+8] -= x >> 4

		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+8] -= x
		tmp2[i+8] += (x << 28) & bottom29Bits
		tmp2[i+9] += ((x >> 1) - 1) & xMask

		// Nine limbs (0..8) have to be cleared, so the loop stops after the
		// even half of the fifth pass (i == 8).
		if i+1 == p256Limbs {
			break
		}
		tmp2[i+2] += tmp2[i+1] >> 28
		x = tmp2[i+1] & bottom28Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i+1] = 0

		tmp2[i+4] += (x << 11) & bottom29Bits
		tmp2[i+5] += (x >> 18)

		tmp2[i+7] += (x << 21) & bottom28Bits
		tmp2[i+8] += x >> 7

		// At position 199, which is the starting bit of the 8th word when
		// dealing with a context starting on an odd word, we have a factor of
		// 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
		// word from i+1 is i+8.
		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+9] += (x - 1) & xMask
		tmp2[i+8] -= (x << 25) & bottom29Bits
		tmp2[i+9] -= x >> 4

		tmp2[i+9] += 0x10000000 & xMask
		tmp2[i+9] -= x
		tmp2[i+10] += (x - 1) & xMask
	}

	// We merge the right shift with a carry chain. The words above 2**257 have
	// widths of 28,29,... which we need to correct when copying them down.
	carry = 0
	for i := 0; i < 8; i++ {
		// The maximum value of tmp2[i + 9] occurs on the first iteration and
		// is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
		// therefore safe.
		out[i] = tmp2[i+9]
		out[i] += carry
		out[i] += (tmp2[i+10] << 28) & bottom29Bits
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		// Odd output limb: the lowest bit of tmp2[i+9] was already consumed
		// by the even limb above (via the << 28), hence the >> 1.
		i++
		out[i] = tmp2[i+9] >> 1
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	out[8] = tmp2[17]
	out[8] += carry
	carry = out[8] >> 29
	out[8] &= bottom29Bits

	p256ReduceCarry(out, carry)
}

// p256Square sets out=in*in.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Square(out, in *[p256Limbs]uint32) {
	// tmp[k] collects every product in[i]*in[j] with i+j == k. Symmetric
	// pairs (i != j) are written once and doubled with << 1. A product of
	// two odd-indexed limbs carries one extra doubling, because the sum of
	// two odd limb start positions lies one bit above the start of column
	// i+j in the 29,28,... layout.
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in[0])
	tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
	tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
		uint64(in[1])*(uint64(in[1])<<1)
	tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
		uint64(in[1])*(uint64(in[2])<<1)
	tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
		uint64(in[1])*(uint64(in[3])<<2) +
		uint64(in[2])*uint64(in[2])
	tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
		uint64(in[1])*(uint64(in[4])<<1) +
		uint64(in[2])*(uint64(in[3])<<1)
	tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
		uint64(in[1])*(uint64(in[5])<<2) +
		uint64(in[2])*(uint64(in[4])<<1) +
		uint64(in[3])*(uint64(in[3])<<1)
	tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
		uint64(in[1])*(uint64(in[6])<<1) +
		uint64(in[2])*(uint64(in[5])<<1) +
		uint64(in[3])*(uint64(in[4])<<1)
	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
	// which is < 2**64 as required.
	tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
		uint64(in[1])*(uint64(in[7])<<2) +
		uint64(in[2])*(uint64(in[6])<<1) +
		uint64(in[3])*(uint64(in[5])<<2) +
		uint64(in[4])*uint64(in[4])
	tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
		uint64(in[2])*(uint64(in[7])<<1) +
		uint64(in[3])*(uint64(in[6])<<1) +
		uint64(in[4])*(uint64(in[5])<<1)
	tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
		uint64(in[3])*(uint64(in[7])<<2) +
		uint64(in[4])*(uint64(in[6])<<1) +
		uint64(in[5])*(uint64(in[5])<<1)
	tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
		uint64(in[4])*(uint64(in[7])<<1) +
		uint64(in[5])*(uint64(in[6])<<1)
	tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
		uint64(in[5])*(uint64(in[7])<<2) +
		uint64(in[6])*uint64(in[6])
	tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
		uint64(in[6])*(uint64(in[7])<<1)
	tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
		uint64(in[7])*(uint64(in[7])<<1)
	tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
	tmp[16] = uint64(in[8]) * uint64(in[8])

	p256ReduceDegree(out, tmp)
}

// p256Mul sets out=in*in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
//           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Mul(out, in, in2 *[p256Limbs]uint32) {
	// tmp[k] collects every product in[i]*in2[j] with i+j == k. A product
	// of two odd-indexed limbs is doubled (<< 1) because the sum of two odd
	// limb start positions lies one bit above the start of column i+j in
	// the 29,28,... layout. The << 0 shifts are no-ops that keep every
	// term in the same shape.
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in2[0])
	tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) +
		uint64(in[1])*(uint64(in2[0])<<0)
	tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) +
		uint64(in[1])*(uint64(in2[1])<<1) +
		uint64(in[2])*(uint64(in2[0])<<0)
	tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) +
		uint64(in[1])*(uint64(in2[2])<<0) +
		uint64(in[2])*(uint64(in2[1])<<0) +
		uint64(in[3])*(uint64(in2[0])<<0)
	tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) +
		uint64(in[1])*(uint64(in2[3])<<1) +
		uint64(in[2])*(uint64(in2[2])<<0) +
		uint64(in[3])*(uint64(in2[1])<<1) +
		uint64(in[4])*(uint64(in2[0])<<0)
	tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) +
		uint64(in[1])*(uint64(in2[4])<<0) +
		uint64(in[2])*(uint64(in2[3])<<0) +
		uint64(in[3])*(uint64(in2[2])<<0) +
		uint64(in[4])*(uint64(in2[1])<<0) +
		uint64(in[5])*(uint64(in2[0])<<0)
	tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) +
		uint64(in[1])*(uint64(in2[5])<<1) +
		uint64(in[2])*(uint64(in2[4])<<0) +
		uint64(in[3])*(uint64(in2[3])<<1) +
		uint64(in[4])*(uint64(in2[2])<<0) +
		uint64(in[5])*(uint64(in2[1])<<1) +
		uint64(in[6])*(uint64(in2[0])<<0)
	tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) +
		uint64(in[1])*(uint64(in2[6])<<0) +
		uint64(in[2])*(uint64(in2[5])<<0) +
		uint64(in[3])*(uint64(in2[4])<<0) +
		uint64(in[4])*(uint64(in2[3])<<0) +
		uint64(in[5])*(uint64(in2[2])<<0) +
		uint64(in[6])*(uint64(in2[1])<<0) +
		uint64(in[7])*(uint64(in2[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// p256Square.
	tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) +
		uint64(in[1])*(uint64(in2[7])<<1) +
		uint64(in[2])*(uint64(in2[6])<<0) +
		uint64(in[3])*(uint64(in2[5])<<1) +
		uint64(in[4])*(uint64(in2[4])<<0) +
		uint64(in[5])*(uint64(in2[3])<<1) +
		uint64(in[6])*(uint64(in2[2])<<0) +
		uint64(in[7])*(uint64(in2[1])<<1) +
		uint64(in[8])*(uint64(in2[0])<<0)
	tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) +
		uint64(in[2])*(uint64(in2[7])<<0) +
		uint64(in[3])*(uint64(in2[6])<<0) +
		uint64(in[4])*(uint64(in2[5])<<0) +
		uint64(in[5])*(uint64(in2[4])<<0) +
		uint64(in[6])*(uint64(in2[3])<<0) +
		uint64(in[7])*(uint64(in2[2])<<0) +
		uint64(in[8])*(uint64(in2[1])<<0)
	tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) +
		uint64(in[3])*(uint64(in2[7])<<1) +
		uint64(in[4])*(uint64(in2[6])<<0) +
		uint64(in[5])*(uint64(in2[5])<<1) +
		uint64(in[6])*(uint64(in2[4])<<0) +
		uint64(in[7])*(uint64(in2[3])<<1) +
		uint64(in[8])*(uint64(in2[2])<<0)
	tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) +
		uint64(in[4])*(uint64(in2[7])<<0) +
		uint64(in[5])*(uint64(in2[6])<<0) +
		uint64(in[6])*(uint64(in2[5])<<0) +
		uint64(in[7])*(uint64(in2[4])<<0) +
		uint64(in[8])*(uint64(in2[3])<<0)
	tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) +
		uint64(in[5])*(uint64(in2[7])<<1) +
		uint64(in[6])*(uint64(in2[6])<<0) +
		uint64(in[7])*(uint64(in2[5])<<1) +
		uint64(in[8])*(uint64(in2[4])<<0)
	tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) +
		uint64(in[6])*(uint64(in2[7])<<0) +
		uint64(in[7])*(uint64(in2[6])<<0) +
		uint64(in[8])*(uint64(in2[5])<<0)
	tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) +
		uint64(in[7])*(uint64(in2[7])<<1) +
		uint64(in[8])*(uint64(in2[6])<<0)
	tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) +
		uint64(in[8])*(uint64(in2[7])<<0)
	tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0)

	p256ReduceDegree(out, tmp)
}

// p256Assign copies the limbs of in to out, one at a time.
func p256Assign(out, in *[p256Limbs]uint32) {
	for i := 0; i < p256Limbs; i++ {
		out[i] = in[i]
	}
	// NOTE(review): the whole-array assignment below is disabled in this
	// port with the remark "elements change"; the reason is not visible
	// from this file - confirm before re-enabling.
	//*out = *in
}

// p256Invert calculates |out| = |in|^{-1}
//
// Based on Fermat's Little Theorem:
//   a^p = a (mod p)
//   a^{p-1} = 1 (mod p)
//   a^{p-2} = a^{-1} (mod p)
//
// The trailing comments track the exponent of |in| held in the variable just
// written; the final exponent is 2^256 - 2^224 + 2^192 + 2^96 - 3.
func p256Invert(out, in *[p256Limbs]uint32) {
	var ftmp, ftmp2 [p256Limbs]uint32

	// each e_I will hold |in|^{2^I - 1}
	var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32

	p256Square(&ftmp, in)      // 2^1
	p256Mul(&ftmp, in, &ftmp)  // 2^2 - 2^0
	p256Assign(&e2, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^3 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^4 - 2^2
	p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0
	p256Assign(&e4, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^5 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^6 - 2^2
	p256Square(&ftmp, &ftmp)   // 2^7 - 2^3
	p256Square(&ftmp, &ftmp)   // 2^8 - 2^4
	p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
	p256Assign(&e8, &ftmp)
	for i := 0; i < 8; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^16 - 2^8
	p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0
	p256Assign(&e16, &ftmp)
	for i := 0; i < 16; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^32 - 2^16
	p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
	p256Assign(&e32, &ftmp)
	for i := 0; i < 32; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^64 - 2^32
	// Note: e64 is saved before the multiplication that would complete
	// 2^64 - 1, so it holds 2^64 - 2^32; the e32 factor is multiplied in
	// below when ftmp2 is built.
	p256Assign(&e64, &ftmp)
	p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0
	for i := 0; i < 192; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^256 - 2^224 + 2^192

	p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0
	for i := 0; i < 16; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^80 - 2^16
	p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0
	for i := 0; i < 8; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^88 - 2^8
	p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0
	for i := 0; i < 4; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^92 - 2^4
	p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^93 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^94 - 2^2
	p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^95 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^96 - 2^2
	p256Mul(&ftmp2, &ftmp2, in)  // 2^96 - 3

	p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3
}

// p256Scalar3 sets out=3*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar3(out *[p256Limbs]uint32) {
	var carry uint32

	// Each pass handles an even (29-bit) limb and then, after the manual
	// i++, an odd (28-bit) limb; the loop exits after limb 8.
	for i := 0; ; i++ {
		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar4 sets out=4*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar4(out *[p256Limbs]uint32) {
	var carry, nextCarry uint32

	for i := 0; ; i++ {
		// The bits that the shift pushes beyond the limb width are saved
		// in nextCarry first, then the shifted limb is masked and the
		// incoming carry added.
		nextCarry = out[i] >> 27
		out[i] <<= 2
		out[i] &= bottom29Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 29)
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}
		nextCarry = out[i] >> 26
		out[i] <<= 2
		out[i] &= bottom28Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 28)
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar8 sets out=8*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
791 func p256Scalar8(out *[p256Limbs]uint32) { 792 var carry, nextCarry uint32 793 794 for i := 0; ; i++ { 795 nextCarry = out[i] >> 26 796 out[i] <<= 3 797 out[i] &= bottom29Bits 798 out[i] += carry 799 carry = nextCarry + (out[i] >> 29) 800 out[i] &= bottom29Bits 801 802 i++ 803 if i == p256Limbs { 804 break 805 } 806 nextCarry = out[i] >> 25 807 out[i] <<= 3 808 out[i] &= bottom28Bits 809 out[i] += carry 810 carry = nextCarry + (out[i] >> 28) 811 out[i] &= bottom28Bits 812 } 813 814 p256ReduceCarry(out, carry) 815 } 816 817 // Group operations: 818 // 819 // Elements of the elliptic curve group are represented in Jacobian 820 // coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in 821 // Jacobian form. 822 823 // p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}. 824 // 825 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l 826 func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) { 827 var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32 828 829 p256Square(&delta, z) 830 p256Square(&gamma, y) 831 p256Mul(&beta, x, &gamma) 832 833 p256Sum(&tmp, x, &delta) 834 p256Diff(&tmp2, x, &delta) 835 p256Mul(&alpha, &tmp, &tmp2) 836 p256Scalar3(&alpha) 837 838 p256Sum(&tmp, y, z) 839 p256Square(&tmp, &tmp) 840 p256Diff(&tmp, &tmp, &gamma) 841 p256Diff(zOut, &tmp, &delta) 842 843 p256Scalar4(&beta) 844 p256Square(xOut, &alpha) 845 p256Diff(xOut, xOut, &beta) 846 p256Diff(xOut, xOut, &beta) 847 848 p256Diff(&tmp, &beta, xOut) 849 p256Mul(&tmp, &alpha, &tmp) 850 p256Square(&tmp2, &gamma) 851 p256Scalar8(&tmp2) 852 p256Diff(yOut, &tmp, &tmp2) 853 } 854 855 // p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}. 856 // (i.e. the second point is affine.) 857 // 858 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 859 // 860 // Note that this function does not handle P+P, infinity+P nor P+infinity 861 // correctly. 
862 func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) { 863 var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 864 865 p256Square(&z1z1, z1) 866 p256Sum(&tmp, z1, z1) 867 868 p256Mul(&u2, x2, &z1z1) 869 p256Mul(&z1z1z1, z1, &z1z1) 870 p256Mul(&s2, y2, &z1z1z1) 871 p256Diff(&h, &u2, x1) 872 p256Sum(&i, &h, &h) 873 p256Square(&i, &i) 874 p256Mul(&j, &h, &i) 875 p256Diff(&r, &s2, y1) 876 p256Sum(&r, &r, &r) 877 p256Mul(&v, x1, &i) 878 879 p256Mul(zOut, &tmp, &h) 880 p256Square(&rr, &r) 881 p256Diff(xOut, &rr, &j) 882 p256Diff(xOut, xOut, &v) 883 p256Diff(xOut, xOut, &v) 884 885 p256Diff(&tmp, &v, xOut) 886 p256Mul(yOut, &tmp, &r) 887 p256Mul(&tmp, y1, &j) 888 p256Diff(yOut, yOut, &tmp) 889 p256Diff(yOut, yOut, &tmp) 890 } 891 892 // p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}. 893 // 894 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 895 // 896 // Note that this function does not handle P+P, infinity+P nor P+infinity 897 // correctly. 
898 func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) { 899 var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 900 901 p256Square(&z1z1, z1) 902 p256Square(&z2z2, z2) 903 p256Mul(&u1, x1, &z2z2) 904 905 p256Sum(&tmp, z1, z2) 906 p256Square(&tmp, &tmp) 907 p256Diff(&tmp, &tmp, &z1z1) 908 p256Diff(&tmp, &tmp, &z2z2) 909 910 p256Mul(&z2z2z2, z2, &z2z2) 911 p256Mul(&s1, y1, &z2z2z2) 912 913 p256Mul(&u2, x2, &z1z1) 914 p256Mul(&z1z1z1, z1, &z1z1) 915 p256Mul(&s2, y2, &z1z1z1) 916 p256Diff(&h, &u2, &u1) 917 p256Sum(&i, &h, &h) 918 p256Square(&i, &i) 919 p256Mul(&j, &h, &i) 920 p256Diff(&r, &s2, &s1) 921 p256Sum(&r, &r, &r) 922 p256Mul(&v, &u1, &i) 923 924 p256Mul(zOut, &tmp, &h) 925 p256Square(&rr, &r) 926 p256Diff(xOut, &rr, &j) 927 p256Diff(xOut, xOut, &v) 928 p256Diff(xOut, xOut, &v) 929 930 p256Diff(&tmp, &v, xOut) 931 p256Mul(yOut, &tmp, &r) 932 p256Mul(&tmp, &s1, &j) 933 p256Diff(yOut, yOut, &tmp) 934 p256Diff(yOut, yOut, &tmp) 935 } 936 937 // p256CopyConditional sets out=in if mask = 0xffffffff in constant time. 938 // 939 // On entry: mask is either 0 or 0xffffffff. 940 func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) { 941 for i := 0; i < p256Limbs; i++ { 942 tmp := mask & (in[i] ^ out[i]) 943 out[i] ^= tmp 944 } 945 } 946 947 // p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table. 948 // On entry: index < 16, table[0] must be zero. 
949 func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) { 950 for i := range xOut { 951 xOut[i] = 0 952 } 953 for i := range yOut { 954 yOut[i] = 0 955 } 956 957 for i := uint32(1); i < 16; i++ { 958 mask := i ^ index 959 mask |= mask >> 2 960 mask |= mask >> 1 961 mask &= 1 962 mask-- 963 for j := range xOut { 964 xOut[j] |= table[0] & mask 965 table = table[1:] 966 } 967 for j := range yOut { 968 yOut[j] |= table[0] & mask 969 table = table[1:] 970 } 971 } 972 } 973 974 // p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of 975 // table. 976 // On entry: index < 16, table[0] must be zero. 977 func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) { 978 for i := range xOut { 979 xOut[i] = 0 980 } 981 for i := range yOut { 982 yOut[i] = 0 983 } 984 for i := range zOut { 985 zOut[i] = 0 986 } 987 988 // The implicit value at index 0 is all zero. We don't need to perform that 989 // iteration of the loop because we already set out_* to zero. 990 for i := uint32(1); i < 16; i++ { 991 mask := i ^ index 992 mask |= mask >> 2 993 mask |= mask >> 1 994 mask &= 1 995 mask-- 996 for j := range xOut { 997 xOut[j] |= table[i][0][j] & mask 998 } 999 for j := range yOut { 1000 yOut[j] |= table[i][1][j] & mask 1001 } 1002 for j := range zOut { 1003 zOut[j] |= table[i][2][j] & mask 1004 } 1005 } 1006 } 1007 1008 // p256GetBit returns the bit'th bit of scalar. 1009 func p256GetBit(scalar *[32]uint8, bit uint) uint32 { 1010 return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1) 1011 } 1012 1013 // p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a 1014 // little-endian number. Note that the value of scalar must be less than the 1015 // order of the group. 
1016 func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) { 1017 nIsInfinityMask := ^uint32(0) 1018 var pIsNoninfiniteMask, mask, tableOffset uint32 1019 var px, py, tx, ty, tz [p256Limbs]uint32 1020 1021 for i := range xOut { 1022 xOut[i] = 0 1023 } 1024 for i := range yOut { 1025 yOut[i] = 0 1026 } 1027 for i := range zOut { 1028 zOut[i] = 0 1029 } 1030 1031 // The loop adds bits at positions 0, 64, 128 and 192, followed by 1032 // positions 32,96,160 and 224 and does this 32 times. 1033 for i := uint(0); i < 32; i++ { 1034 if i != 0 { 1035 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) 1036 } 1037 tableOffset = 0 1038 for j := uint(0); j <= 32; j += 32 { 1039 bit0 := p256GetBit(scalar, 31-i+j) 1040 bit1 := p256GetBit(scalar, 95-i+j) 1041 bit2 := p256GetBit(scalar, 159-i+j) 1042 bit3 := p256GetBit(scalar, 223-i+j) 1043 index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3) 1044 1045 p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index) 1046 tableOffset += 30 * p256Limbs 1047 1048 // Since scalar is less than the order of the group, we know that 1049 // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle 1050 // below. 1051 p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py) 1052 // The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero 1053 // (a.k.a. the point at infinity). We handle that situation by 1054 // copying the point from the table. 1055 p256CopyConditional(xOut, &px, nIsInfinityMask) 1056 p256CopyConditional(yOut, &py, nIsInfinityMask) 1057 p256CopyConditional(zOut, &p256One, nIsInfinityMask) 1058 1059 // Equally, the result is also wrong if the point from the table is 1060 // zero, which happens when the index is zero. We handle that by 1061 // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0. 
1062 pIsNoninfiniteMask = nonZeroToAllOnes(index) 1063 mask = pIsNoninfiniteMask & ^nIsInfinityMask 1064 p256CopyConditional(xOut, &tx, mask) 1065 p256CopyConditional(yOut, &ty, mask) 1066 p256CopyConditional(zOut, &tz, mask) 1067 // If p was not zero, then n is now non-zero. 1068 nIsInfinityMask &^= pIsNoninfiniteMask 1069 } 1070 } 1071 } 1072 1073 // p256PointToAffine converts a Jacobian point to an affine point. If the input 1074 // is the point at infinity then it returns (0, 0) in constant time. 1075 func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) { 1076 var zInv, zInvSq [p256Limbs]uint32 1077 1078 p256Invert(&zInv, z) 1079 p256Square(&zInvSq, &zInv) 1080 p256Mul(xOut, x, &zInvSq) 1081 p256Mul(&zInv, &zInv, &zInvSq) 1082 p256Mul(yOut, y, &zInv) 1083 } 1084 1085 // p256ToAffine returns a pair of *big.Int containing the affine representation 1086 // of {x,y,z}. 1087 func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) { 1088 var xx, yy [p256Limbs]uint32 1089 p256PointToAffine(&xx, &yy, x, y, z) 1090 return p256ToBig(&xx), p256ToBig(&yy) 1091 } 1092 1093 // p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}. 1094 func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) { 1095 var px, py, pz, tx, ty, tz [p256Limbs]uint32 1096 var precomp [16][3][p256Limbs]uint32 1097 var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32 1098 1099 // We precompute 0,1,2,... times {x,y}. 
1100 precomp[1][0] = *x 1101 precomp[1][1] = *y 1102 precomp[1][2] = p256One 1103 1104 for i := 2; i < 16; i += 2 { 1105 p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2]) 1106 p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y) 1107 } 1108 1109 for i := range xOut { 1110 xOut[i] = 0 1111 } 1112 for i := range yOut { 1113 yOut[i] = 0 1114 } 1115 for i := range zOut { 1116 zOut[i] = 0 1117 } 1118 nIsInfinityMask = ^uint32(0) 1119 1120 // We add in a window of four bits each iteration and do this 64 times. 1121 for i := 0; i < 64; i++ { 1122 if i != 0 { 1123 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) 1124 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) 1125 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) 1126 p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut) 1127 } 1128 1129 index = uint32(scalar[31-i/2]) 1130 if (i & 1) == 1 { 1131 index &= 15 1132 } else { 1133 index >>= 4 1134 } 1135 1136 // See the comments in scalarBaseMult about handling infinities. 1137 p256SelectJacobianPoint(&px, &py, &pz, &precomp, index) 1138 p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz) 1139 p256CopyConditional(xOut, &px, nIsInfinityMask) 1140 p256CopyConditional(yOut, &py, nIsInfinityMask) 1141 p256CopyConditional(zOut, &pz, nIsInfinityMask) 1142 1143 pIsNoninfiniteMask = nonZeroToAllOnes(index) 1144 mask = pIsNoninfiniteMask & ^nIsInfinityMask 1145 p256CopyConditional(xOut, &tx, mask) 1146 p256CopyConditional(yOut, &ty, mask) 1147 p256CopyConditional(zOut, &tz, mask) 1148 nIsInfinityMask &^= pIsNoninfiniteMask 1149 } 1150 } 1151 1152 // p256FromBig sets out = R*in. 
1153 func p256FromBig(out *[p256Limbs]uint32, in *big.Int) { 1154 tmp := new(big.Int).Lsh(in, 257) 1155 tmp.Mod(tmp, p256Params.P) 1156 1157 for i := 0; i < p256Limbs; i++ { 1158 if bits := tmp.Bits(); len(bits) > 0 { 1159 out[i] = uint32(bits[0]) & bottom29Bits 1160 } else { 1161 out[i] = 0 1162 } 1163 tmp.Rsh(tmp, 29) 1164 1165 i++ 1166 if i == p256Limbs { 1167 break 1168 } 1169 1170 if bits := tmp.Bits(); len(bits) > 0 { 1171 out[i] = uint32(bits[0]) & bottom28Bits 1172 } else { 1173 out[i] = 0 1174 } 1175 tmp.Rsh(tmp, 28) 1176 } 1177 } 1178 1179 // p256ToBig returns a *big.Int containing the value of in. 1180 func p256ToBig(in *[p256Limbs]uint32) *big.Int { 1181 result, tmp := new(big.Int), new(big.Int) 1182 1183 result.SetInt64(int64(in[p256Limbs-1])) 1184 for i := p256Limbs - 2; i >= 0; i-- { 1185 if (i & 1) == 0 { 1186 result.Lsh(result, 29) 1187 } else { 1188 result.Lsh(result, 28) 1189 } 1190 tmp.SetInt64(int64(in[i])) 1191 result.Add(result, tmp) 1192 } 1193 1194 result.Mul(result, p256RInverse) 1195 result.Mod(result, p256Params.P) 1196 return result 1197 }