// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64

package elliptic

// This file contains a constant-time, 32-bit implementation of P256.

import (
	"math/big"
)

// p256Curve embeds *CurveParams so that the generic CurveParams methods
// (IsOnCurve, Add, Double, ...) are available, while ScalarMult and
// ScalarBaseMult are overridden below with constant-time versions.
type p256Curve struct {
	*CurveParams
}

var (
	// p256 is the package-level P-256 curve instance, populated by initP256.
	p256 p256Curve
	// RInverse contains 1/R mod p - the inverse of the Montgomery constant
	// (2**257).
	p256RInverse *big.Int
)

// initP256 fills in the P-256 curve parameters and the Montgomery constant
// inverse. It is expected to be called once (lazily, from the package's
// curve-initialization machinery) before p256 is used.
func initP256() {
	// See FIPS 186-3, section D.2.3
	p256.CurveParams = &CurveParams{Name: "P-256"}
	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
	p256.BitSize = 256

	p256RInverse, _ = new(big.Int).SetString("7fffffff00000001fffffffe8000000100000000ffffffff0000000180000000", 16)
}

// Params returns the curve's domain parameters, satisfying the Curve
// interface.
func (curve p256Curve) Params() *CurveParams {
	return curve.CurveParams
}

// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
// to out. If the scalar is equal or greater than the order of the group, it's
// reduced modulo that order.
46 func p256GetScalar(out *[32]byte, in []byte) { 47 n := new(big.Int).SetBytes(in) 48 var scalarBytes []byte 49 50 if n.Cmp(p256.N) >= 0 { 51 n.Mod(n, p256.N) 52 scalarBytes = n.Bytes() 53 } else { 54 scalarBytes = in 55 } 56 57 for i, v := range scalarBytes { 58 out[len(scalarBytes)-(1+i)] = v 59 } 60 } 61 62 func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 63 var scalarReversed [32]byte 64 p256GetScalar(&scalarReversed, scalar) 65 66 var x1, y1, z1 [p256Limbs]uint32 67 p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed) 68 return p256ToAffine(&x1, &y1, &z1) 69 } 70 71 func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 72 var scalarReversed [32]byte 73 p256GetScalar(&scalarReversed, scalar) 74 75 var px, py, x1, y1, z1 [p256Limbs]uint32 76 p256FromBig(&px, bigX) 77 p256FromBig(&py, bigY) 78 p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed) 79 return p256ToAffine(&x1, &y1, &z1) 80 } 81 82 // Field elements are represented as nine, unsigned 32-bit words. 83 // 84 // The value of an field element is: 85 // x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) 86 // 87 // That is, each limb is alternately 29 or 28-bits wide in little-endian 88 // order. 89 // 90 // This means that a field element hits 2**257, rather than 2**256 as we would 91 // like. A 28, 29, ... pattern would cause us to hit 2**256, but that causes 92 // problems when multiplying as terms end up one bit short of a limb which 93 // would require much bit-shifting to correct. 94 // 95 // Finally, the values stored in a field element are in Montgomery form. So the 96 // value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 97 // 2**257. 98 99 const ( 100 p256Limbs = 9 101 bottom29Bits = 0x1fffffff 102 ) 103 104 var ( 105 // p256One is the number 1 as a field element. 
106 p256One = [p256Limbs]uint32{2, 0, 0, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, 0} 107 p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0} 108 // p256P is the prime modulus as a field element. 109 p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, 0, 0, 0x200000, 0xf000000, 0xfffffff} 110 // p2562P is the twice prime modulus as a field element. 111 p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, 0, 0, 0x400000, 0xe000000, 0x1fffffff} 112 ) 113 114 // p256Precomputed contains precomputed values to aid the calculation of scalar 115 // multiples of the base point, G. It's actually two, equal length, tables 116 // concatenated. 117 // 118 // The first table contains (x,y) field element pairs for 16 multiples of the 119 // base point, G. 120 // 121 // Index | Index (binary) | Value 122 // 0 | 0000 | 0G (all zeros, omitted) 123 // 1 | 0001 | G 124 // 2 | 0010 | 2**64G 125 // 3 | 0011 | 2**64G + G 126 // 4 | 0100 | 2**128G 127 // 5 | 0101 | 2**128G + G 128 // 6 | 0110 | 2**128G + 2**64G 129 // 7 | 0111 | 2**128G + 2**64G + G 130 // 8 | 1000 | 2**192G 131 // 9 | 1001 | 2**192G + G 132 // 10 | 1010 | 2**192G + 2**64G 133 // 11 | 1011 | 2**192G + 2**64G + G 134 // 12 | 1100 | 2**192G + 2**128G 135 // 13 | 1101 | 2**192G + 2**128G + G 136 // 14 | 1110 | 2**192G + 2**128G + 2**64G 137 // 15 | 1111 | 2**192G + 2**128G + 2**64G + G 138 // 139 // The second table follows the same style, but the terms are 2**32G, 140 // 2**96G, 2**160G, 2**224G. 141 // 142 // This is ~2KB of data. 
var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
	// First table: combinations of G, 2**64G, 2**128G, 2**192G (15 entries,
	// each an (x, y) pair of 9 limbs).
	0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
	0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
	0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
	0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
	0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
	0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
	0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
	0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
	0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
	0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
	0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
	0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
	0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
	0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
	0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
	0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
	0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
	0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
	0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
	0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
	0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
	0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
	0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
	0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
	0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
	0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
	0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
	0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
	0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
	// Second table: combinations of 2**32G, 2**96G, 2**160G, 2**224G.
	0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
	0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
	0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
	0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
	0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
	0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
	0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
	0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
	0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
	0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
	0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
	0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
	0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
	0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
	0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
	0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
	0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
	0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
	0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
	0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
	0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
	0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
	0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
	0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
	0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
	0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
	0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
	0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
	0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
	0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
	0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
}

// Field element operations:

// nonZeroToAllOnes returns:
//   0xffffffff for 0 < x <= 2**31
//   0 for x == 0 or x > 2**31.
//
// It is used to build branch-free (constant-time) conditional masks: the
// subtraction (x - 1) underflows exactly when x == 0, which is what flips
// the top bit that the shift then broadcasts.
func nonZeroToAllOnes(x uint32) uint32 {
	return ((x - 1) >> 31) - 1
}

// p256ReduceCarry adds a multiple of p in order to cancel |carry|,
// which is a term at 2**257.
//
// On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
// On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) {
	// carry_mask is all-ones when carry is non-zero, so the constant terms
	// below are added only in that case, without branching on carry.
	carry_mask := nonZeroToAllOnes(carry)

	inout[0] += carry << 1
	inout[3] += 0x10000000 & carry_mask
	// carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
	// previous line therefore this doesn't underflow.
	inout[3] -= carry << 11
	inout[4] += (0x20000000 - 1) & carry_mask
	inout[5] += (0x10000000 - 1) & carry_mask
	inout[6] += (0x20000000 - 1) & carry_mask
	inout[6] -= carry << 22
	// This may underflow if carry is non-zero but, if so, we'll fix it in the
	// next line.
	inout[7] -= 1 & carry_mask
	inout[7] += carry << 25
}

// p256Sum sets out = in+in2.
//
// On entry, in[i]+in2[i] must not overflow a 32-bit word.
// On exit: out[0,2,...] < 2**30, out[1,3,...]
// < 2**29
func p256Sum(out, in, in2 *[p256Limbs]uint32) {
	// Limb-wise addition with carry propagation. The loop body is unrolled
	// two limbs at a time because even limbs are 29 bits wide and odd limbs
	// are 28 bits wide; p256Limbs is odd, so the break sits between the two
	// halves. bottom28Bits is declared elsewhere in this file (not shown
	// here).
	carry := uint32(0)
	for i := 0; ; i++ {
		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	// Fold the final carry (a term at 2**257) back into the element.
	p256ReduceCarry(out, carry)
}

// Constants used to build p256Zero31: each is of the form
// 2**a (+/- smaller powers of two) - 2**2, sized to match its limb's width.
const (
	two30m2    = 1<<30 - 1<<2
	two30p13m2 = 1<<30 + 1<<13 - 1<<2
	two31m2    = 1<<31 - 1<<2
	two31p24m2 = 1<<31 + 1<<24 - 1<<2
	two30m27m2 = 1<<30 - 1<<27 - 1<<2
)

// p256Zero31 is 0 mod p.
// It is added in p256Diff before subtracting so that no limb underflows.
// NOTE(review): two31m3 is not declared in this portion of the file —
// presumably it is declared elsewhere in the file; confirm.
var p256Zero31 = [p256Limbs]uint32{two31m3, two30m2, two31m2, two30p13m2, two31m2, two30m2, two31p24m2, two30m27m2, two31m2}

// p256Diff sets out = in-in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Diff(out, in, in2 *[p256Limbs]uint32) {
	var carry uint32

	// Same two-limb unrolled carry chain as p256Sum; adding p256Zero31
	// (which is 0 mod p) keeps every per-limb difference non-negative.
	for i := 0; ; i++ {
		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with
// the same 29,28,... bit positions as a field element.
//
// The values in field elements are in Montgomery form: x*R mod p where R =
// 2**257. Since we just multiplied two Montgomery values together, the result
// is x*y*R*R mod p. We wish to divide by R in order for the result also to be
// in Montgomery form.
//
// On entry: tmp[i] < 2**64
// On exit: out[0,2,...] < 2**30, out[1,3,...]
// < 2**29
func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
	// The following table may be helpful when reading this code:
	//
	// Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
	// Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
	// Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
	//   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
	var tmp2 [18]uint32
	var carry, x, xMask uint32

	// tmp contains 64-bit words with the same 29,28,29-bit positions as a
	// field element. So the top of an element of tmp might overlap with
	// another element two positions down. The following loop eliminates
	// this overlap.
	tmp2[0] = uint32(tmp[0]) & bottom29Bits

	tmp2[1] = uint32(tmp[0]) >> 29
	tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
	tmp2[1] += uint32(tmp[1]) & bottom28Bits
	carry = tmp2[1] >> 28
	tmp2[1] &= bottom28Bits

	// Like the addition loops above, this is unrolled two limbs at a time:
	// the first half handles a 29-bit limb, the second a 28-bit limb.
	for i := 2; i < 17; i++ {
		tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
		tmp2[i] += (uint32(tmp[i-1])) >> 28
		tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
		tmp2[i] += uint32(tmp[i]) & bottom29Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 29
		tmp2[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		tmp2[i] = uint32(tmp[i-2]>>32) >> 25
		tmp2[i] += uint32(tmp[i-1]) >> 29
		tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
		tmp2[i] += uint32(tmp[i]) & bottom28Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 28
		tmp2[i] &= bottom28Bits
	}

	tmp2[17] = uint32(tmp[15]>>32) >> 25
	tmp2[17] += uint32(tmp[16]) >> 29
	tmp2[17] += uint32(tmp[16]>>32) << 3
	tmp2[17] += carry

	// Montgomery elimination of terms:
	//
	// Since R is 2**257, we can divide by R with a bitwise shift if we can
	// ensure that the right-most 257 bits are all zero. We can make that true
	// by adding multiplies of p without affecting the value.
	//
	// So we eliminate limbs from right to left. Since the bottom 29 bits of p
	// are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
	// We can do that for 8 further limbs and then right shift to eliminate the
	// extra factor of R.
	for i := 0; ; i += 2 {
		tmp2[i+1] += tmp2[i] >> 29
		x = tmp2[i] & bottom29Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i] = 0

		// The bounds calculations for this loop are tricky. Each iteration of
		// the loop eliminates two words by adding values to words to their
		// right.
		//
		// The following table contains the amounts added to each word (as an
		// offset from the value of i at the top of the loop). The amounts are
		// accounted for from the first and second half of the loop separately
		// and are written as, for example, 28 to mean a value <2**28.
		//
		// Word:                   3   4   5   6   7   8   9   10
		// Added in top half:     28  11      29  21  29  28
		//                                        28  29
		//                                            29
		// Added in bottom half:      29  10      28  21  28   28
		//                                            29
		//
		// The value that is currently offset 7 will be offset 5 for the next
		// iteration and then offset 3 for the iteration after that. Therefore
		// the total value added will be the values added at 7, 5 and 3.
		//
		// The following table accumulates these values. The sums at the bottom
		// are written as, for example, 29+28, to mean a value < 2**29+2**28.
		//
		// Word:  3   4   5   6   7   8   9  10  11  12  13
		//       28  11  10  29  21  29  28  28  28  28  28
		//           29  28  11  28  29  28  29  28  29  28
		//           29  28  21  21  29  21  29  21
		//           10  29  28  21  28  21  28
		//           28  29  28  29  28  29  28
		//           11  10  29  10  29  10
		//           29  28  11  28  11
		//           29          29
		//       --------------------------------------------
		//          30+         31+         30+         31+     30+
		//          28+         29+         28+         29+     21+
		//          21+         28+         21+         28+     10
		//          10          21+         10          21+
		//                      11                      11
		//
		// So the greatest amount is added to tmp2[10] and tmp2[12]. If
		// tmp2[10/12] has an initial value of <2**29, then the maximum value
		// will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
		// as required.
		tmp2[i+3] += (x << 10) & bottom28Bits
		tmp2[i+4] += (x >> 18)

		tmp2[i+6] += (x << 21) & bottom29Bits
		tmp2[i+7] += x >> 8

		// At position 200, which is the starting bit position for word 7, we
		// have a factor of 0xf000000 = 2**28 - 2**24.
		tmp2[i+7] += 0x10000000 & xMask
		tmp2[i+8] += (x - 1) & xMask
		tmp2[i+7] -= (x << 24) & bottom28Bits
		tmp2[i+8] -= x >> 4

		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+8] -= x
		tmp2[i+8] += (x << 28) & bottom29Bits
		tmp2[i+9] += ((x >> 1) - 1) & xMask

		if i+1 == p256Limbs {
			break
		}
		tmp2[i+2] += tmp2[i+1] >> 28
		x = tmp2[i+1] & bottom28Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i+1] = 0

		tmp2[i+4] += (x << 11) & bottom29Bits
		tmp2[i+5] += (x >> 18)

		tmp2[i+7] += (x << 21) & bottom28Bits
		tmp2[i+8] += x >> 7

		// At position 199, which is the starting bit of the 8th word when
		// dealing with a context starting on an odd word, we have a factor of
		// 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
		// word from i+1 is i+8.
		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+9] += (x - 1) & xMask
		tmp2[i+8] -= (x << 25) & bottom29Bits
		tmp2[i+9] -= x >> 4

		tmp2[i+9] += 0x10000000 & xMask
		tmp2[i+9] -= x
		tmp2[i+10] += (x - 1) & xMask
	}

	// We merge the right shift with a carry chain. The words above 2**257 have
	// widths of 28,29,... which we need to correct when copying them down.
	carry = 0
	for i := 0; i < 8; i++ {
		// The maximum value of tmp2[i + 9] occurs on the first iteration and
		// is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
		// therefore safe.
		out[i] = tmp2[i+9]
		out[i] += carry
		out[i] += (tmp2[i+10] << 28) & bottom29Bits
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		out[i] = tmp2[i+9] >> 1
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	out[8] = tmp2[17]
	out[8] += carry
	carry = out[8] >> 29
	out[8] &= bottom29Bits

	p256ReduceCarry(out, carry)
}

// p256Square sets out=in*in.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Square(out, in *[p256Limbs]uint32) {
	// Schoolbook squaring into 64-bit accumulators. Cross terms are doubled
	// (<<1); when both factors are odd (28-bit) limbs an extra doubling
	// (<<2) accounts for their shifted bit positions.
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in[0])
	tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
	tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
		uint64(in[1])*(uint64(in[1])<<1)
	tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
		uint64(in[1])*(uint64(in[2])<<1)
	tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
		uint64(in[1])*(uint64(in[3])<<2) +
		uint64(in[2])*uint64(in[2])
	tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
		uint64(in[1])*(uint64(in[4])<<1) +
		uint64(in[2])*(uint64(in[3])<<1)
	tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
		uint64(in[1])*(uint64(in[5])<<2) +
		uint64(in[2])*(uint64(in[4])<<1) +
		uint64(in[3])*(uint64(in[3])<<1)
	tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
		uint64(in[1])*(uint64(in[6])<<1) +
		uint64(in[2])*(uint64(in[5])<<1) +
		uint64(in[3])*(uint64(in[4])<<1)
	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
	// which is < 2**64 as required.
	tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
		uint64(in[1])*(uint64(in[7])<<2) +
		uint64(in[2])*(uint64(in[6])<<1) +
		uint64(in[3])*(uint64(in[5])<<2) +
		uint64(in[4])*uint64(in[4])
	tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
		uint64(in[2])*(uint64(in[7])<<1) +
		uint64(in[3])*(uint64(in[6])<<1) +
		uint64(in[4])*(uint64(in[5])<<1)
	tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
		uint64(in[3])*(uint64(in[7])<<2) +
		uint64(in[4])*(uint64(in[6])<<1) +
		uint64(in[5])*(uint64(in[5])<<1)
	tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
		uint64(in[4])*(uint64(in[7])<<1) +
		uint64(in[5])*(uint64(in[6])<<1)
	tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
		uint64(in[5])*(uint64(in[7])<<2) +
		uint64(in[6])*uint64(in[6])
	tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
		uint64(in[6])*(uint64(in[7])<<1)
	tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
		uint64(in[7])*(uint64(in[7])<<1)
	tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
	tmp[16] = uint64(in[8]) * uint64(in[8])

	// Divide by R (2**257) to return to Montgomery form.
	p256ReduceDegree(out, tmp)
}

// p256Mul sets out=in*in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Mul(out, in, in2 *[p256Limbs]uint32) {
	// Schoolbook multiplication into 64-bit accumulators. A term is doubled
	// (<<1) when both of its factors are odd-indexed (28-bit) limbs, to
	// account for their shifted bit positions; <<0 terms are written
	// explicitly to keep the pattern visually uniform.
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in2[0])
	tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) +
		uint64(in[1])*(uint64(in2[0])<<0)
	tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) +
		uint64(in[1])*(uint64(in2[1])<<1) +
		uint64(in[2])*(uint64(in2[0])<<0)
	tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) +
		uint64(in[1])*(uint64(in2[2])<<0) +
		uint64(in[2])*(uint64(in2[1])<<0) +
		uint64(in[3])*(uint64(in2[0])<<0)
	tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) +
		uint64(in[1])*(uint64(in2[3])<<1) +
		uint64(in[2])*(uint64(in2[2])<<0) +
		uint64(in[3])*(uint64(in2[1])<<1) +
		uint64(in[4])*(uint64(in2[0])<<0)
	tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) +
		uint64(in[1])*(uint64(in2[4])<<0) +
		uint64(in[2])*(uint64(in2[3])<<0) +
		uint64(in[3])*(uint64(in2[2])<<0) +
		uint64(in[4])*(uint64(in2[1])<<0) +
		uint64(in[5])*(uint64(in2[0])<<0)
	tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) +
		uint64(in[1])*(uint64(in2[5])<<1) +
		uint64(in[2])*(uint64(in2[4])<<0) +
		uint64(in[3])*(uint64(in2[3])<<1) +
		uint64(in[4])*(uint64(in2[2])<<0) +
		uint64(in[5])*(uint64(in2[1])<<1) +
		uint64(in[6])*(uint64(in2[0])<<0)
	tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) +
		uint64(in[1])*(uint64(in2[6])<<0) +
		uint64(in[2])*(uint64(in2[5])<<0) +
		uint64(in[3])*(uint64(in2[4])<<0) +
		uint64(in[4])*(uint64(in2[3])<<0) +
		uint64(in[5])*(uint64(in2[2])<<0) +
		uint64(in[6])*(uint64(in2[1])<<0) +
		uint64(in[7])*(uint64(in2[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// p256Square.
	tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) +
		uint64(in[1])*(uint64(in2[7])<<1) +
		uint64(in[2])*(uint64(in2[6])<<0) +
		uint64(in[3])*(uint64(in2[5])<<1) +
		uint64(in[4])*(uint64(in2[4])<<0) +
		uint64(in[5])*(uint64(in2[3])<<1) +
		uint64(in[6])*(uint64(in2[2])<<0) +
		uint64(in[7])*(uint64(in2[1])<<1) +
		uint64(in[8])*(uint64(in2[0])<<0)
	tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) +
		uint64(in[2])*(uint64(in2[7])<<0) +
		uint64(in[3])*(uint64(in2[6])<<0) +
		uint64(in[4])*(uint64(in2[5])<<0) +
		uint64(in[5])*(uint64(in2[4])<<0) +
		uint64(in[6])*(uint64(in2[3])<<0) +
		uint64(in[7])*(uint64(in2[2])<<0) +
		uint64(in[8])*(uint64(in2[1])<<0)
	tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) +
		uint64(in[3])*(uint64(in2[7])<<1) +
		uint64(in[4])*(uint64(in2[6])<<0) +
		uint64(in[5])*(uint64(in2[5])<<1) +
		uint64(in[6])*(uint64(in2[4])<<0) +
		uint64(in[7])*(uint64(in2[3])<<1) +
		uint64(in[8])*(uint64(in2[2])<<0)
	tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) +
		uint64(in[4])*(uint64(in2[7])<<0) +
		uint64(in[5])*(uint64(in2[6])<<0) +
		uint64(in[6])*(uint64(in2[5])<<0) +
		uint64(in[7])*(uint64(in2[4])<<0) +
		uint64(in[8])*(uint64(in2[3])<<0)
	tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) +
		uint64(in[5])*(uint64(in2[7])<<1) +
		uint64(in[6])*(uint64(in2[6])<<0) +
		uint64(in[7])*(uint64(in2[5])<<1) +
		uint64(in[8])*(uint64(in2[4])<<0)
	tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) +
		uint64(in[6])*(uint64(in2[7])<<0) +
		uint64(in[7])*(uint64(in2[6])<<0) +
		uint64(in[8])*(uint64(in2[5])<<0)
	tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) +
		uint64(in[7])*(uint64(in2[7])<<1) +
		uint64(in[8])*(uint64(in2[6])<<0)
	tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) +
		uint64(in[8])*(uint64(in2[7])<<0)
	tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0)

	// Divide by R (2**257) to return to Montgomery form.
	p256ReduceDegree(out, tmp)
}

// p256Assign sets out = in (a plain field-element copy).
func p256Assign(out, in *[p256Limbs]uint32) {
	*out = *in
}

// p256Invert calculates |out| = |in|^{-1}
//
// Based on Fermat's Little Theorem:
//   a^p = a (mod p)
//   a^{p-1} = 1 (mod p)
//   a^{p-2} = a^{-1} (mod p)
//
// The addition chain below raises in to the power
// p-2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 (see the final comment).
func p256Invert(out, in *[p256Limbs]uint32) {
	var ftmp, ftmp2 [p256Limbs]uint32

	// each e_I will hold |in|^{2^I - 1}
	var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32

	// Each trailing comment gives the exponent held so far.
	p256Square(&ftmp, in)      // 2^1
	p256Mul(&ftmp, in, &ftmp)  // 2^2 - 2^0
	p256Assign(&e2, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^3 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^4 - 2^2
	p256Mul(&ftmp, &ftmp, &e2) // 2^4 - 2^0
	p256Assign(&e4, &ftmp)
	p256Square(&ftmp, &ftmp)   // 2^5 - 2^1
	p256Square(&ftmp, &ftmp)   // 2^6 - 2^2
	p256Square(&ftmp, &ftmp)   // 2^7 - 2^3
	p256Square(&ftmp, &ftmp)   // 2^8 - 2^4
	p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
	p256Assign(&e8, &ftmp)
	for i := 0; i < 8; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^16 - 2^8
	p256Mul(&ftmp, &ftmp, &e8) // 2^16 - 2^0
	p256Assign(&e16, &ftmp)
	for i := 0; i < 16; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^32 - 2^16
	p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
	p256Assign(&e32, &ftmp)
	for i := 0; i < 32; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^64 - 2^32
	p256Assign(&e64, &ftmp)
	p256Mul(&ftmp, &ftmp, in) // 2^64 - 2^32 + 2^0
	for i := 0; i < 192; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^256 - 2^224 + 2^192

	// ftmp2 assembles the low 2^96 - 3 part of the exponent from the
	// saved e_I values.
	p256Mul(&ftmp2, &e64, &e32) // 2^64 - 2^0
	for i := 0; i < 16; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^80 - 2^16
	p256Mul(&ftmp2, &ftmp2, &e16) // 2^80 - 2^0
	for i := 0; i < 8; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^88 - 2^8
	p256Mul(&ftmp2, &ftmp2, &e8) // 2^88 - 2^0
	for i := 0; i < 4; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^92 - 2^4
	p256Mul(&ftmp2, &ftmp2, &e4) // 2^92 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^93 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^94 - 2^2
	p256Mul(&ftmp2, &ftmp2, &e2) // 2^94 - 2^0
	p256Square(&ftmp2, &ftmp2)   // 2^95 - 2^1
	p256Square(&ftmp2, &ftmp2)   // 2^96 - 2^2
	p256Mul(&ftmp2, &ftmp2, in)  // 2^96 - 3

	p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 + 2^192 + 2^96 - 3
}

// p256Scalar3 sets out=3*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar3(out *[p256Limbs]uint32) {
	var carry uint32

	// Limbs alternate between 29 bits (even indices) and 28 bits (odd
	// indices); each loop pass multiplies one limb of each width and
	// chains the carry into the next limb.
	for i := 0; ; i++ {
		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	// The carry out of the top limb is folded back in by p256ReduceCarry.
	p256ReduceCarry(out, carry)
}

// p256Scalar4 sets out=4*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar4(out *[p256Limbs]uint32) {
	var carry, nextCarry uint32

	// The bits that the <<2 would push out of each limb are captured in
	// nextCarry before shifting; adding the incoming carry may then
	// overflow the limb by at most one more bit, which is also collected.
	for i := 0; ; i++ {
		nextCarry = out[i] >> 27
		out[i] <<= 2
		out[i] &= bottom29Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 29)
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}
		nextCarry = out[i] >> 26
		out[i] <<= 2
		out[i] &= bottom28Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 28)
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar8 sets out=8*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar8(out *[p256Limbs]uint32) {
	var carry, nextCarry uint32

	// Same carry discipline as p256Scalar4, but shifting by 3: the top
	// bits of each limb are saved in nextCarry before the shift, then the
	// incoming carry is added and any resulting overflow bit collected.
	for i := 0; ; i++ {
		nextCarry = out[i] >> 26
		out[i] <<= 3
		out[i] &= bottom29Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 29)
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}
		nextCarry = out[i] >> 25
		out[i] <<= 3
		out[i] &= bottom28Bits
		out[i] += carry
		carry = nextCarry + (out[i] >> 28)
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// Group operations:
//
// Elements of the elliptic curve group are represented in Jacobian
// coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
// Jacobian form.

// p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}.
//
// See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) {
	var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32

	p256Square(&delta, z)     // delta = z²
	p256Square(&gamma, y)     // gamma = y²
	p256Mul(&beta, x, &gamma) // beta = x*gamma

	// alpha = 3*(x - delta)*(x + delta)
	p256Sum(&tmp, x, &delta)
	p256Diff(&tmp2, x, &delta)
	p256Mul(&alpha, &tmp, &tmp2)
	p256Scalar3(&alpha)

	// zOut = (y + z)² - gamma - delta
	p256Sum(&tmp, y, z)
	p256Square(&tmp, &tmp)
	p256Diff(&tmp, &tmp, &gamma)
	p256Diff(zOut, &tmp, &delta)

	// xOut = alpha² - 8*beta (beta holds 4*beta after p256Scalar4)
	p256Scalar4(&beta)
	p256Square(xOut, &alpha)
	p256Diff(xOut, xOut, &beta)
	p256Diff(xOut, xOut, &beta)

	// yOut = alpha*(4*beta - xOut) - 8*gamma²
	p256Diff(&tmp, &beta, xOut)
	p256Mul(&tmp, &alpha, &tmp)
	p256Square(&tmp2, &gamma)
	p256Scalar8(&tmp2)
	p256Diff(yOut, &tmp, &tmp2)
}

// p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}.
// (i.e. the second point is affine.)
//
// See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
//
// Note that this function does not handle P+P, infinity+P nor P+infinity
// correctly.
func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) {
	var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32

	p256Square(&z1z1, z1)  // z1z1 = z1²
	p256Sum(&tmp, z1, z1)  // tmp = 2*z1

	p256Mul(&u2, x2, &z1z1)      // u2 = x2*z1²
	p256Mul(&z1z1z1, z1, &z1z1)  // z1z1z1 = z1³
	p256Mul(&s2, y2, &z1z1z1)    // s2 = y2*z1³
	p256Diff(&h, &u2, x1)        // h = u2 - x1
	p256Sum(&i, &h, &h)
	p256Square(&i, &i)           // i = (2h)²
	p256Mul(&j, &h, &i)          // j = h*i
	p256Diff(&r, &s2, y1)
	p256Sum(&r, &r, &r)          // r = 2*(s2 - y1)
	p256Mul(&v, x1, &i)          // v = x1*i

	p256Mul(zOut, &tmp, &h) // zOut = 2*z1*h
	p256Square(&rr, &r)
	// xOut = r² - j - 2v
	p256Diff(xOut, &rr, &j)
	p256Diff(xOut, xOut, &v)
	p256Diff(xOut, xOut, &v)

	// yOut = r*(v - xOut) - 2*y1*j
	p256Diff(&tmp, &v, xOut)
	p256Mul(yOut, &tmp, &r)
	p256Mul(&tmp, y1, &j)
	p256Diff(yOut, yOut, &tmp)
	p256Diff(yOut, yOut, &tmp)
}

// p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}.
//
// See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
//
// Note that this function does not handle P+P, infinity+P nor P+infinity
// correctly.
func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) {
	var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32

	p256Square(&z1z1, z1)   // z1z1 = z1²
	p256Square(&z2z2, z2)   // z2z2 = z2²
	p256Mul(&u1, x1, &z2z2) // u1 = x1*z2²

	// tmp = (z1 + z2)² - z1² - z2² = 2*z1*z2
	p256Sum(&tmp, z1, z2)
	p256Square(&tmp, &tmp)
	p256Diff(&tmp, &tmp, &z1z1)
	p256Diff(&tmp, &tmp, &z2z2)

	p256Mul(&z2z2z2, z2, &z2z2) // z2z2z2 = z2³
	p256Mul(&s1, y1, &z2z2z2)   // s1 = y1*z2³

	p256Mul(&u2, x2, &z1z1)     // u2 = x2*z1²
	p256Mul(&z1z1z1, z1, &z1z1) // z1z1z1 = z1³
	p256Mul(&s2, y2, &z1z1z1)   // s2 = y2*z1³
	p256Diff(&h, &u2, &u1)      // h = u2 - u1
	p256Sum(&i, &h, &h)
	p256Square(&i, &i)          // i = (2h)²
	p256Mul(&j, &h, &i)         // j = h*i
	p256Diff(&r, &s2, &s1)
	p256Sum(&r, &r, &r)         // r = 2*(s2 - s1)
	p256Mul(&v, &u1, &i)        // v = u1*i

	p256Mul(zOut, &tmp, &h) // zOut = 2*z1*z2*h
	p256Square(&rr, &r)
	// xOut = r² - j - 2v
	p256Diff(xOut, &rr, &j)
	p256Diff(xOut, xOut, &v)
	p256Diff(xOut, xOut, &v)

	// yOut = r*(v - xOut) - 2*s1*j
	p256Diff(&tmp, &v, xOut)
	p256Mul(yOut, &tmp, &r)
	p256Mul(&tmp, &s1, &j)
	p256Diff(yOut, yOut, &tmp)
	p256Diff(yOut, yOut, &tmp)
}

// p256CopyConditional sets out=in if mask = 0xffffffff in constant time.
//
// On entry: mask is either 0 or 0xffffffff.
func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) {
	// Branch-free, per-limb select: out ^ (mask & (in ^ out)) equals in
	// when mask is all ones and out when mask is zero.
	for i := 0; i < p256Limbs; i++ {
		tmp := mask & (in[i] ^ out[i])
		out[i] ^= tmp
	}
}

// p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
//
// On entry: index < 16. table holds the entries for indices 1..15
// consecutively (2*p256Limbs words each); index 0 selects the implicit
// all-zero entry.
func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) {
	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}

	// Scan every entry so the access pattern is independent of index.
	for i := uint32(1); i < 16; i++ {
		// Branch-free equality test: i^index is zero only when i == index.
		// OR-folding its four bits into bit 0 and decrementing yields
		// mask = 0xffffffff when i == index and 0 otherwise.
		mask := i ^ index
		mask |= mask >> 2
		mask |= mask >> 1
		mask &= 1
		mask--
		// The table is consumed sequentially: p256Limbs words of x, then
		// p256Limbs words of y, per entry.
		for j := range xOut {
			xOut[j] |= table[0] & mask
			table = table[1:]
		}
		for j := range yOut {
			yOut[j] |= table[0] & mask
			table = table[1:]
		}
	}
}

// p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
// table.
// On entry: index < 16, table[0] must be zero.
func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) {
	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The implicit value at index 0 is all zero. We don't need to perform that
	// iteration of the loop because we already set out_* to zero.
	for i := uint32(1); i < 16; i++ {
		// Same branch-free equality mask as in p256SelectAffinePoint.
		mask := i ^ index
		mask |= mask >> 2
		mask |= mask >> 1
		mask &= 1
		mask--
		for j := range xOut {
			xOut[j] |= table[i][0][j] & mask
		}
		for j := range yOut {
			yOut[j] |= table[i][1][j] & mask
		}
		for j := range zOut {
			zOut[j] |= table[i][2][j] & mask
		}
	}
}

// p256GetBit returns the bit'th bit of scalar.
func p256GetBit(scalar *[32]uint8, bit uint) uint32 {
	return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
}

// p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
// little-endian number. Note that the value of scalar must be less than the
// order of the group.
func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) {
	// nIsInfinityMask is all ones while the accumulator {xOut,yOut,zOut}
	// is still the point at infinity; it is cleared (branch-free) once a
	// non-zero table entry has been added.
	nIsInfinityMask := ^uint32(0)
	var pIsNoninfiniteMask, mask, tableOffset uint32
	var px, py, tx, ty, tz [p256Limbs]uint32

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The loop adds bits at positions 0, 64, 128 and 192, followed by
	// positions 32,96,160 and 224 and does this 32 times.
	for i := uint(0); i < 32; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}
		tableOffset = 0
		for j := uint(0); j <= 32; j += 32 {
			// Four scalar bits, 64 positions apart, form the index into
			// the precomputed table.
			bit0 := p256GetBit(scalar, 31-i+j)
			bit1 := p256GetBit(scalar, 95-i+j)
			bit2 := p256GetBit(scalar, 159-i+j)
			bit3 := p256GetBit(scalar, 223-i+j)
			index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)

			// Each table block holds 15 affine entries of 2*p256Limbs
			// words, hence the 30*p256Limbs stride.
			p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index)
			tableOffset += 30 * p256Limbs

			// Since scalar is less than the order of the group, we know that
			// {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
			// below.
			p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
			// The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
			// (a.k.a. the point at infinity). We handle that situation by
			// copying the point from the table.
			p256CopyConditional(xOut, &px, nIsInfinityMask)
			p256CopyConditional(yOut, &py, nIsInfinityMask)
			p256CopyConditional(zOut, &p256One, nIsInfinityMask)

			// Equally, the result is also wrong if the point from the table is
			// zero, which happens when the index is zero. We handle that by
			// only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
			pIsNoninfiniteMask = nonZeroToAllOnes(index)
			mask = pIsNoninfiniteMask & ^nIsInfinityMask
			p256CopyConditional(xOut, &tx, mask)
			p256CopyConditional(yOut, &ty, mask)
			p256CopyConditional(zOut, &tz, mask)
			// If p was not zero, then n is now non-zero.
			nIsInfinityMask &^= pIsNoninfiniteMask
		}
	}
}

// p256PointToAffine converts a Jacobian point to an affine point. If the input
// is the point at infinity then it returns (0, 0) in constant time.
func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) {
	var zInv, zInvSq [p256Limbs]uint32

	p256Invert(&zInv, z)           // zInv = z^{-1} (zero maps to zero)
	p256Square(&zInvSq, &zInv)     // zInvSq = z^{-2}
	p256Mul(xOut, x, &zInvSq)      // xOut = x/z²
	p256Mul(&zInv, &zInv, &zInvSq) // zInv = z^{-3}
	p256Mul(yOut, y, &zInv)        // yOut = y/z³
}

// p256ToAffine returns a pair of *big.Int containing the affine representation
// of {x,y,z}.
func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) {
	var xx, yy [p256Limbs]uint32
	p256PointToAffine(&xx, &yy, x, y, z)
	return p256ToBig(&xx), p256ToBig(&yy)
}

// p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}.
func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) {
	var px, py, pz, tx, ty, tz [p256Limbs]uint32
	var precomp [16][3][p256Limbs]uint32
	var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32

	// We precompute 0,1,2,... times {x,y}. precomp[0] is left as the
	// all-zero (infinity) entry; 2k by doubling k, 2k+1 by adding {x,y}.
	precomp[1][0] = *x
	precomp[1][1] = *y
	precomp[1][2] = p256One

	for i := 2; i < 16; i += 2 {
		p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
		p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
	}

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}
	nIsInfinityMask = ^uint32(0)

	// We add in a window of four bits each iteration and do this 64 times.
	for i := 0; i < 64; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}

		// Take the next 4-bit window, most significant first: the high
		// nibble of a byte on even i, the low nibble on odd i.
		index = uint32(scalar[31-i/2])
		if (i & 1) == 1 {
			index &= 15
		} else {
			index >>= 4
		}

		// See the comments in scalarBaseMult about handling infinities.
		p256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
		p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz)
		p256CopyConditional(xOut, &px, nIsInfinityMask)
		p256CopyConditional(yOut, &py, nIsInfinityMask)
		p256CopyConditional(zOut, &pz, nIsInfinityMask)

		pIsNoninfiniteMask = nonZeroToAllOnes(index)
		mask = pIsNoninfiniteMask & ^nIsInfinityMask
		p256CopyConditional(xOut, &tx, mask)
		p256CopyConditional(yOut, &ty, mask)
		p256CopyConditional(zOut, &tz, mask)
		nIsInfinityMask &^= pIsNoninfiniteMask
	}
}

// p256FromBig sets out = R*in where R = 2**257, i.e. it converts in to
// Montgomery form.
1144 func p256FromBig(out *[p256Limbs]uint32, in *big.Int) { 1145 tmp := new(big.Int).Lsh(in, 257) 1146 tmp.Mod(tmp, p256.P) 1147 1148 for i := 0; i < p256Limbs; i++ { 1149 if bits := tmp.Bits(); len(bits) > 0 { 1150 out[i] = uint32(bits[0]) & bottom29Bits 1151 } else { 1152 out[i] = 0 1153 } 1154 tmp.Rsh(tmp, 29) 1155 1156 i++ 1157 if i == p256Limbs { 1158 break 1159 } 1160 1161 if bits := tmp.Bits(); len(bits) > 0 { 1162 out[i] = uint32(bits[0]) & bottom28Bits 1163 } else { 1164 out[i] = 0 1165 } 1166 tmp.Rsh(tmp, 28) 1167 } 1168 } 1169 1170 // p256ToBig returns a *big.Int containing the value of in. 1171 func p256ToBig(in *[p256Limbs]uint32) *big.Int { 1172 result, tmp := new(big.Int), new(big.Int) 1173 1174 result.SetInt64(int64(in[p256Limbs-1])) 1175 for i := p256Limbs - 2; i >= 0; i-- { 1176 if (i & 1) == 0 { 1177 result.Lsh(result, 29) 1178 } else { 1179 result.Lsh(result, 28) 1180 } 1181 tmp.SetInt64(int64(in[i])) 1182 result.Add(result, tmp) 1183 } 1184 1185 result.Mul(result, p256RInverse) 1186 result.Mod(result, p256.P) 1187 return result 1188 }