github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/sm2/p256.go (about) 1 //go:build !amd64 && !arm64 2 // +build !amd64,!arm64 3 4 package sm2 5 6 import ( 7 "crypto/elliptic" 8 "fmt" 9 "math/big" 10 ) 11 12 // See https://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background. 13 // Group Level Optimizations, "Efficient and Secure Elliptic Curve Cryptography Implementation of Curve P-256" 14 // SM2 P256 parameters reference GB/T 32918.5-2017 part 5. 15 16 type p256Curve struct { 17 *elliptic.CurveParams 18 } 19 20 var ( 21 p256Params *elliptic.CurveParams 22 23 // RInverse contains 1/R mod p - the inverse of the Montgomery constant 24 // (2**257). 25 p256RInverse *big.Int 26 ) 27 28 func initP256() { 29 p256Params = &elliptic.CurveParams{Name: "SM2-P-256"} 30 // 2**256 - 2**224 - 2**96 + 2**64 - 1 31 p256Params.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) 32 p256Params.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16) 33 p256Params.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16) 34 p256Params.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) 35 p256Params.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16) 36 p256Params.BitSize = 256 37 38 // ModeInverse(2**257, P) 39 // p256RInverse = big.NewInt(0) 40 // r, _ := new(big.Int).SetString("20000000000000000000000000000000000000000000000000000000000000000", 16) 41 // p256RInverse.ModInverse(r, p256.P) 42 // fmt.Printf("%s\n", hex.EncodeToString(p256RInverse.Bytes())) 43 p256RInverse, _ = new(big.Int).SetString("7ffffffd80000002fffffffe000000017ffffffe800000037ffffffc80000002", 16) 44 45 // Arch-specific initialization, i.e. 
let a platform dynamically pick a P256 implementation 46 initP256Arch() 47 } 48 49 func (curve p256Curve) Params() *elliptic.CurveParams { 50 return curve.CurveParams 51 } 52 53 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it 54 // to out. If the scalar is equal or greater than the order of the group, it's 55 // reduced modulo that order. 56 func p256GetScalar(out *[32]byte, in []byte) { 57 n := new(big.Int).SetBytes(in) 58 var scalarBytes []byte 59 60 if n.Cmp(p256.N) >= 0 || len(in) > len(out) { 61 n.Mod(n, p256.N) 62 scalarBytes = n.Bytes() 63 } else { 64 scalarBytes = in 65 } 66 67 for i, v := range scalarBytes { 68 out[len(scalarBytes)-(1+i)] = v 69 } 70 } 71 72 func (p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) { 73 var scalarReversed [32]byte 74 p256GetScalar(&scalarReversed, scalar) 75 76 var x1, y1, z1 [p256Limbs]uint32 77 p256ScalarBaseMult(&x1, &y1, &z1, &scalarReversed) 78 return p256ToAffine(&x1, &y1, &z1) 79 } 80 81 func (p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { 82 var scalarReversed [32]byte 83 p256GetScalar(&scalarReversed, scalar) 84 85 var px, py, x1, y1, z1 [p256Limbs]uint32 86 p256FromBig(&px, bigX) 87 p256FromBig(&py, bigY) 88 p256ScalarMult(&x1, &y1, &z1, &px, &py, &scalarReversed) 89 return p256ToAffine(&x1, &y1, &z1) 90 } 91 92 // Field elements are represented as nine, unsigned 32-bit words. 93 // 94 // The value of a field element is: 95 // x[0] + (x[1] * 2**29) + (x[2] * 2**57) + (x[3] * 2**86) + (x[4] * 2**114) + (x[5] * 2**143) + (x[6] * 2**171) + (x[7] * 2**200) + (x[8] * 2**228) 96 // 97 // That is, each limb is alternately 29 or 28-bits wide in little-endian 98 // order. 99 // 100 // This means that a field element hits 2**257, rather than 2**256 as we would 101 // like. A 28, 29, ... 
// pattern would cause us to hit 2**256, but that causes
// problems when multiplying as terms end up one bit short of a limb which
// would require much bit-shifting to correct.
//
// Finally, the values stored in a field element are in Montgomery form. So the
// value |y| is stored as (y*R) mod p, where p is the P-256 prime and R is
// 2**257.

const (
	// p256Limbs is the number of 32-bit words in a field element.
	p256Limbs = 9
	// bottom29Bits masks an even-position (29-bit) limb; bottom28Bits masks
	// an odd-position (28-bit) limb.
	bottom28Bits = 0xfffffff
	bottom29Bits = 0x1fffffff
)

var (
	// p256One is the number 1 as a field element (i.e. R mod p, since field
	// elements are in Montgomery form).
	p256One  = [p256Limbs]uint32{2, 0, 0x1fffff00, 0x7ff, 0, 0, 0, 0x2000000, 0}
	p256Zero = [p256Limbs]uint32{0, 0, 0, 0, 0, 0, 0, 0, 0}
	// p256P is the prime modulus as a field element.
	p256P = [p256Limbs]uint32{0x1fffffff, 0xfffffff, 0x7f, 0xffffc00, 0x1fffffff, 0xfffffff, 0x1fffffff, 0xeffffff, 0xfffffff}
	// p2562P is the twice prime modulus as a field element.
	p2562P = [p256Limbs]uint32{0x1ffffffe, 0xfffffff, 0xff, 0xffff800, 0x1fffffff, 0xfffffff, 0x1fffffff, 0xdffffff, 0x1fffffff}
	// p256b is the curve param b as a field element.
	p256b = [p256Limbs]uint32{0x1781ba84, 0xd230632, 0x1537ab90, 0x9bcd74d, 0xe1e38e7, 0x5417a94, 0x12149e60, 0x17441c5, 0x481fc31}
)

// p256Precomputed contains precomputed values to aid the calculation of scalar
// multiples of the base point, G. It's actually two, equal length, tables
// concatenated.
//
// The first table contains (x,y) field element pairs for 16 multiples of the
// base point, G.
//
//   Index | Index (binary) | Value
//       0 |           0000 | 0G (all zeros, omitted)
//       1 |           0001 | G
//       2 |           0010 | 2**64G
//       3 |           0011 | 2**64G + G
//       4 |           0100 | 2**128G
//       5 |           0101 | 2**128G + G
//       6 |           0110 | 2**128G + 2**64G
//       7 |           0111 | 2**128G + 2**64G + G
//       8 |           1000 | 2**192G
//       9 |           1001 | 2**192G + G
//      10 |           1010 | 2**192G + 2**64G
//      11 |           1011 | 2**192G + 2**64G + G
//      12 |           1100 | 2**192G + 2**128G
//      13 |           1101 | 2**192G + 2**128G + G
//      14 |           1110 | 2**192G + 2**128G + 2**64G
//      15 |           1111 | 2**192G + 2**128G + 2**64G + G
//
// The second table follows the same style, but the terms are 2**32G, 2**96G,
// 2**160G, 2**224G: entry 15+j holds the combination selected by the bits of
// j, with bit 0 selecting 2**32G up to bit 3 selecting 2**224G. (The binary
// column here is j, not the index; the original comment's binary column was
// inconsistent with the listed values.)
//
//   Index | j (binary) | Value
//      16 |       0001 | 2**32G
//      17 |       0010 | 2**96G
//      18 |       0011 | 2**96G + 2**32G
//      19 |       0100 | 2**160G
//      20 |       0101 | 2**160G + 2**32G
//      21 |       0110 | 2**160G + 2**96G
//      22 |       0111 | 2**160G + 2**96G + 2**32G
//      23 |       1000 | 2**224G
//      24 |       1001 | 2**224G + 2**32G
//      25 |       1010 | 2**224G + 2**96G
//      26 |       1011 | 2**224G + 2**96G + 2**32G
//      27 |       1100 | 2**224G + 2**160G
//      28 |       1101 | 2**224G + 2**160G + 2**32G
//      29 |       1110 | 2**224G + 2**160G + 2**96G
//      30 |       1111 | 2**224G + 2**160G + 2**96G + 2**32G
//
// This is ~2KB of data.
// The table below was generated by precompute(1) (first half) and
// precompute(2**32) (second half); see precompute.
var p256Precomputed = [p256Limbs * 2 * 15 * 2]uint32{
	0x830053d, 0x328990f, 0x6c04fe1, 0xc0f72e5, 0x1e19f3c, 0x666b093, 0x175a87b, 0xec38276, 0x222cf4b,
	0x185a1bba, 0x354e593, 0x1295fac1, 0xf2bc469, 0x47c60fa, 0xc19b8a9, 0xf63533e, 0x903ae6b, 0xc79acba,
	0x15b061a4, 0x33e020b, 0xdffb34b, 0xfcf2c8, 0x16582e08, 0x262f203, 0xfb34381, 0xa55452, 0x604f0ff,
	0x41f1f90, 0xd64ced2, 0xee377bf, 0x75f05f0, 0x189467ae, 0xe2244e, 0x1e7700e8, 0x3fbc464, 0x9612d2e,
	0x1341b3b8, 0xee84e23, 0x1edfa5b4, 0x14e6030, 0x19e87be9, 0x92f533c, 0x1665d96c, 0x226653e, 0xa238d3e,
	0xf5c62c, 0x95bb7a, 0x1f0e5a41, 0x28789c3, 0x1f251d23, 0x8726609, 0xe918910, 0x8096848, 0xf63d028,
	0x152296a1, 0x9f561a8, 0x14d376fb, 0x898788a, 0x61a95fb, 0xa59466d, 0x159a003d, 0x1ad1698, 0x93cca08,
	0x1b314662, 0x706e006, 0x11ce1e30, 0x97b710, 0x172fbc0d, 0x8f50158, 0x11c7ffe7, 0xd182cce, 0xc6ad9e8,
	0x12ea31b2, 0xc4e4f38, 0x175b0d96, 0xec06337, 0x75a9c12, 0xb001fdf, 0x93e82f5, 0x34607de, 0xb8035ed,
	0x17f97924, 0x75cf9e6, 0xdceaedd, 0x2529924, 0x1a10c5ff, 0xb1a54dc, 0x19464d8, 0x2d1997, 0xde6a110,
	0x1e276ee5, 0x95c510c, 0x1aca7c7a, 0xfe48aca, 0x121ad4d9, 0xe4132c6, 0x8239b9d, 0x40ea9cd, 0x816c7b,
	0x632d7a4, 0xa679813, 0x5911fcf, 0x82b0f7c, 0x57b0ad5, 0xbef65, 0xd541365, 0x7f9921f, 0xc62e7a,
	0x3f4b32d, 0x58e50e1, 0x6427aed, 0xdcdda67, 0xe8c2d3e, 0x6aa54a4, 0x18df4c35, 0x49a6a8e, 0x3cd3d0c,
	0xd7adf2, 0xcbca97, 0x1bda5f2d, 0x3258579, 0x606b1e6, 0x6fc1b5b, 0x1ac27317, 0x503ca16, 0xa677435,
	0x57bc73, 0x3992a42, 0xbab987b, 0xfab25eb, 0x128912a4, 0x90a1dc4, 0x1402d591, 0x9ffbcfc, 0xaa48856,
	0x7a7c2dc, 0xcefd08a, 0x1b29bda6, 0xa785641, 0x16462d8c, 0x76241b7, 0x79b6c3b, 0x204ae18, 0xf41212b,
	0x1f567a4d, 0xd6ce6db, 0xedf1784, 0x111df34, 0x85d7955, 0x55fc189, 0x1b7ae265, 0xf9281ac, 0xded7740,
	0xf19468b, 0x83763bb, 0x8ff7234, 0x3da7df8, 0x9590ac3, 0xdc96f2a, 0x16e44896, 0x7931009, 0x99d5acc,
	0x10f7b842, 0xaef5e84, 0xc0310d7, 0xdebac2c, 0x2a7b137, 0x4342344, 0x19633649, 0x3a10624, 0x4b4cb56,
	0x1d809c59, 0xac007f, 0x1f0f4bcd, 0xa1ab06e, 0xc5042cf, 0x82c0c77, 0x76c7563, 0x22c30f3, 0x3bf1568,
	0x7a895be, 0xfcca554, 0x12e90e4c, 0x7b4ab5f, 0x13aeb76b, 0x5887e2c, 0x1d7fe1e3, 0x908c8e3, 0x95800ee,
	0xb36bd54, 0xf08905d, 0x4e73ae8, 0xf5a7e48, 0xa67cb0, 0x50e1067, 0x1b944a0a, 0xf29c83a, 0xb23cfb9,
	0xbe1db1, 0x54de6e8, 0xd4707f2, 0x8ebcc2d, 0x2c77056, 0x1568ce4, 0x15fcc849, 0x4069712, 0xe2ed85f,
	0x2c5ff09, 0x42a6929, 0x628e7ea, 0xbd5b355, 0xaf0bd79, 0xaa03699, 0xdb99816, 0x4379cef, 0x81d57b,
	0x11237f01, 0xe2a820b, 0xfd53b95, 0x6beb5ee, 0x1aeb790c, 0xe470d53, 0x2c2cfee, 0x1c1d8d8, 0xa520fc4,
	0x1518e034, 0xa584dd4, 0x29e572b, 0xd4594fc, 0x141a8f6f, 0x8dfccf3, 0x5d20ba3, 0x2eb60c3, 0x9f16eb0,
	0x11cec356, 0xf039f84, 0x1b0990c1, 0xc91e526, 0x10b65bae, 0xf0616e8, 0x173fa3ff, 0xec8ccf9, 0xbe32790,
	0x11da3e79, 0xe2f35c7, 0x908875c, 0xdacf7bd, 0x538c165, 0x8d1487f, 0x7c31aed, 0x21af228, 0x7e1689d,
	0xdfc23ca, 0x24f15dc, 0x25ef3c4, 0x35248cd, 0x99a0f43, 0xa4b6ecc, 0xd066b3, 0x2481152, 0x37a7688,
	0x15a444b6, 0xb62300c, 0x4b841b, 0xa655e79, 0xd53226d, 0xbeb348a, 0x127f3c2, 0xb989247, 0x71a277d,
	0x19e9dfcb, 0xb8f92d0, 0xe2d226c, 0x390a8b0, 0x183cc462, 0x7bd8167, 0x1f32a552, 0x5e02db4, 0xa146ee9,
	0x1a003957, 0x1c95f61, 0x1eeec155, 0x26f811f, 0xf9596ba, 0x3082bfb, 0x96df083, 0x3e3a289, 0x7e2d8be,
	0x157a63e0, 0x99b8941, 0x1da7d345, 0xcc6cd0, 0x10beed9a, 0x48e83c0, 0x13aa2e25, 0x7cad710, 0x4029988,
	0x13dfa9dd, 0xb94f884, 0x1f4adfef, 0xb88543, 0x16f5f8dc, 0xa6a67f4, 0x14e274e2, 0x5e56cf4, 0x2f24ef,
	0x1e9ef967, 0xfe09bad, 0xfe079b3, 0xcc0ae9e, 0xb3edf6d, 0x3e961bc, 0x130d7831, 0x31043d6, 0xba986f9,
	0x1d28055, 0x65240ca, 0x4971fa3, 0x81b17f8, 0x11ec34a5, 0x8366ddc, 0x1471809, 0xfa5f1c6, 0xc911e15,
	0x8849491, 0xcf4c2e2, 0x14471b91, 0x39f75be, 0x445c21e, 0xf1585e9, 0x72cc11f, 0x4c79f0c, 0xe5522e1,
	0x1874c1ee, 0x4444211, 0x7914884, 0x3d1b133, 0x25ba3c, 0x4194f65, 0x1c0457ef, 0xac4899d, 0xe1fa66c,
	0x130a7918, 0x9b8d312, 0x4b1c5c8, 0x61ccac3, 0x18c8aa6f, 0xe93cb0a, 0xdccb12c, 0xde10825, 0x969737d,
	0xf58c0c3, 0x7cee6a9, 0xc2c329a, 0xc7f9ed9, 0x107b3981, 0x696a40e, 0x152847ff, 0x4d88754, 0xb141f47,
	0x5a16ffe, 0x3a7870a, 0x18667659, 0x3b72b03, 0xb1c9435, 0x9285394, 0xa00005a, 0x37506c, 0x2edc0bb,
	0x19afe392, 0xeb39cac, 0x177ef286, 0xdf87197, 0x19f844ed, 0x31fe8, 0x15f9bfd, 0x80dbec, 0x342e96e,
	0x497aced, 0xe88e909, 0x1f5fa9ba, 0x530a6ee, 0x1ef4e3f1, 0x69ffd12, 0x583006d, 0x2ecc9b1, 0x362db70,
	0x18c7bdc5, 0xf4bb3c5, 0x1c90b957, 0xf067c09, 0x9768f2b, 0xf73566a, 0x1939a900, 0x198c38a, 0x202a2a1,
	0x4bbf5a6, 0x4e265bc, 0x1f44b6e7, 0x185ca49, 0xa39e81b, 0x24aff5b, 0x4acc9c2, 0x638bdd3, 0xb65b2a8,
	0x6def8be, 0xb94537a, 0x10b81dee, 0xe00ec55, 0x2f2cdf7, 0xc20622d, 0x2d20f36, 0xe03c8c9, 0x898ea76,
	0x8e3921b, 0x8905bff, 0x1e94b6c8, 0xee7ad86, 0x154797f2, 0xa620863, 0x3fbd0d9, 0x1f3caab, 0x30c24bd,
	0x19d3892f, 0x59c17a2, 0x1ab4b0ae, 0xf8714ee, 0x90c4098, 0xa9c800d, 0x1910236b, 0xea808d3, 0x9ae2f31,
	0x1a15ad64, 0xa48c8d1, 0x184635a4, 0xb725ef1, 0x11921dcc, 0x3f866df, 0x16c27568, 0xbdf580a, 0xb08f55c,
	0x186ee1c, 0xb1627fa, 0x34e82f6, 0x933837e, 0xf311be5, 0xfedb03b, 0x167f72cd, 0xa5469c0, 0x9c82531,
	0xb92a24b, 0x14fdc8b, 0x141980d1, 0xbdc3a49, 0x7e02bb1, 0xaf4e6dd, 0x106d99e1, 0xd4616fc, 0x93c2717,
	0x1c0a0507, 0xc6d5fed, 0x9a03d8b, 0xa1d22b0, 0x127853e3, 0xc4ac6b8, 0x1a048cf7, 0x9afb72c, 0x65d485d,
	0x72d5998, 0xe9fa744, 0xe49e82c, 0x253cf80, 0x5f777ce, 0xa3799a5, 0x17270cbb, 0xc1d1ef0, 0xdf74977,
	0x114cb859, 0xfa8e037, 0xb8f3fe5, 0xc734cc6, 0x70d3d61, 0xeadac62, 0x12093dd0, 0x9add67d, 0x87200d6,
	0x175bcbb, 0xb29b49f, 0x1806b79c, 0x12fb61f, 0x170b3a10, 0x3aaf1cf, 0xa224085, 0x79d26af, 0x97759e2,
	0x92e19f1, 0xb32714d, 0x1f00d9f1, 0xc728619, 0x9e6f627, 0xe745e24, 0x18ea4ace, 0xfc60a41, 0x125f5b2,
	0xc3cf512, 0x39ed486, 0xf4d15fa, 0xf9167fd, 0x1c1f5dd5, 0xc21a53e, 0x1897930, 0x957a112, 0x21059a0,
	0x1f9e3ddc, 0xa4dfced, 0x8427f6f, 0x726fbe7, 0x1ea658f8, 0x2fdcd4c, 0x17e9b66f, 0xb2e7c2e, 0x39923bf,
	0x1bae104, 0x3973ce5, 0xc6f264c, 0x3511b84, 0x124195d7, 0x11996bd, 0x20be23d, 0xdc437c4, 0x4b4f16b,
	0x11902a0, 0x6c29cc9, 0x1d5ffbe6, 0xdb0b4c7, 0x10144c14, 0x2f2b719, 0x301189, 0x2343336, 0xa0bf2ac,
}

// precompute prints (via printPoint) the limb pairs for base, base<<64,
// base<<128, base<<192 and all of their sums — one half of p256Precomputed
// per call (base = 1 and base = 2**32). It is kept as a development aid for
// regenerating the table above.
func precompute(params *elliptic.CurveParams, base *big.Int) {
	// values[i] = base << (64*i), i.e. the four "axis" multiples.
	var values [4]*big.Int

	values[0] = base
	for i := 1; i < 4; i++ {
		values[i] = new(big.Int)
		values[i].Lsh(values[i-1], 64)
	}
	for i := 0; i < 4; i++ {
		x, y := params.ScalarBaseMult(values[i].Bytes())
		printPoint(params, x, y)
		v := new(big.Int)
		// After printing values[i]*G itself, print every combination of
		// values[i] with the lower-indexed values, in table order.
		switch i {
		case 1:
			v.Add(values[0], values[1])
			x, y := params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
		case 2:
			v.Add(values[0], values[2])
			x, y := params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[1], values[2])
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[0], v)
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
		case 3:
			v.Add(values[0], values[3])
			x, y := params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[1], values[3])
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[0], v)
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[2], values[3])
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[0], v)
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(values[2], values[3])
			v.Add(v, values[1])
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
			v.Add(v, values[0])
			x, y = params.ScalarBaseMult(v.Bytes())
			printPoint(params, x, y)
		}
	}
}

// printPoint prints the limb representation of the affine point (x, y); used
// with precompute to (re)generate p256Precomputed.
func printPoint(params *elliptic.CurveParams, x, y *big.Int) {
	var out [p256Limbs]uint32
	p256FromBigAgainstP(&out, x, params.P)
	printp256Limbs(&out)
	p256FromBigAgainstP(&out, y, params.P)
	printp256Limbs(&out)
}

// printp256Limbs prints one field element as a row of hex limb literals.
func printp256Limbs(one *[p256Limbs]uint32) {
	for i := 0; i < p256Limbs; i++ {
		fmt.Printf("0x%x, ", one[i])
	}
	fmt.Println()
}

// print1to7 prints the limb representation of the small integers 1..7;
// a development aid, like precompute.
func print1to7(params *elliptic.CurveParams) {
	var out [p256Limbs]uint32
	for i := 1; i < 8; i++ {
		value := big.NewInt(int64(i))
		p256FromBigAgainstP(&out, value, params.P)
		printp256Limbs(&out)
	}
}

// Field element operations:

// nonZeroToAllOnes returns:
//   0xffffffff for 0 < x <= 2**31
//   0 for x == 0 or x > 2**31.
func nonZeroToAllOnes(x uint32) uint32 {
	// (x-1) has its top bit set iff x == 0 or x > 2**31; shifting that bit
	// down and subtracting 1 turns it into an all-zeros/all-ones mask,
	// branch-free.
	return ((x - 1) >> 31) - 1
}

// p256ReduceCarry adds a multiple of p in order to cancel |carry|,
// which is a term at 2**257.
//
// On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
// On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
func p256ReduceCarry(inout *[p256Limbs]uint32, carry uint32) {
	carry_mask := nonZeroToAllOnes(carry)

	// Since p = 2**256 - 2**224 - 2**96 + 2**64 - 1, we have
	// carry*2**257 ≡ carry*(2**225 + 2**97 - 2**65 + 2) (mod p).
	// Limb start bits are 0, 29, 57, 86, ..., 200, 228; so the four terms
	// land at limb0<<1, limb2<<8 (negative), limb3<<11 and limb7<<25.
	inout[0] += carry << 1
	// The subtraction below cannot underflow: when carry is non-zero,
	// 2**29 is lent to limb2 (and repaid by taking 1 from limb3), and
	// carry<<8 < 2**11 << 2**29.
	inout[2] -= carry << 8
	inout[2] += 0x20000000 & carry_mask

	inout[3] -= 1 & carry_mask
	inout[3] += carry << 11

	// carry < 2**3, so carry<<25 < 2**28; with inout[7] < 2**28 on entry the
	// sum stays below 2**29, matching the exit bound.
	inout[7] += carry << 25
}

// p256Sum sets out = in+in2.
//
// On entry, in[i]+in2[i] must not overflow a 32-bit word.
// On exit: out[0,2,...] < 2**30, out[1,3,...]
// < 2**29.
func p256Sum(out, in, in2 *[p256Limbs]uint32) {
	// Limbs are processed in pairs: even limbs carry out of bit 29, odd
	// limbs out of bit 28, matching the alternating 29/28-bit limb widths.
	carry := uint32(0)
	for i := 0; ; i++ {
		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] + in2[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	// The final carry is a term at 2**257; fold it back in.
	p256ReduceCarry(out, carry)
}

// p256Zero31 is 0 mod p: a multiple of p whose limbs are all close to 2**31
// (even positions) or 2**30 (odd positions). Adding it before a limb-wise
// subtraction prevents underflow without changing the value mod p.
// {two31m3, two30m2, two31p10m2, two30m13m2, two31m2, two30m2, two31m2, two30m27m2, two31m2}
var p256Zero31 = [p256Limbs]uint32{0x7FFFFFF8, 0x3FFFFFFC, 0x800003FC, 0x3FFFDFFC, 0x7FFFFFFC, 0x3FFFFFFC, 0x7FFFFFFC, 0x37FFFFFC, 0x7FFFFFFC}

// limbsToBig returns the integer value of the limb vector in, accumulating
// from the most-significant limb down. No Montgomery conversion is performed.
func limbsToBig(in *[p256Limbs]uint32) *big.Int {
	result, tmp := new(big.Int), new(big.Int)

	result.SetInt64(int64(in[p256Limbs-1]))
	for i := p256Limbs - 2; i >= 0; i-- {
		// Even-indexed limbs are 29 bits wide, odd-indexed ones 28 bits.
		if (i & 1) == 0 {
			result.Lsh(result, 29)
		} else {
			result.Lsh(result, 28)
		}
		tmp.SetInt64(int64(in[i]))
		result.Add(result, tmp)
	}
	return result
}

// p256GetZero31, the func to calculate p256Zero31: it finds the multiple of p
// just below the value whose limbs are all exactly 2**31 / 2**30, then
// re-expresses that multiple limb by limb, nudging each limb up into the
// [0x70000000, 0xf0000000) / [0x30000000, 0x70000000) band.
func p256GetZero31(out *[p256Limbs]uint32) {
	tmp := big.NewInt(0)
	result := limbsToBig(&[p256Limbs]uint32{1 << 31, 1 << 30, 1 << 31, 1 << 30, 1 << 31, 1 << 30, 1 << 31, 1 << 30, 1 << 31})
	tmp = tmp.Mod(result, p256.P)
	tmp = tmp.Sub(result, tmp)
	for i := 0; i < 9; i++ {
		// Even limb: take 31 bits, ensuring the limb is at least 2**31-ish
		// so later subtractions cannot underflow.
		if bits := tmp.Bits(); len(bits) > 0 {
			out[i] = uint32(bits[0]) & 0x7fffffff
			if out[i] < 0x70000000 {
				out[i] += 0x80000000
			}
		} else {
			out[i] = 0x80000000
		}
		tmp.Sub(tmp, big.NewInt(int64(out[i])))
		tmp.Rsh(tmp, 29)
		i++
		if i == p256Limbs {
			break
		}

		// Odd limb: same, with 30-bit width.
		if bits := tmp.Bits(); len(bits) > 0 {
			out[i] = uint32(bits[0]) & 0x3fffffff
			if out[i] < 0x30000000 {
				out[i] += 0x40000000
			}
		} else {
			out[i] = 0x40000000
		}
		tmp.Sub(tmp, big.NewInt(int64(out[i])))
		tmp.Rsh(tmp, 28)
	}
}
// p256Diff sets out = in-in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Diff(out, in, in2 *[p256Limbs]uint32) {
	var carry uint32

	for i := 0; ; i++ {
		// Adding p256Zero31 (which is 0 mod p) keeps every limb
		// non-negative even when in[i] < in2[i].
		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits
		i++
		if i == p256Limbs {
			break
		}

		out[i] = in[i] - in2[i]
		out[i] += p256Zero31[i]
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256ReduceDegree sets out = tmp/R mod p where tmp contains 64-bit words with
// the same 29,28,... bit positions as a field element.
//
// The values in field elements are in Montgomery form: x*R mod p where R =
// 2**257. Since we just multiplied two Montgomery values together, the result
// is x*y*R*R mod p. We wish to divide by R in order for the result also to be
// in Montgomery form.
//
// On entry: tmp[i] < 2**64
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
func p256ReduceDegree(out *[p256Limbs]uint32, tmp [17]uint64) {
	// The following table may be helpful when reading this code:
	//
	// Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
	// Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
	// Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
	//   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
	var tmp2 [18]uint32
	var carry, x, xMask uint32

	// tmp contains 64-bit words with the same 29,28,29-bit positions as a
	// field element. So the top of an element of tmp might overlap with
	// another element two positions down. The following loop eliminates
	// this overlap, producing 32-bit limbs in tmp2.
	tmp2[0] = uint32(tmp[0]) & bottom29Bits

	tmp2[1] = uint32(tmp[0]) >> 29
	tmp2[1] |= (uint32(tmp[0]>>32) << 3) & bottom28Bits
	tmp2[1] += uint32(tmp[1]) & bottom28Bits
	carry = tmp2[1] >> 28
	tmp2[1] &= bottom28Bits

	for i := 2; i < 17; i++ {
		// Even-phase limb: gather the spill-over from tmp[i-2] and
		// tmp[i-1] plus tmp[i]'s own low 29 bits.
		tmp2[i] = (uint32(tmp[i-2] >> 32)) >> 25
		tmp2[i] += (uint32(tmp[i-1])) >> 28
		tmp2[i] += (uint32(tmp[i-1]>>32) << 4) & bottom29Bits
		tmp2[i] += uint32(tmp[i]) & bottom29Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 29
		tmp2[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		// Odd-phase limb (28 bits wide).
		tmp2[i] = uint32(tmp[i-2]>>32) >> 25
		tmp2[i] += uint32(tmp[i-1]) >> 29
		tmp2[i] += ((uint32(tmp[i-1] >> 32)) << 3) & bottom28Bits
		tmp2[i] += uint32(tmp[i]) & bottom28Bits
		tmp2[i] += carry
		carry = tmp2[i] >> 28
		tmp2[i] &= bottom28Bits
	}

	tmp2[17] = uint32(tmp[15]>>32) >> 25
	tmp2[17] += uint32(tmp[16]) >> 29
	tmp2[17] += uint32(tmp[16]>>32) << 3
	tmp2[17] += carry

	// Montgomery elimination of terms:
	//
	// Since R is 2**257, we can divide by R with a bitwise shift if we can
	// ensure that the right-most 257 bits are all zero. We can make that true
	// by adding multiplies of p without affecting the value.
	//
	// So we eliminate limbs from right to left. Since the bottom 29 bits of p
	// are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
	// We can do that for 8 further limbs and then right shift to eliminate the
	// extra factor of R.
	for i := 0; ; i += 2 {
		tmp2[i+1] += tmp2[i] >> 29
		x = tmp2[i] & bottom29Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i] = 0

		// The bounds calculations for this loop are tricky. Each iteration of
		// the loop eliminates two words by adding values to words to their
		// right.
		//
		// The following table contains the amounts added to each word (as an
		// offset from the value of i at the top of the loop). The amounts are
		// accounted for from the first and second half of the loop separately
		// and are written as, for example, 28 to mean a value <2**28.
		//
		// Word:                   2   3   4   5   6   7   8   9  10
		// Added in top half:     29  28  29  29  29  29  29  28
		//                            29  28  29  28  29
		//                                                29
		// Added in bottom half:      28  29  28  28  28  29  28  28
		//                                28  29  28  29  28  29
		//
		// The following table accumulates these values. The sums at the bottom
		// are written as, for example, 29+28, to mean a value < 2**29+2**28.
		//
		// Word:  2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
		//       29  28  29  29  29  29  29  28  28  28  28  28  28  28  28  28
		//           28  29  28  29  28  29  28  29  28  29  28  29  28  29
		//           29  28  28  28  29  28  29  28  29  28  29  28  29
		//           29  28  29  28  29  29  29  29  29  29  29  29  29
		//               28  29  29  29  28  29  28  29  28  29  28
		//               28  29  28  29  28  29  28  29  28  29
		//                   29  28  29  28  29  28  29  28  29
		//                   29  28  28  29  29  29  29  29  29
		//                       28  29  28  28  28  28  28
		//                           28  29  28  29  28  29
		//                           29  28  29  28  29
		//                               29  28  29  28  29
		//                                   29  28  29
		//                                       29
		// -------------------------------------------------
		// According to the table, from tmp2[6] to tmp2[14], considering their
		// initial value, they would overflow a 32-bit word, so we normalize
		// them at the end of every iteration (see the bottom of the loop).
		// This requires more CPU resources than NIST P-256.

		// Eliminate the even-phase limb: add x * p at this position. The
		// bottom 29 bits of p are all ones, so this zeroes tmp2[i].
		tmp2[i+2] += (x << 7) & bottom29Bits
		tmp2[i+3] += (x >> 22)

		// At position 86, which is the starting bit position for word 3, we
		// have a factor of 0xffffc00 = 2**28 - 2**10.
		tmp2[i+3] += 0x10000000 & xMask
		tmp2[i+4] += (x - 1) & xMask
		tmp2[i+3] -= (x << 10) & bottom28Bits
		tmp2[i+4] -= x >> 18

		tmp2[i+4] += 0x20000000 & xMask
		tmp2[i+4] -= x
		tmp2[i+5] += (x - 1) & xMask

		tmp2[i+5] += 0x10000000 & xMask
		tmp2[i+5] -= x
		tmp2[i+6] += (x - 1) & xMask

		tmp2[i+6] += 0x20000000 & xMask
		tmp2[i+6] -= x
		tmp2[i+7] += (x - 1) & xMask

		// At position 200, which is the starting bit position for word 7, we
		// have a factor of 0xeffffff = 2**28 - 2**24 - 1.
		tmp2[i+7] += 0x10000000 & xMask
		tmp2[i+7] -= x
		tmp2[i+8] += (x - 1) & xMask
		tmp2[i+7] -= (x << 24) & bottom28Bits
		tmp2[i+8] -= x >> 4

		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+8] -= x
		tmp2[i+8] += (x << 28) & bottom29Bits
		tmp2[i+9] += ((x >> 1) - 1) & xMask

		if i+1 == p256Limbs {
			break
		}

		// Eliminate the odd-phase limb, with shift amounts adjusted for the
		// 28-bit position (odd-phase start bits 28, 85, 199, ...).
		tmp2[i+2] += tmp2[i+1] >> 28
		x = tmp2[i+1] & bottom28Bits
		xMask = nonZeroToAllOnes(x)
		tmp2[i+1] = 0

		tmp2[i+3] += (x << 7) & bottom28Bits
		tmp2[i+4] += (x >> 21)

		// At position 85, which is the starting bit position for word 3, we
		// have a factor of 0x1ffff800 = 2**29 - 2**11.
		tmp2[i+4] += 0x20000000 & xMask
		tmp2[i+5] += (x - 1) & xMask
		tmp2[i+4] -= (x << 11) & bottom29Bits
		tmp2[i+5] -= x >> 18

		tmp2[i+5] += 0x10000000 & xMask
		tmp2[i+5] -= x
		tmp2[i+6] += (x - 1) & xMask

		tmp2[i+6] += 0x20000000 & xMask
		tmp2[i+6] -= x
		tmp2[i+7] += (x - 1) & xMask

		tmp2[i+7] += 0x10000000 & xMask
		tmp2[i+7] -= x
		tmp2[i+8] += (x - 1) & xMask

		// At position 199, which is the starting bit position for word 7, we
		// have a factor of 0x1dffffff = 2**29 - 2**25 - 1.
		tmp2[i+8] += 0x20000000 & xMask
		tmp2[i+8] -= x
		tmp2[i+9] += (x - 1) & xMask
		tmp2[i+8] -= (x << 25) & bottom29Bits
		tmp2[i+9] -= x >> 4

		tmp2[i+9] += 0x10000000 & xMask
		tmp2[i+9] -= x
		tmp2[i+10] += (x - 1) & xMask

		// Need to normalize the limbs below to avoid overflowing a 32-bit
		// word in the next iteration (see the bounds tables above).
		tmp2[i+7] += tmp2[i+6] >> 29
		tmp2[i+6] = tmp2[i+6] & bottom29Bits

		tmp2[i+8] += tmp2[i+7] >> 28
		tmp2[i+7] = tmp2[i+7] & bottom28Bits

		tmp2[i+9] += tmp2[i+8] >> 29
		tmp2[i+8] = tmp2[i+8] & bottom29Bits

		tmp2[i+10] += tmp2[i+9] >> 28
		tmp2[i+9] = tmp2[i+9] & bottom28Bits
	}

	// We merge the right shift with a carry chain. The words above 2**257 have
	// widths of 28,29,... which we need to correct when copying them down.
	carry = 0
	for i := 0; i < 8; i++ {
		// The maximum value of tmp2[i + 9] occurs on the first iteration and
		// is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
		// therefore safe.
		out[i] = tmp2[i+9]
		out[i] += carry
		out[i] += (tmp2[i+10] << 28) & bottom29Bits
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		out[i] = tmp2[i+9] >> 1
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	out[8] = tmp2[17]
	out[8] += carry
	carry = out[8] >> 29
	out[8] &= bottom29Bits

	p256ReduceCarry(out, carry)
}

// p256Square sets out=in*in.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Square(out, in *[p256Limbs]uint32) {
	// Schoolbook squaring into 64-bit accumulators. Limb start bits are
	// 0, 29, 57, 86, 114, ...: when both factor limbs sit at odd indices
	// their start bits sum to one more than tmp[k]'s position, so those
	// products carry an extra <<1. Cross terms (i != j) are counted once
	// and doubled for symmetry, which is why shifts of <<1 and <<2 appear.
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in[0])
	tmp[1] = uint64(in[0]) * (uint64(in[1]) << 1)
	tmp[2] = uint64(in[0])*(uint64(in[2])<<1) +
		uint64(in[1])*(uint64(in[1])<<1)
	tmp[3] = uint64(in[0])*(uint64(in[3])<<1) +
		uint64(in[1])*(uint64(in[2])<<1)
	tmp[4] = uint64(in[0])*(uint64(in[4])<<1) +
		uint64(in[1])*(uint64(in[3])<<2) +
		uint64(in[2])*uint64(in[2])
	tmp[5] = uint64(in[0])*(uint64(in[5])<<1) +
		uint64(in[1])*(uint64(in[4])<<1) +
		uint64(in[2])*(uint64(in[3])<<1)
	tmp[6] = uint64(in[0])*(uint64(in[6])<<1) +
		uint64(in[1])*(uint64(in[5])<<2) +
		uint64(in[2])*(uint64(in[4])<<1) +
		uint64(in[3])*(uint64(in[3])<<1)
	tmp[7] = uint64(in[0])*(uint64(in[7])<<1) +
		uint64(in[1])*(uint64(in[6])<<1) +
		uint64(in[2])*(uint64(in[5])<<1) +
		uint64(in[3])*(uint64(in[4])<<1)
	// tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
	// which is < 2**64 as required.
	tmp[8] = uint64(in[0])*(uint64(in[8])<<1) +
		uint64(in[1])*(uint64(in[7])<<2) +
		uint64(in[2])*(uint64(in[6])<<1) +
		uint64(in[3])*(uint64(in[5])<<2) +
		uint64(in[4])*uint64(in[4])
	tmp[9] = uint64(in[1])*(uint64(in[8])<<1) +
		uint64(in[2])*(uint64(in[7])<<1) +
		uint64(in[3])*(uint64(in[6])<<1) +
		uint64(in[4])*(uint64(in[5])<<1)
	tmp[10] = uint64(in[2])*(uint64(in[8])<<1) +
		uint64(in[3])*(uint64(in[7])<<2) +
		uint64(in[4])*(uint64(in[6])<<1) +
		uint64(in[5])*(uint64(in[5])<<1)
	tmp[11] = uint64(in[3])*(uint64(in[8])<<1) +
		uint64(in[4])*(uint64(in[7])<<1) +
		uint64(in[5])*(uint64(in[6])<<1)
	tmp[12] = uint64(in[4])*(uint64(in[8])<<1) +
		uint64(in[5])*(uint64(in[7])<<2) +
		uint64(in[6])*uint64(in[6])
	tmp[13] = uint64(in[5])*(uint64(in[8])<<1) +
		uint64(in[6])*(uint64(in[7])<<1)
	tmp[14] = uint64(in[6])*(uint64(in[8])<<1) +
		uint64(in[7])*(uint64(in[7])<<1)
	tmp[15] = uint64(in[7]) * (uint64(in[8]) << 1)
	tmp[16] = uint64(in[8]) * uint64(in[8])

	// Montgomery-reduce the double-width result back to a field element.
	p256ReduceDegree(out, tmp)
}

// p256Mul sets out=in*in2.
//
// On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
// in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Mul(out, in, in2 *[p256Limbs]uint32) {
	// Schoolbook multiplication into 64-bit accumulators. Limb start bits
	// are 0, 29, 57, 86, 114, ...: when both factor limbs sit at odd
	// indices their start bits sum to one more than tmp[k]'s position, so
	// those products carry a compensating <<1 (the <<0 shifts are kept to
	// make the pattern explicit).
	var tmp [17]uint64

	tmp[0] = uint64(in[0]) * uint64(in2[0])
	tmp[1] = uint64(in[0])*(uint64(in2[1])<<0) + //2**29
		uint64(in[1])*(uint64(in2[0])<<0)
	tmp[2] = uint64(in[0])*(uint64(in2[2])<<0) + //2**57
		uint64(in[1])*(uint64(in2[1])<<1) +
		uint64(in[2])*(uint64(in2[0])<<0)
	tmp[3] = uint64(in[0])*(uint64(in2[3])<<0) + //2**86
		uint64(in[1])*(uint64(in2[2])<<0) +
		uint64(in[2])*(uint64(in2[1])<<0) +
		uint64(in[3])*(uint64(in2[0])<<0)
	tmp[4] = uint64(in[0])*(uint64(in2[4])<<0) + //2**114
		uint64(in[1])*(uint64(in2[3])<<1) +
		uint64(in[2])*(uint64(in2[2])<<0) +
		uint64(in[3])*(uint64(in2[1])<<1) +
		uint64(in[4])*(uint64(in2[0])<<0)
	tmp[5] = uint64(in[0])*(uint64(in2[5])<<0) + //2**143
		uint64(in[1])*(uint64(in2[4])<<0) +
		uint64(in[2])*(uint64(in2[3])<<0) +
		uint64(in[3])*(uint64(in2[2])<<0) +
		uint64(in[4])*(uint64(in2[1])<<0) +
		uint64(in[5])*(uint64(in2[0])<<0)
	tmp[6] = uint64(in[0])*(uint64(in2[6])<<0) + //2**171
		uint64(in[1])*(uint64(in2[5])<<1) +
		uint64(in[2])*(uint64(in2[4])<<0) +
		uint64(in[3])*(uint64(in2[3])<<1) +
		uint64(in[4])*(uint64(in2[2])<<0) +
		uint64(in[5])*(uint64(in2[1])<<1) +
		uint64(in[6])*(uint64(in2[0])<<0)
	tmp[7] = uint64(in[0])*(uint64(in2[7])<<0) + //2**200
		uint64(in[1])*(uint64(in2[6])<<0) +
		uint64(in[2])*(uint64(in2[5])<<0) +
		uint64(in[3])*(uint64(in2[4])<<0) +
		uint64(in[4])*(uint64(in2[3])<<0) +
		uint64(in[5])*(uint64(in2[2])<<0) +
		uint64(in[6])*(uint64(in2[1])<<0) +
		uint64(in[7])*(uint64(in2[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// p256Square.
	tmp[8] = uint64(in[0])*(uint64(in2[8])<<0) + // 2**228
		uint64(in[1])*(uint64(in2[7])<<1) +
		uint64(in[2])*(uint64(in2[6])<<0) +
		uint64(in[3])*(uint64(in2[5])<<1) +
		uint64(in[4])*(uint64(in2[4])<<0) +
		uint64(in[5])*(uint64(in2[3])<<1) +
		uint64(in[6])*(uint64(in2[2])<<0) +
		uint64(in[7])*(uint64(in2[1])<<1) +
		uint64(in[8])*(uint64(in2[0])<<0)
	tmp[9] = uint64(in[1])*(uint64(in2[8])<<0) + //2**257
		uint64(in[2])*(uint64(in2[7])<<0) +
		uint64(in[3])*(uint64(in2[6])<<0) +
		uint64(in[4])*(uint64(in2[5])<<0) +
		uint64(in[5])*(uint64(in2[4])<<0) +
		uint64(in[6])*(uint64(in2[3])<<0) +
		uint64(in[7])*(uint64(in2[2])<<0) +
		uint64(in[8])*(uint64(in2[1])<<0)
	tmp[10] = uint64(in[2])*(uint64(in2[8])<<0) + //2**285
		uint64(in[3])*(uint64(in2[7])<<1) +
		uint64(in[4])*(uint64(in2[6])<<0) +
		uint64(in[5])*(uint64(in2[5])<<1) +
		uint64(in[6])*(uint64(in2[4])<<0) +
		uint64(in[7])*(uint64(in2[3])<<1) +
		uint64(in[8])*(uint64(in2[2])<<0)
	tmp[11] = uint64(in[3])*(uint64(in2[8])<<0) + //2**314
		uint64(in[4])*(uint64(in2[7])<<0) +
		uint64(in[5])*(uint64(in2[6])<<0) +
		uint64(in[6])*(uint64(in2[5])<<0) +
		uint64(in[7])*(uint64(in2[4])<<0) +
		uint64(in[8])*(uint64(in2[3])<<0)
	tmp[12] = uint64(in[4])*(uint64(in2[8])<<0) + //2**342
		uint64(in[5])*(uint64(in2[7])<<1) +
		uint64(in[6])*(uint64(in2[6])<<0) +
		uint64(in[7])*(uint64(in2[5])<<1) +
		uint64(in[8])*(uint64(in2[4])<<0)
	tmp[13] = uint64(in[5])*(uint64(in2[8])<<0) + //2**371
		uint64(in[6])*(uint64(in2[7])<<0) +
		uint64(in[7])*(uint64(in2[6])<<0) +
		uint64(in[8])*(uint64(in2[5])<<0)
	tmp[14] = uint64(in[6])*(uint64(in2[8])<<0) + //2**399
		uint64(in[7])*(uint64(in2[7])<<1) +
		uint64(in[8])*(uint64(in2[6])<<0)
	tmp[15] = uint64(in[7])*(uint64(in2[8])<<0) + //2**428
		uint64(in[8])*(uint64(in2[7])<<0)
	tmp[16] = uint64(in[8]) * (uint64(in2[8]) << 0) //2**456

	// Montgomery-reduce the double-width result back to a field element.
	p256ReduceDegree(out, tmp)
}

// p256Assign sets out = in.
func p256Assign(out, in *[p256Limbs]uint32) {
	*out = *in
}

// p256Invert calculates |out| = |in|^{-1}
//
// Based on Fermat's Little Theorem:
//   a^p = a (mod p)
//   a^{p-1} = 1 (mod p)
//   a^{p-2} = a^{-1} (mod p)
//
// The exponent p-2 = 2^256 - 2^224 - 2^96 + 2^64 - 3 is evaluated with a
// fixed addition chain, so the sequence of squarings and multiplications
// is independent of the input (constant time). The // comments track the
// current exponent of |in| held in each variable.
func p256Invert(out, in *[p256Limbs]uint32) {
	var ftmp, ftmp2 [p256Limbs]uint32

	// each e_I will hold |in|^{2^I - 1}
	var e2, e4, e8, e16, e32, e64 [p256Limbs]uint32
	// e32m2 will hold |in|^{2^32 - 2}
	var e32m2 [p256Limbs]uint32

	p256Square(&ftmp, in) // 2^1
	p256Assign(&ftmp2, &ftmp)
	p256Mul(&ftmp, in, &ftmp) // 2^2 - 2^0
	p256Assign(&e2, &ftmp)
	p256Square(&ftmp, &ftmp) // 2^3 - 2^1
	p256Square(&ftmp, &ftmp) // 2^4 - 2^2
	p256Assign(&e32m2, &ftmp)
	p256Mul(&e32m2, &e32m2, &ftmp2) // 2^4 - 2^2 + 2^1 = 2^4 - 2
	p256Mul(&ftmp, &ftmp, &e2)      // 2^4 - 2^0
	p256Assign(&e4, &ftmp)
	for i := 0; i < 4; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^8 - 2^4
	p256Mul(&e32m2, &e32m2, &ftmp) // 2^8 - 2

	p256Mul(&ftmp, &ftmp, &e4) // 2^8 - 2^0
	p256Assign(&e8, &ftmp)
	for i := 0; i < 8; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^16 - 2^8
	p256Mul(&e32m2, &e32m2, &ftmp) // 2^16 - 2
	p256Mul(&ftmp, &ftmp, &e8)     // 2^16 - 2^0
	p256Assign(&e16, &ftmp)
	for i := 0; i < 16; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^32 - 2^16
	p256Mul(&e32m2, &e32m2, &ftmp) // 2^32 - 2

	p256Mul(&ftmp, &ftmp, &e16) // 2^32 - 2^0
	p256Assign(&e32, &ftmp)
	for i := 0; i < 32; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^64 - 2^32
	p256Assign(&e64, &ftmp)
	p256Mul(&e64, &e64, &e32) // 2^64 - 2^0
	p256Assign(&ftmp, &e64)

	for i := 0; i < 64; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^128 - 2^64
	p256Mul(&ftmp, &ftmp, &e64) // 2^128 - 1

	for i := 0; i < 96; i++ {
		p256Square(&ftmp, &ftmp)
	} // 2^224 - 2^96

	// Shift the 2^32-2 term up by 224 squarings: (2^32-2)*2^224 = 2^256 - 2^225.
	p256Assign(&ftmp2, &e32m2)
	for i := 0; i < 224; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^256 - 2^225

	p256Mul(&ftmp, &ftmp, &ftmp2) // 2^256 - 2^224 - 2^96

	// Build the low part of the exponent, 2^64 - 3, in ftmp2.
	p256Assign(&ftmp2, &e32)

	for i := 0; i < 16; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^48 - 2^16
	p256Mul(&ftmp2, &e16, &ftmp2) // 2^48 - 2^0

	for i := 0; i < 8; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^56 - 2^8
	p256Mul(&ftmp2, &e8, &ftmp2) // 2^56 - 2^0

	for i := 0; i < 4; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^60 - 2^4
	p256Mul(&ftmp2, &e4, &ftmp2) // 2^60 - 2^0

	for i := 0; i < 2; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^62 - 2^2

	p256Mul(&ftmp2, &e2, &ftmp2) // 2^62 - 2^0
	for i := 0; i < 2; i++ {
		p256Square(&ftmp2, &ftmp2)
	} // 2^64 - 2^2
	p256Mul(&ftmp2, in, &ftmp2) // 2^64 - 3
	p256Mul(out, &ftmp2, &ftmp) // 2^256 - 2^224 - 2^96 + 2^64 - 3
}

// p256Scalar3 sets out=3*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
func p256Scalar3(out *[p256Limbs]uint32) {
	var carry uint32

	// Limbs alternate 29/28 bits wide, so each pass of the loop handles
	// one even (29-bit) limb and then one odd (28-bit) limb.
	for i := 0; ; i++ {
		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 29
		out[i] &= bottom29Bits

		i++
		if i == p256Limbs {
			break
		}

		out[i] *= 3
		out[i] += carry
		carry = out[i] >> 28
		out[i] &= bottom28Bits
	}

	p256ReduceCarry(out, carry)
}

// p256Scalar4 sets out=4*out.
//
// On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
// On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
972 func p256Scalar4(out *[p256Limbs]uint32) { 973 var carry, nextCarry uint32 974 975 for i := 0; ; i++ { 976 nextCarry = out[i] >> 27 977 out[i] <<= 2 978 out[i] &= bottom29Bits 979 out[i] += carry 980 carry = nextCarry + (out[i] >> 29) 981 out[i] &= bottom29Bits 982 983 i++ 984 if i == p256Limbs { 985 break 986 } 987 nextCarry = out[i] >> 26 988 out[i] <<= 2 989 out[i] &= bottom28Bits 990 out[i] += carry 991 carry = nextCarry + (out[i] >> 28) 992 out[i] &= bottom28Bits 993 } 994 995 p256ReduceCarry(out, carry) 996 } 997 998 // p256Scalar8 sets out=8*out. 999 // 1000 // On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 1001 // On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. 1002 func p256Scalar8(out *[p256Limbs]uint32) { 1003 var carry, nextCarry uint32 1004 1005 for i := 0; ; i++ { 1006 nextCarry = out[i] >> 26 1007 out[i] <<= 3 1008 out[i] &= bottom29Bits 1009 out[i] += carry 1010 carry = nextCarry + (out[i] >> 29) 1011 out[i] &= bottom29Bits 1012 1013 i++ 1014 if i == p256Limbs { 1015 break 1016 } 1017 nextCarry = out[i] >> 25 1018 out[i] <<= 3 1019 out[i] &= bottom28Bits 1020 out[i] += carry 1021 carry = nextCarry + (out[i] >> 28) 1022 out[i] &= bottom28Bits 1023 } 1024 1025 p256ReduceCarry(out, carry) 1026 } 1027 1028 // Group operations: 1029 // 1030 // Elements of the elliptic curve group are represented in Jacobian 1031 // coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in 1032 // Jacobian form. 1033 1034 // p256PointDouble sets {xOut,yOut,zOut} = 2*{x,y,z}. 
1035 // 1036 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l 1037 func p256PointDouble(xOut, yOut, zOut, x, y, z *[p256Limbs]uint32) { 1038 var delta, gamma, alpha, beta, tmp, tmp2 [p256Limbs]uint32 1039 1040 p256Square(&delta, z) // delta = z^2 1041 p256Square(&gamma, y) // gamma = y^2 1042 p256Mul(&beta, x, &gamma) // beta = x * gamma = x * y^2 1043 1044 p256Sum(&tmp, x, &delta) // tmp = x + delta = x + z^2 1045 p256Diff(&tmp2, x, &delta) // tmp2 = x - delta = x - z^2 1046 p256Mul(&alpha, &tmp, &tmp2) // alpha = tmp * tmp2 = (x + z^2) * (x - z^2) = x^2 - z^4 1047 p256Scalar3(&alpha) // alpha = alpah * 3 = 3*(x^2 - z^4) 1048 1049 p256Sum(&tmp, y, z) // tmp = y+z 1050 p256Square(&tmp, &tmp) // tmp = (y+z)^2 1051 p256Diff(&tmp, &tmp, &gamma) // tmp = tmp - gamma = (y+z)^2 - y^2 1052 p256Diff(zOut, &tmp, &delta) // zOut = tmp - delta = (y+z)^2 - y^2 - z^2 1053 1054 p256Scalar4(&beta) // beta = beta * 4 = 4 * x * y^2 1055 p256Square(xOut, &alpha) // xOut = alpha ^ 2 = (3*(x^2 - z^4))^2 1056 p256Diff(xOut, xOut, &beta) // xOut = xOut - beta = (3*(x^2 - z^4))^2 - 4 * x * y^2 1057 p256Diff(xOut, xOut, &beta) // xOut = xOut - beta = (3*(x^2 - z^4))^2 - 8 * x * y^2 1058 1059 p256Diff(&tmp, &beta, xOut) // tmp = beta - xOut 1060 p256Mul(&tmp, &alpha, &tmp) // tmp = 3*(x^2 - z^4) * (beta - xOut) 1061 p256Square(&tmp2, &gamma) // tmp2 = gamma^2 = y^4 1062 p256Scalar8(&tmp2) // tmp2 = 8*tmp2 = 8*y^4 1063 p256Diff(yOut, &tmp, &tmp2) // yOut = (3*x^2 - 3*z^4) * (beta - xOut) - 8*y^4 1064 } 1065 1066 // p256PointAddMixed sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,1}. 1067 // (i.e. the second point is affine.) 1068 // 1069 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 1070 // 1071 // Note that this function does not handle P+P, infinity+P nor P+infinity 1072 // correctly. 
1073 func p256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *[p256Limbs]uint32) { 1074 var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 1075 1076 p256Square(&z1z1, z1) 1077 p256Sum(&tmp, z1, z1) 1078 1079 p256Mul(&u2, x2, &z1z1) 1080 p256Mul(&z1z1z1, z1, &z1z1) 1081 p256Mul(&s2, y2, &z1z1z1) 1082 p256Diff(&h, &u2, x1) 1083 p256Sum(&i, &h, &h) 1084 p256Square(&i, &i) 1085 p256Mul(&j, &h, &i) 1086 p256Diff(&r, &s2, y1) 1087 p256Sum(&r, &r, &r) 1088 p256Mul(&v, x1, &i) 1089 1090 p256Mul(zOut, &tmp, &h) 1091 p256Square(&rr, &r) 1092 p256Diff(xOut, &rr, &j) 1093 p256Diff(xOut, xOut, &v) 1094 p256Diff(xOut, xOut, &v) 1095 1096 p256Diff(&tmp, &v, xOut) 1097 p256Mul(yOut, &tmp, &r) 1098 p256Mul(&tmp, y1, &j) 1099 p256Diff(yOut, yOut, &tmp) 1100 p256Diff(yOut, yOut, &tmp) 1101 } 1102 1103 // p256PointAdd sets {xOut,yOut,zOut} = {x1,y1,z1} + {x2,y2,z2}. 1104 // 1105 // See https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl 1106 // 1107 // Note that this function does not handle P+P, infinity+P nor P+infinity 1108 // correctly. 
1109 func p256PointAdd(xOut, yOut, zOut, x1, y1, z1, x2, y2, z2 *[p256Limbs]uint32) { 1110 var z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp [p256Limbs]uint32 1111 1112 p256Square(&z1z1, z1) 1113 p256Square(&z2z2, z2) 1114 p256Mul(&u1, x1, &z2z2) 1115 1116 p256Sum(&tmp, z1, z2) 1117 p256Square(&tmp, &tmp) 1118 p256Diff(&tmp, &tmp, &z1z1) 1119 p256Diff(&tmp, &tmp, &z2z2) 1120 1121 p256Mul(&z2z2z2, z2, &z2z2) 1122 p256Mul(&s1, y1, &z2z2z2) 1123 1124 p256Mul(&u2, x2, &z1z1) 1125 p256Mul(&z1z1z1, z1, &z1z1) 1126 p256Mul(&s2, y2, &z1z1z1) 1127 p256Diff(&h, &u2, &u1) 1128 p256Sum(&i, &h, &h) 1129 p256Square(&i, &i) 1130 p256Mul(&j, &h, &i) 1131 p256Diff(&r, &s2, &s1) 1132 p256Sum(&r, &r, &r) 1133 p256Mul(&v, &u1, &i) 1134 1135 p256Mul(zOut, &tmp, &h) 1136 p256Square(&rr, &r) 1137 p256Diff(xOut, &rr, &j) 1138 p256Diff(xOut, xOut, &v) 1139 p256Diff(xOut, xOut, &v) 1140 1141 p256Diff(&tmp, &v, xOut) 1142 p256Mul(yOut, &tmp, &r) 1143 p256Mul(&tmp, &s1, &j) 1144 p256Diff(yOut, yOut, &tmp) 1145 p256Diff(yOut, yOut, &tmp) 1146 } 1147 1148 // p256CopyConditional sets out=in if mask = 0xffffffff in constant time. 1149 // 1150 // On entry: mask is either 0 or 0xffffffff. 1151 func p256CopyConditional(out, in *[p256Limbs]uint32, mask uint32) { 1152 for i := 0; i < p256Limbs; i++ { 1153 tmp := mask & (in[i] ^ out[i]) 1154 out[i] ^= tmp 1155 } 1156 } 1157 1158 // p256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table. 1159 // On entry: index < 16, table[0] must be zero. 1160 // Constant time table access, safe select. 1161 func p256SelectAffinePoint(xOut, yOut *[p256Limbs]uint32, table []uint32, index uint32) { 1162 for i := range xOut { 1163 xOut[i] = 0 1164 } 1165 for i := range yOut { 1166 yOut[i] = 0 1167 } 1168 1169 for i := uint32(1); i < 16; i++ { 1170 mask := i ^ index // mask is zero when i equals index, otherwise non-zero. mask = {b3, b2, b1, b0}, ignore unused bits. 
1171 mask |= mask >> 2 // mask = {b3, b2, b1 | b3, b0 | b2} 1172 mask |= mask >> 1 // mask = {b3, b2 | b3, b1 | b2 | b3, b0 | b1 | b2 | b3} 1173 mask &= 1 // mask = {0, 0, 0, b0 | b1 | b2 | b3} 1174 mask-- // mask = 0xffffffff when i equals index, otherwise 0x00000000 1175 for j := range xOut { 1176 xOut[j] |= table[0] & mask 1177 table = table[1:] 1178 } 1179 for j := range yOut { 1180 yOut[j] |= table[0] & mask 1181 table = table[1:] 1182 } 1183 } 1184 } 1185 1186 // p256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of 1187 // table. 1188 // On entry: index < 16, table[0] must be zero. 1189 func p256SelectJacobianPoint(xOut, yOut, zOut *[p256Limbs]uint32, table *[16][3][p256Limbs]uint32, index uint32) { 1190 for i := range xOut { 1191 xOut[i] = 0 1192 } 1193 for i := range yOut { 1194 yOut[i] = 0 1195 } 1196 for i := range zOut { 1197 zOut[i] = 0 1198 } 1199 1200 // The implicit value at index 0 is all zero. We don't need to perform that 1201 // iteration of the loop because we already set out_* to zero. 1202 for i := uint32(1); i < 16; i++ { 1203 mask := i ^ index 1204 mask |= mask >> 2 1205 mask |= mask >> 1 1206 mask &= 1 1207 mask-- 1208 for j := range xOut { 1209 xOut[j] |= table[i][0][j] & mask 1210 } 1211 for j := range yOut { 1212 yOut[j] |= table[i][1][j] & mask 1213 } 1214 for j := range zOut { 1215 zOut[j] |= table[i][2][j] & mask 1216 } 1217 } 1218 } 1219 1220 // p256GetBit returns the bit'th bit of scalar. 1221 func p256GetBit(scalar *[32]uint8, bit uint) uint32 { 1222 return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1) 1223 } 1224 1225 // p256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a 1226 // little-endian number. Note that the value of scalar must be less than the 1227 // order of the group. 
func p256ScalarBaseMult(xOut, yOut, zOut *[p256Limbs]uint32, scalar *[32]uint8) {
	// nIsInfinityMask is all-ones while the accumulator {xOut,yOut,zOut}
	// is still the point at infinity, and zero afterwards.
	nIsInfinityMask := ^uint32(0)
	var pIsNoninfiniteMask, mask, tableOffset uint32
	var px, py, tx, ty, tz [p256Limbs]uint32

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}

	// The loop adds bits at positions 0, 64, 128 and 192, followed by
	// positions 32,96,160 and 224 and does this 32 times.
	for i := uint(0); i < 32; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}
		tableOffset = 0
		for j := uint(0); j <= 32; j += 32 {
			// Gather one bit from each 64-bit quarter of the scalar to
			// form a 4-bit index into the precomputed table.
			bit0 := p256GetBit(scalar, 31-i+j)
			bit1 := p256GetBit(scalar, 95-i+j)
			bit2 := p256GetBit(scalar, 159-i+j)
			bit3 := p256GetBit(scalar, 223-i+j)
			index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)

			p256SelectAffinePoint(&px, &py, p256Precomputed[tableOffset:], index)
			tableOffset += 30 * p256Limbs

			// Since scalar is less than the order of the group, we know that
			// {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
			// below.
			p256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
			// The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
			// (a.k.a. the point at infinity). We handle that situation by
			// copying the point from the table.
			p256CopyConditional(xOut, &px, nIsInfinityMask)
			p256CopyConditional(yOut, &py, nIsInfinityMask)
			p256CopyConditional(zOut, &p256One, nIsInfinityMask)

			// Equally, the result is also wrong if the point from the table is
			// zero, which happens when the index is zero. We handle that by
			// only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
			pIsNoninfiniteMask = nonZeroToAllOnes(index)
			mask = pIsNoninfiniteMask & ^nIsInfinityMask
			p256CopyConditional(xOut, &tx, mask)
			p256CopyConditional(yOut, &ty, mask)
			p256CopyConditional(zOut, &tz, mask)
			// If p was not zero, then n is now non-zero.
			nIsInfinityMask &^= pIsNoninfiniteMask
		}
	}
}

// p256PointToAffine converts a Jacobian point to an affine point. If the input
// is the point at infinity then it returns (0, 0) in constant time.
func p256PointToAffine(xOut, yOut, x, y, z *[p256Limbs]uint32) {
	var zInv, zInvSq [p256Limbs]uint32

	// x' = x/z^2, y' = y/z^3.
	p256Invert(&zInv, z)
	p256Square(&zInvSq, &zInv)
	p256Mul(xOut, x, &zInvSq)
	p256Mul(&zInv, &zInv, &zInvSq) // zInv = z^-3
	p256Mul(yOut, y, &zInv)
}

// p256ToAffine returns a pair of *big.Int containing the affine representation
// of {x,y,z}.
func p256ToAffine(x, y, z *[p256Limbs]uint32) (xOut, yOut *big.Int) {
	var xx, yy [p256Limbs]uint32
	p256PointToAffine(&xx, &yy, x, y, z)
	return p256ToBig(&xx), p256ToBig(&yy)
}

// p256ScalarMult sets {xOut,yOut,zOut} = scalar*{x,y}.
func p256ScalarMult(xOut, yOut, zOut, x, y *[p256Limbs]uint32, scalar *[32]uint8) {
	var px, py, pz, tx, ty, tz [p256Limbs]uint32
	var precomp [16][3][p256Limbs]uint32
	var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32

	// We precompute 0,1,2,... times {x,y}.
	precomp[1][0] = *x
	precomp[1][1] = *y
	precomp[1][2] = p256One

	// precomp[2k] = 2*precomp[k], precomp[2k+1] = precomp[2k] + {x,y}.
	for i := 2; i < 16; i += 2 {
		p256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
		p256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
	}

	for i := range xOut {
		xOut[i] = 0
	}
	for i := range yOut {
		yOut[i] = 0
	}
	for i := range zOut {
		zOut[i] = 0
	}
	// All-ones while the accumulator is still the point at infinity.
	nIsInfinityMask = ^uint32(0)

	// We add in a window of four bits each iteration and do this 64 times.
	for i := 0; i < 64; i++ {
		if i != 0 {
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
			p256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
		}

		// Take the next 4-bit window, high nibble first, from the
		// big-endian scalar bytes.
		index = uint32(scalar[31-i/2])
		if (i & 1) == 1 {
			index &= 15
		} else {
			index >>= 4
		}

		// See the comments in scalarBaseMult about handling infinities.
		p256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
		p256PointAdd(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py, &pz)
		p256CopyConditional(xOut, &px, nIsInfinityMask)
		p256CopyConditional(yOut, &py, nIsInfinityMask)
		p256CopyConditional(zOut, &pz, nIsInfinityMask)

		pIsNoninfiniteMask = nonZeroToAllOnes(index)
		mask = pIsNoninfiniteMask & ^nIsInfinityMask
		p256CopyConditional(xOut, &tx, mask)
		p256CopyConditional(yOut, &ty, mask)
		p256CopyConditional(zOut, &tz, mask)
		nIsInfinityMask &^= pIsNoninfiniteMask
	}
}

// p256FromBig sets out = R*in.
1365 func p256FromBig(out *[p256Limbs]uint32, in *big.Int) { 1366 p256FromBigAgainstP(out, in, p256.P) 1367 } 1368 1369 func p256FromBigAgainstP(out *[p256Limbs]uint32, in *big.Int, p *big.Int) { 1370 tmp := new(big.Int).Lsh(in, 257) 1371 tmp.Mod(tmp, p) 1372 1373 for i := 0; i < p256Limbs; i++ { 1374 if bits := tmp.Bits(); len(bits) > 0 { 1375 out[i] = uint32(bits[0]) & bottom29Bits 1376 } else { 1377 out[i] = 0 1378 } 1379 tmp.Rsh(tmp, 29) 1380 1381 i++ 1382 if i == p256Limbs { 1383 break 1384 } 1385 1386 if bits := tmp.Bits(); len(bits) > 0 { 1387 out[i] = uint32(bits[0]) & bottom28Bits 1388 } else { 1389 out[i] = 0 1390 } 1391 tmp.Rsh(tmp, 28) 1392 } 1393 } 1394 1395 // p256ToBig returns a *big.Int containing the value of in. 1396 func p256ToBig(in *[p256Limbs]uint32) *big.Int { 1397 result := limbsToBig(in) 1398 result.Mul(result, p256RInverse) 1399 result.Mod(result, p256.P) 1400 return result 1401 }