github.com/ethereum/go-ethereum@v1.16.1/crypto/secp256k1/libsecp256k1/src/ecmult_gen_impl.h (about) 1 /*********************************************************************** 2 * Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.* 5 ***********************************************************************/ 6 7 #ifndef SECP256K1_ECMULT_GEN_IMPL_H 8 #define SECP256K1_ECMULT_GEN_IMPL_H 9 10 #include "util.h" 11 #include "scalar.h" 12 #include "group.h" 13 #include "ecmult_gen.h" 14 #include "hash_impl.h" 15 #include "precomputed_ecmult_gen.h" 16 17 static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx) { 18 secp256k1_ecmult_gen_blind(ctx, NULL); 19 ctx->built = 1; 20 } 21 22 static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context* ctx) { 23 return ctx->built; 24 } 25 26 static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) { 27 ctx->built = 0; 28 secp256k1_scalar_clear(&ctx->scalar_offset); 29 secp256k1_ge_clear(&ctx->ge_offset); 30 secp256k1_fe_clear(&ctx->proj_blind); 31 } 32 33 /* Compute the scalar (2^COMB_BITS - 1) / 2, the difference between the gn argument to 34 * secp256k1_ecmult_gen, and the scalar whose encoding the table lookup bits are drawn 35 * from (before applying blinding). */ 36 static void secp256k1_ecmult_gen_scalar_diff(secp256k1_scalar* diff) { 37 int i; 38 39 /* Compute scalar -1/2. */ 40 secp256k1_scalar neghalf; 41 secp256k1_scalar_half(&neghalf, &secp256k1_scalar_one); 42 secp256k1_scalar_negate(&neghalf, &neghalf); 43 44 /* Compute offset = 2^(COMB_BITS - 1). */ 45 *diff = secp256k1_scalar_one; 46 for (i = 0; i < COMB_BITS - 1; ++i) { 47 secp256k1_scalar_add(diff, diff, diff); 48 } 49 50 /* The result is the sum 2^(COMB_BITS - 1) + (-1/2). */ 51 secp256k1_scalar_add(diff, diff, &neghalf); 52 } 53 54 static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) { 55 uint32_t comb_off; 56 secp256k1_ge add; 57 secp256k1_fe neg; 58 secp256k1_ge_storage adds; 59 secp256k1_scalar d; 60 /* Array of uint32_t values large enough to store COMB_BITS bits. Only the bottom 61 * 8 are ever nonzero, but having the zero padding at the end if COMB_BITS>256 62 * avoids the need to deal with out-of-bounds reads from a scalar. */ 63 uint32_t recoded[(COMB_BITS + 31) >> 5] = {0}; 64 int first = 1, i; 65 66 memset(&adds, 0, sizeof(adds)); 67 68 /* We want to compute R = gn*G. 69 * 70 * To blind the scalar used in the computation, we rewrite this to be 71 * R = (gn - b)*G + b*G, with a blinding value b determined by the context. 72 * 73 * The multiplication (gn-b)*G will be performed using a signed-digit multi-comb (see Section 74 * 3.3 of "Fast and compact elliptic-curve cryptography" by Mike Hamburg, 75 * https://eprint.iacr.org/2012/309). 76 * 77 * Let comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1), where s[i] is the i'th bit of 78 * the binary representation of scalar s. So the s[i] values determine whether -2^i*P (s[i]=0) 79 * or +2^i*P (s[i]=1) are added together. COMB_BITS is at least 256, so all bits of s are 80 * covered. By manipulating: 81 * 82 * comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1) 83 * <=> comb(s, P) = sum((2*s[i]-1)*2^i for i=0..COMB_BITS-1) * P 84 * <=> comb(s, P) = (2*sum(s[i]*2^i for i=0..COMB_BITS-1) - sum(2^i for i=0..COMB_BITS-1)) * P 85 * <=> comb(s, P) = (2*s - (2^COMB_BITS - 1)) * P 86 * 87 * If we wanted to compute (gn-b)*G as comb(s, G), it would need to hold that 88 * 89 * (gn - b) * G = (2*s - (2^COMB_BITS - 1)) * G 90 * <=> s = (gn - b + (2^COMB_BITS - 1))/2 (mod order) 91 * 92 * We use an alternative here that avoids the modular division by two: instead we compute 93 * (gn-b)*G as comb(d, G/2). For that to hold it must be the case that 94 * 95 * (gn - b) * G = (2*d - (2^COMB_BITS - 1)) * (G/2) 96 * <=> d = gn - b + (2^COMB_BITS - 1)/2 (mod order) 97 * 98 * Adding precomputation, our final equations become: 99 * 100 * ctx->scalar_offset = (2^COMB_BITS - 1)/2 - b (mod order) 101 * ctx->ge_offset = b*G 102 * d = gn + ctx->scalar_offset (mod order) 103 * R = comb(d, G/2) + ctx->ge_offset 104 * 105 * comb(d, G/2) function is then computed by summing + or - 2^(i-1)*G, for i=0..COMB_BITS-1, 106 * depending on the value of the bits d[i] of the binary representation of scalar d. 107 */ 108 109 /* Compute the scalar d = (gn + ctx->scalar_offset). */ 110 secp256k1_scalar_add(&d, &ctx->scalar_offset, gn); 111 /* Convert to recoded array. */ 112 for (i = 0; i < 8 && i < ((COMB_BITS + 31) >> 5); ++i) { 113 recoded[i] = secp256k1_scalar_get_bits_limb32(&d, 32 * i, 32); 114 } 115 secp256k1_scalar_clear(&d); 116 117 /* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the 118 * (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions. 119 * We rewrite our equation in terms of these table entries. 120 * 121 * Let mask(b) = sum(2^((b*COMB_TEETH + t)*COMB_SPACING) for t=0..COMB_TEETH-1), 122 * with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11, 123 * COMB_TEETH=6, COMB_SPACING=4, we would have: 124 * mask(0) = 2^0 + 2^4 + 2^8 + 2^12 + 2^16 + 2^20, 125 * mask(1) = 2^24 + 2^28 + 2^32 + 2^36 + 2^40 + 2^44, 126 * mask(2) = 2^48 + 2^52 + 2^56 + 2^60 + 2^64 + 2^68, 127 * ... 128 * mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260 129 * 130 * We will split up the bits d[i] using these masks. Specifically, each mask is 131 * used COMB_SPACING times, with different shifts: 132 * 133 * d = (d & mask(0)<<0) + (d & mask(1)<<0) + ... + (d & mask(COMB_BLOCKS-1)<<0) + 134 * (d & mask(0)<<1) + (d & mask(1)<<1) + ... + (d & mask(COMB_BLOCKS-1)<<1) + 135 * ... 136 * (d & mask(0)<<(COMB_SPACING-1)) + ... 137 * 138 * Now define table(b, m) = (m - mask(b)/2) * G, and we will precompute these values for 139 * b=0..COMB_BLOCKS-1, and for all values m which (d & mask(b)) can take (so m can take on 140 * 2^COMB_TEETH distinct values). 141 * 142 * If m=(d & mask(b)), then table(b, m) is the sum of 2^i * (2*d[i]-1) * G/2, with i 143 * iterating over the set bits in mask(b). In our example, table(2, 2^48 + 2^56 + 2^68) 144 * would equal (2^48 - 2^52 + 2^56 - 2^60 - 2^64 + 2^68) * G/2. 145 * 146 * With that, we can rewrite comb(d, G/2) as: 147 * 148 * 2^0 * (table(0, d>>0 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>0 & mask(COMP_BLOCKS-1))) 149 * + 2^1 * (table(0, d>>1 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>1 & mask(COMP_BLOCKS-1))) 150 * + 2^2 * (table(0, d>>2 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>2 & mask(COMP_BLOCKS-1))) 151 * + ... 152 * + 2^(COMB_SPACING-1) * (table(0, d>>(COMB_SPACING-1) & mask(0)) + ...) 153 * 154 * Or more generically as 155 * 156 * sum(2^i * sum(table(b, d>>i & mask(b)), b=0..COMB_BLOCKS-1), i=0..COMB_SPACING-1) 157 * 158 * This is implemented using an outer loop that runs in reverse order over the lines of this 159 * equation, which in each iteration runs an inner loop that adds the terms of that line and 160 * then doubles the result before proceeding to the next line. 161 * 162 * In pseudocode: 163 * c = infinity 164 * for comb_off in range(COMB_SPACING - 1, -1, -1): 165 * for block in range(COMB_BLOCKS): 166 * c += table(block, (d >> comb_off) & mask(block)) 167 * if comb_off > 0: 168 * c = 2*c 169 * return c 170 * 171 * This computes c = comb(d, G/2), and thus finally R = c + ctx->ge_offset. Note that it would 172 * be possible to apply an initial offset instead of a final offset (moving ge_offset to take 173 * the place of infinity above), but the chosen approach allows using (in a future improvement) 174 * an incomplete addition formula for most of the multiplication. 175 * 176 * The last question is how to implement the table(b, m) function. For any value of b, 177 * m=(d & mask(b)) can only take on at most 2^COMB_TEETH possible values (the last one may have 178 * fewer as there mask(b) may exceed the curve order). So we could create COMB_BLOCK tables 179 * which contain a value for each such m value. 180 * 181 * Now note that if m=(d & mask(b)), then flipping the relevant bits of m results in negating 182 * the result of table(b, m). This is because table(b,m XOR mask(b)) = table(b, mask(b) - m) = 183 * (mask(b) - m - mask(b)/2)*G = (-m + mask(b)/2)*G = -(m - mask(b)/2)*G = -table(b, m). 184 * Because of this it suffices to only store the first half of the m values for every b. If an 185 * entry from the second half is needed, we look up its bit-flipped version instead, and negate 186 * it. 187 * 188 * secp256k1_ecmult_gen_prec_table[b][index] stores the table(b, m) entries. Index 189 * is the relevant mask(b) bits of m packed together without gaps. */ 190 191 /* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */ 192 comb_off = COMB_SPACING - 1; 193 while (1) { 194 uint32_t block; 195 uint32_t bit_pos = comb_off; 196 /* Inner loop: for each block, add table entries to the result. */ 197 for (block = 0; block < COMB_BLOCKS; ++block) { 198 /* Gather the mask(block)-selected bits of d into bits. They're packed: 199 * bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */ 200 uint32_t bits = 0, sign, abs, index, tooth; 201 /* Instead of reading individual bits here to construct the bits variable, 202 * build up the result by xoring rotated reads together. In every iteration, 203 * one additional bit is made correct, starting at the bottom. The bits 204 * above that contain junk. This reduces leakage by avoiding computations 205 * on variables that can have only a low number of possible values (e.g., 206 * just two values when reading a single bit into a variable.) See: 207 * https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf 208 */ 209 for (tooth = 0; tooth < COMB_TEETH; ++tooth) { 210 /* Construct bitdata s.t. the bottom bit is the bit we'd like to read. 211 * 212 * We could just set bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f) 213 * but this would simply discard the bits that fall off at the bottom, 214 * and thus, for example, bitdata could still have only two values if we 215 * happen to shift by exactly 31 positions. We use a rotation instead, 216 * which ensures that bitdata doesn't loose entropy. This relies on the 217 * rotation being atomic, i.e., the compiler emitting an actual rot 218 * instruction. */ 219 uint32_t bitdata = secp256k1_rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f); 220 221 /* Clear the bit at position tooth, but sssh, don't tell clang. */ 222 uint32_t volatile vmask = ~(1 << tooth); 223 bits &= vmask; 224 225 /* Write the bit into position tooth (and junk into higher bits). */ 226 bits ^= bitdata << tooth; 227 bit_pos += COMB_SPACING; 228 } 229 230 /* If the top bit of bits is 1, flip them all (corresponding to looking up 231 * the negated table value), and remember to negate the result in sign. */ 232 sign = (bits >> (COMB_TEETH - 1)) & 1; 233 abs = (bits ^ -sign) & (COMB_POINTS - 1); 234 VERIFY_CHECK(sign == 0 || sign == 1); 235 VERIFY_CHECK(abs < COMB_POINTS); 236 237 /** This uses a conditional move to avoid any secret data in array indexes. 238 * _Any_ use of secret indexes has been demonstrated to result in timing 239 * sidechannels, even when the cache-line access patterns are uniform. 240 * See also: 241 * "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe 242 * (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and 243 * "Cache Attacks and Countermeasures: the Case of AES", RSA 2006, 244 * by Dag Arne Osvik, Adi Shamir, and Eran Tromer 245 * (https://www.tau.ac.il/~tromer/papers/cache.pdf) 246 */ 247 for (index = 0; index < COMB_POINTS; ++index) { 248 secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == abs); 249 } 250 251 /* Set add=adds or add=-adds, in constant time, based on sign. */ 252 secp256k1_ge_from_storage(&add, &adds); 253 secp256k1_fe_negate(&neg, &add.y, 1); 254 secp256k1_fe_cmov(&add.y, &neg, sign); 255 256 /* Add the looked up and conditionally negated value to r. */ 257 if (EXPECT(first, 0)) { 258 /* If this is the first table lookup, we can skip addition. */ 259 secp256k1_gej_set_ge(r, &add); 260 /* Give the entry a random Z coordinate to blind intermediary results. */ 261 secp256k1_gej_rescale(r, &ctx->proj_blind); 262 first = 0; 263 } else { 264 secp256k1_gej_add_ge(r, r, &add); 265 } 266 } 267 268 /* Double the result, except in the last iteration. */ 269 if (comb_off-- == 0) break; 270 secp256k1_gej_double(r, r); 271 } 272 273 /* Correct for the scalar_offset added at the start (ge_offset = b*G, while b was 274 * subtracted from the input scalar gn). */ 275 secp256k1_gej_add_ge(r, r, &ctx->ge_offset); 276 277 /* Cleanup. */ 278 secp256k1_fe_clear(&neg); 279 secp256k1_ge_clear(&add); 280 secp256k1_memclear(&adds, sizeof(adds)); 281 secp256k1_memclear(&recoded, sizeof(recoded)); 282 } 283 284 /* Setup blinding values for secp256k1_ecmult_gen. */ 285 static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) { 286 secp256k1_scalar b; 287 secp256k1_scalar diff; 288 secp256k1_gej gb; 289 secp256k1_fe f; 290 unsigned char nonce32[32]; 291 secp256k1_rfc6979_hmac_sha256 rng; 292 unsigned char keydata[64]; 293 294 /* Compute the (2^COMB_BITS - 1)/2 term once. */ 295 secp256k1_ecmult_gen_scalar_diff(&diff); 296 297 if (seed32 == NULL) { 298 /* When seed is NULL, reset the final point and blinding value. */ 299 secp256k1_ge_neg(&ctx->ge_offset, &secp256k1_ge_const_g); 300 secp256k1_scalar_add(&ctx->scalar_offset, &secp256k1_scalar_one, &diff); 301 ctx->proj_blind = secp256k1_fe_one; 302 return; 303 } 304 /* The prior blinding value (if not reset) is chained forward by including it in the hash. */ 305 secp256k1_scalar_get_b32(keydata, &ctx->scalar_offset); 306 /** Using a CSPRNG allows a failure free interface, avoids needing large amounts of random data, 307 * and guards against weak or adversarial seeds. This is a simpler and safer interface than 308 * asking the caller for blinding values directly and expecting them to retry on failure. 309 */ 310 VERIFY_CHECK(seed32 != NULL); 311 memcpy(keydata + 32, seed32, 32); 312 secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, 64); 313 secp256k1_memclear(keydata, sizeof(keydata)); 314 315 /* Compute projective blinding factor (cannot be 0). */ 316 secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32); 317 secp256k1_fe_set_b32_mod(&f, nonce32); 318 secp256k1_fe_cmov(&f, &secp256k1_fe_one, secp256k1_fe_normalizes_to_zero(&f)); 319 ctx->proj_blind = f; 320 321 /* For a random blinding value b, set scalar_offset=diff-b, ge_offset=bG */ 322 secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32); 323 secp256k1_scalar_set_b32(&b, nonce32, NULL); 324 /* The blinding value cannot be zero, as that would mean ge_offset = infinity, 325 * which secp256k1_gej_add_ge cannot handle. */ 326 secp256k1_scalar_cmov(&b, &secp256k1_scalar_one, secp256k1_scalar_is_zero(&b)); 327 secp256k1_rfc6979_hmac_sha256_finalize(&rng); 328 secp256k1_ecmult_gen(ctx, &gb, &b); 329 secp256k1_scalar_negate(&b, &b); 330 secp256k1_scalar_add(&ctx->scalar_offset, &b, &diff); 331 secp256k1_ge_set_gej(&ctx->ge_offset, &gb); 332 333 /* Clean up. */ 334 secp256k1_memclear(nonce32, sizeof(nonce32)); 335 secp256k1_scalar_clear(&b); 336 secp256k1_gej_clear(&gb); 337 secp256k1_fe_clear(&f); 338 secp256k1_rfc6979_hmac_sha256_clear(&rng); 339 } 340 341 #endif /* SECP256K1_ECMULT_GEN_IMPL_H */