github.com/ethereum/go-ethereum@v1.16.1/crypto/secp256k1/libsecp256k1/src/ecmult_gen_impl.h

     1  /***********************************************************************
     2   * Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman         *
     3   * Distributed under the MIT software license, see the accompanying    *
     4   * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
     5   ***********************************************************************/
     6  
     7  #ifndef SECP256K1_ECMULT_GEN_IMPL_H
     8  #define SECP256K1_ECMULT_GEN_IMPL_H
     9  
    10  #include "util.h"
    11  #include "scalar.h"
    12  #include "group.h"
    13  #include "ecmult_gen.h"
    14  #include "hash_impl.h"
    15  #include "precomputed_ecmult_gen.h"
    16  
    17  static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx) {
    18      secp256k1_ecmult_gen_blind(ctx, NULL);
    19      ctx->built = 1;
    20  }
    21  
    22  static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_context* ctx) {
    23      return ctx->built;
    24  }
    25  
    26  static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
    27      ctx->built = 0;
    28      secp256k1_scalar_clear(&ctx->scalar_offset);
    29      secp256k1_ge_clear(&ctx->ge_offset);
    30      secp256k1_fe_clear(&ctx->proj_blind);
    31  }
    32  
    33  /* Compute the scalar (2^COMB_BITS - 1) / 2: the amount added to the gn argument of
    34   * secp256k1_ecmult_gen to obtain the scalar whose encoding the table lookup bits are
    35   * drawn from (before applying blinding). */
    36  static void secp256k1_ecmult_gen_scalar_diff(secp256k1_scalar* diff) {
    37      int i;
    38  
    39      /* Compute scalar -1/2. */
    40      secp256k1_scalar neghalf;
    41      secp256k1_scalar_half(&neghalf, &secp256k1_scalar_one);
    42      secp256k1_scalar_negate(&neghalf, &neghalf);
    43  
    44      /* Compute offset = 2^(COMB_BITS - 1). */
    45      *diff = secp256k1_scalar_one;
    46      for (i = 0; i < COMB_BITS - 1; ++i) {
    47          secp256k1_scalar_add(diff, diff, diff);
    48      }
    49  
    50      /* The result is the sum 2^(COMB_BITS - 1) + (-1/2). */
    51      secp256k1_scalar_add(diff, diff, &neghalf);
    52  }
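
        /* Illustrative sketch (not part of the library): the identity used above,
         * (2^n - 1)/2 == 2^(n-1) - 1/2 (mod m) for any odd modulus m and n >= 1, can be
         * checked with plain integer arithmetic. The helper and the macro guarding it are
         * hypothetical; the real code works modulo the group order with secp256k1_scalar
         * operations. */
        #ifdef SECP256K1_ECMULT_GEN_SCALAR_DIFF_EXAMPLE
        static int secp256k1_ecmult_gen_scalar_diff_identity_example(uint32_t n, uint32_t m) {
            uint64_t half = ((uint64_t)m + 1) / 2; /* 1/2 mod m, since m is odd */
            uint64_t pow = 1, powm1 = 1, i;
            for (i = 0; i < n; ++i) pow = (pow * 2) % m;         /* 2^n mod m */
            for (i = 0; i + 1 < n; ++i) powm1 = (powm1 * 2) % m; /* 2^(n-1) mod m */
            /* Left: (2^n - 1) * (1/2) mod m.  Right: 2^(n-1) + (-(1/2)) mod m, which is the
             * form computed by secp256k1_ecmult_gen_scalar_diff. */
            return ((pow + m - 1) % m) * half % m == (powm1 + (m - half)) % m;
        }
        #endif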
    53  
    54  static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) {
    55      uint32_t comb_off;
    56      secp256k1_ge add;
    57      secp256k1_fe neg;
    58      secp256k1_ge_storage adds;
    59      secp256k1_scalar d;
    60      /* Array of uint32_t values large enough to store COMB_BITS bits. Only the bottom
    61       * 8 are ever nonzero, but having the zero padding at the end if COMB_BITS>256
    62       * avoids the need to deal with out-of-bounds reads from a scalar. */
    63      uint32_t recoded[(COMB_BITS + 31) >> 5] = {0};
    64      int first = 1, i;
    65  
    66      memset(&adds, 0, sizeof(adds));
    67  
    68      /* We want to compute R = gn*G.
    69       *
    70       * To blind the scalar used in the computation, we rewrite this to be
    71       * R = (gn - b)*G + b*G, with a blinding value b determined by the context.
    72       *
    73       * The multiplication (gn-b)*G will be performed using a signed-digit multi-comb (see Section
    74       * 3.3 of "Fast and compact elliptic-curve cryptography" by Mike Hamburg,
    75       * https://eprint.iacr.org/2012/309).
    76       *
    77       * Let comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1), where s[i] is the i'th bit of
    78       * the binary representation of scalar s. So the s[i] values determine whether -2^i*P (s[i]=0)
    79       * or +2^i*P (s[i]=1) are added together. COMB_BITS is at least 256, so all bits of s are
    80       * covered. By manipulating:
    81       *
    82       *     comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1)
    83       * <=> comb(s, P) = sum((2*s[i]-1)*2^i for i=0..COMB_BITS-1) * P
    84       * <=> comb(s, P) = (2*sum(s[i]*2^i for i=0..COMB_BITS-1) - sum(2^i for i=0..COMB_BITS-1)) * P
    85       * <=> comb(s, P) = (2*s - (2^COMB_BITS - 1)) * P
    86       *
    87       * If we wanted to compute (gn-b)*G as comb(s, G), it would need to hold that
    88       *
    89       *     (gn - b) * G = (2*s - (2^COMB_BITS - 1)) * G
    90       * <=> s = (gn - b + (2^COMB_BITS - 1))/2 (mod order)
    91       *
    92       * We use an alternative here that avoids the modular division by two: instead we compute
    93       * (gn-b)*G as comb(d, G/2). For that to hold it must be the case that
    94       *
    95       *     (gn - b) * G = (2*d - (2^COMB_BITS - 1)) * (G/2)
    96       * <=> d = gn - b + (2^COMB_BITS - 1)/2 (mod order)
    97       *
    98       * Adding precomputation, our final equations become:
    99       *
   100       *     ctx->scalar_offset = (2^COMB_BITS - 1)/2 - b (mod order)
   101       *     ctx->ge_offset = b*G
   102       *     d = gn + ctx->scalar_offset (mod order)
   103       *     R = comb(d, G/2) + ctx->ge_offset
   104       *
   105       * The comb(d, G/2) value is then computed by summing + or - 2^(i-1)*G, for i=0..COMB_BITS-1,
   106       * depending on the bits d[i] of the binary representation of the scalar d.
   107       */
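            /* Illustrative check of the comb identity above with toy parameters (not the real
             * configuration): take COMB_BITS=3 and s = 0b101 = 5. Then
             *   comb(s, P) = (+1)*2^0*P + (-1)*2^1*P + (+1)*2^2*P = 3*P,
             * and (2*s - (2^COMB_BITS - 1))*P = (10 - 7)*P = 3*P, as expected. */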
   108  
   109      /* Compute the scalar d = (gn + ctx->scalar_offset). */
   110      secp256k1_scalar_add(&d, &ctx->scalar_offset, gn);
   111      /* Convert to recoded array. */
   112      for (i = 0; i < 8 && i < ((COMB_BITS + 31) >> 5); ++i) {
   113          recoded[i] = secp256k1_scalar_get_bits_limb32(&d, 32 * i, 32);
   114      }
   115      secp256k1_scalar_clear(&d);
   116  
   117      /* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
   118       * (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions.
   119       * We rewrite our equation in terms of these table entries.
   120       *
   121       * Let mask(b) = sum(2^((b*COMB_TEETH + t)*COMB_SPACING) for t=0..COMB_TEETH-1),
   122       * with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11,
   123       * COMB_TEETH=6, COMB_SPACING=4, we would have:
   124       *   mask(0)  = 2^0   + 2^4   + 2^8   + 2^12  + 2^16  + 2^20,
   125       *   mask(1)  = 2^24  + 2^28  + 2^32  + 2^36  + 2^40  + 2^44,
   126       *   mask(2)  = 2^48  + 2^52  + 2^56  + 2^60  + 2^64  + 2^68,
   127       *   ...
   128       *   mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260
   129       *
   130       * We will split up the bits d[i] using these masks. Specifically, each mask is
   131       * used COMB_SPACING times, with different shifts:
   132       *
   133       * d = (d & mask(0)<<0) + (d & mask(1)<<0) + ... + (d & mask(COMB_BLOCKS-1)<<0) +
   134       *     (d & mask(0)<<1) + (d & mask(1)<<1) + ... + (d & mask(COMB_BLOCKS-1)<<1) +
   135       *     ...
   136       *     (d & mask(0)<<(COMB_SPACING-1)) + ...
   137       *
   138       * Now define table(b, m) = (m - mask(b)/2) * G, and we will precompute these values for
   139       * b=0..COMB_BLOCKS-1, and for all values m which (d & mask(b)) can take (so m can take on
   140       * 2^COMB_TEETH distinct values).
   141       *
   142       * If m=(d & mask(b)), then table(b, m) is the sum of 2^i * (2*d[i]-1) * G/2, with i
   143       * iterating over the set bits in mask(b). In our example, table(2, 2^48 + 2^56 + 2^68)
   144       * would equal (2^48 - 2^52 + 2^56 - 2^60 - 2^64 + 2^68) * G/2.
   145       *
   146       * With that, we can rewrite comb(d, G/2) as:
   147       *
   148       *     2^0 * (table(0, d>>0 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>0 & mask(COMB_BLOCKS-1)))
   149       *   + 2^1 * (table(0, d>>1 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>1 & mask(COMB_BLOCKS-1)))
   150       *   + 2^2 * (table(0, d>>2 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>2 & mask(COMB_BLOCKS-1)))
   151       *   + ...
   152       *   + 2^(COMB_SPACING-1) * (table(0, d>>(COMB_SPACING-1) & mask(0)) + ...)
   153       *
   154       * Or more generically as
   155       *
   156       *   sum(2^i * sum(table(b, d>>i & mask(b)), b=0..COMB_BLOCKS-1), i=0..COMB_SPACING-1)
   157       *
   158       * This is implemented using an outer loop that runs in reverse order over the lines of this
   159       * equation, which in each iteration runs an inner loop that adds the terms of that line and
   160       * then doubles the result before proceeding to the next line.
   161       *
   162       * In pseudocode:
   163       *   c = infinity
   164       *   for comb_off in range(COMB_SPACING - 1, -1, -1):
   165       *     for block in range(COMB_BLOCKS):
   166       *       c += table(block, (d >> comb_off) & mask(block))
   167       *     if comb_off > 0:
   168       *       c = 2*c
   169       *   return c
   170       *
   171       * This computes c = comb(d, G/2), and thus finally R = c + ctx->ge_offset. Note that it would
   172       * be possible to apply an initial offset instead of a final offset (moving ge_offset to take
   173       * the place of infinity above), but the chosen approach allows using (in a future improvement)
   174       * an incomplete addition formula for most of the multiplication.
   175       *
   176       * The last question is how to implement the table(b, m) function. For any value of b,
   177       * m=(d & mask(b)) can take on at most 2^COMB_TEETH possible values (the last block may have
   178       * fewer, as its mask(b) may select bit positions beyond the top bit of the group order).
   179       * So we could create COMB_BLOCKS tables, each containing an entry for every such value of m.
   180       *
   181       * Now note that if m=(d & mask(b)), then flipping the relevant bits of m results in negating
   182       * the result of table(b, m). This is because table(b,m XOR mask(b)) = table(b, mask(b) - m) =
   183       * (mask(b) - m - mask(b)/2)*G = (-m + mask(b)/2)*G = -(m - mask(b)/2)*G = -table(b, m).
   184       * Because of this it suffices to only store the first half of the m values for every b. If an
   185       * entry from the second half is needed, we look up its bit-flipped version instead, and negate
   186       * it.
   187       *
   188       * secp256k1_ecmult_gen_prec_table[b][index] stores the table(b, m) entries. Index
   189       * is the relevant mask(b) bits of m packed together without gaps. */
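            /* For example (continuing the COMB_TEETH=6, COMB_SPACING=4 example above, and
             * assuming tooth 0 lands in the least significant index bit, as in the loop
             * below): for block 2 and m = 2^48 + 2^56, the selected teeth are (1,0,1,0,0,0),
             * so index = 0b000101 = 5 and secp256k1_ecmult_gen_prec_table[2][5] =
             * (2^48 - 2^52 + 2^56 - 2^60 - 2^64 - 2^68) * G/2. */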
   190  
   191      /* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */
   192      comb_off = COMB_SPACING - 1;
   193      while (1) {
   194          uint32_t block;
   195          uint32_t bit_pos = comb_off;
   196          /* Inner loop: for each block, add table entries to the result. */
   197          for (block = 0; block < COMB_BLOCKS; ++block) {
   198              /* Gather the mask(block)-selected bits of d into bits. They're packed:
   199               * bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */
   200              uint32_t bits = 0, sign, abs, index, tooth;
   201              /* Instead of reading individual bits here to construct the bits variable,
   202               * build up the result by xoring rotated reads together. In every iteration,
   203               * one additional bit is made correct, starting at the bottom. The bits
   204               * above that contain junk. This reduces leakage by avoiding computations
   205               * on variables that can have only a low number of possible values (e.g.,
   206               * just two values when reading a single bit into a variable). See:
   207               * https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
   208               */
   209              for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
   210                  /* Construct bitdata s.t. the bottom bit is the bit we'd like to read.
   211                   *
   212                   * We could just set bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f)
   213                   * but this would simply discard the bits that fall off at the bottom,
   214                   * and thus, for example, bitdata could still have only two values if we
   215                   * happen to shift by exactly 31 positions. We use a rotation instead,
   216                   * which ensures that bitdata doesn't lose entropy. This relies on the
   217                   * rotation being atomic, i.e., the compiler emitting an actual rot
   218                   * instruction. */
   219                  uint32_t bitdata = secp256k1_rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f);
   220  
   221                  /* Clear the bit at position tooth; vmask is read through a volatile so the compiler (notably clang) cannot optimize the clearing away. */
   222                  uint32_t volatile vmask = ~(1 << tooth);
   223                  bits &= vmask;
   224  
   225                  /* Write the bit into position tooth (and junk into higher bits). */
   226                  bits ^= bitdata << tooth;
   227                  bit_pos += COMB_SPACING;
   228              }
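                    /* At this point the low COMB_TEETH bits of 'bits' hold the bits of d selected
                     * by mask(block) (tooth 0 in the lowest bit); the higher bits of 'bits'
                     * contain junk, and are discarded by the sign/abs computation below. */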
   229  
   230              /* If the top bit of bits is 1, flip them all (corresponding to looking up
   231               * the negated table value), and remember to negate the result in sign. */
   232              sign = (bits >> (COMB_TEETH - 1)) & 1;
   233              abs = (bits ^ -sign) & (COMB_POINTS - 1);
   234              VERIFY_CHECK(sign == 0 || sign == 1);
   235              VERIFY_CHECK(abs < COMB_POINTS);
   236  
   237              /** This uses a conditional move to avoid any secret data in array indexes.
   238               *   _Any_ use of secret indexes has been demonstrated to result in timing
   239               *   sidechannels, even when the cache-line access patterns are uniform.
   240               *  See also:
   241               *   "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe
   242               *    (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and
   243               *   "Cache Attacks and Countermeasures: the Case of AES", RSA 2006,
   244               *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
   245               *    (https://www.tau.ac.il/~tromer/papers/cache.pdf)
   246               */
   247              for (index = 0; index < COMB_POINTS; ++index) {
   248                  secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == abs);
   249              }
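                    /* Note: the scan above reads every one of the COMB_POINTS entries of this
                     * block's table, so the lookup cost is linear in the table size, but the
                     * memory access pattern is independent of the secret value abs. */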
   250  
   251              /* Set add=adds or add=-adds, in constant time, based on sign. */
   252              secp256k1_ge_from_storage(&add, &adds);
   253              secp256k1_fe_negate(&neg, &add.y, 1);
   254              secp256k1_fe_cmov(&add.y, &neg, sign);
   255  
   256              /* Add the looked up and conditionally negated value to r. */
   257              if (EXPECT(first, 0)) {
   258                  /* If this is the first table lookup, we can skip addition. */
   259                  secp256k1_gej_set_ge(r, &add);
   260                  /* Give the entry a secret Z coordinate (ctx->proj_blind) to blind intermediate results. */
   261                  secp256k1_gej_rescale(r, &ctx->proj_blind);
   262                  first = 0;
   263              } else {
   264                  secp256k1_gej_add_ge(r, r, &add);
   265              }
   266          }
   267  
   268          /* Double the result, except in the last iteration. */
   269          if (comb_off-- == 0) break;
   270          secp256k1_gej_double(r, r);
   271      }
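            /* At this point r = comb(d, G/2), i.e. r represents (gn - b)*G; its Z coordinate
             * additionally carries the ctx->proj_blind factor introduced above. */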
   272  
   273      /* Add ge_offset = b*G to compensate for the -b term inside scalar_offset, which was
   274       * added to the input scalar gn at the start. */
   275      secp256k1_gej_add_ge(r, r, &ctx->ge_offset);
   276  
   277      /* Cleanup. */
   278      secp256k1_fe_clear(&neg);
   279      secp256k1_ge_clear(&add);
   280      secp256k1_memclear(&adds, sizeof(adds));
   281      secp256k1_memclear(&recoded, sizeof(recoded));
   282  }
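
        /* Illustrative sketch (not part of the library): how a caller typically drives
         * secp256k1_ecmult_gen to compute k*G. The helper name and the macro guarding it are
         * hypothetical; real callers (signing, pubkey creation) pass a secret scalar rather
         * than a small integer. */
        #ifdef SECP256K1_ECMULT_GEN_USAGE_EXAMPLE
        static void secp256k1_ecmult_gen_usage_example(const secp256k1_ecmult_gen_context *ctx, secp256k1_ge *r, unsigned int k) {
            secp256k1_scalar s;
            secp256k1_gej rj;
            VERIFY_CHECK(secp256k1_ecmult_gen_context_is_built(ctx));
            secp256k1_scalar_set_int(&s, k);     /* s = k */
            secp256k1_ecmult_gen(ctx, &rj, &s);  /* rj = k*G, computed with blinding */
            secp256k1_ge_set_gej(r, &rj);        /* convert the result to affine coordinates */
            secp256k1_scalar_clear(&s);
            secp256k1_gej_clear(&rj);
        }
        #endif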
   283  
   284  /* Set up the blinding values for secp256k1_ecmult_gen. */
   285  static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
   286      secp256k1_scalar b;
   287      secp256k1_scalar diff;
   288      secp256k1_gej gb;
   289      secp256k1_fe f;
   290      unsigned char nonce32[32];
   291      secp256k1_rfc6979_hmac_sha256 rng;
   292      unsigned char keydata[64];
   293  
   294      /* Compute the (2^COMB_BITS - 1)/2 term once. */
   295      secp256k1_ecmult_gen_scalar_diff(&diff);
   296  
   297      if (seed32 == NULL) {
   298          /* When seed is NULL, reset to fixed blinding values: this corresponds to b = -1, giving ge_offset = -G, scalar_offset = diff + 1, and proj_blind = 1. */
   299          secp256k1_ge_neg(&ctx->ge_offset, &secp256k1_ge_const_g);
   300          secp256k1_scalar_add(&ctx->scalar_offset, &secp256k1_scalar_one, &diff);
   301          ctx->proj_blind = secp256k1_fe_one;
   302          return;
   303      }
   304      /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
   305      secp256k1_scalar_get_b32(keydata, &ctx->scalar_offset);
   306      /** Using a CSPRNG allows a failure-free interface, avoids needing large amounts of random data,
   307       *   and guards against weak or adversarial seeds.  This is a simpler and safer interface than
   308       *   asking the caller for blinding values directly and expecting them to retry on failure.
   309       */
   310      VERIFY_CHECK(seed32 != NULL);
   311      memcpy(keydata + 32, seed32, 32);
   312      secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, 64);
   313      secp256k1_memclear(keydata, sizeof(keydata));
   314  
   315      /* Compute projective blinding factor (cannot be 0). */
   316      secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
   317      secp256k1_fe_set_b32_mod(&f, nonce32);
   318      secp256k1_fe_cmov(&f, &secp256k1_fe_one, secp256k1_fe_normalizes_to_zero(&f));
   319      ctx->proj_blind = f;
   320  
   321      /* For a random blinding value b, set scalar_offset = diff - b and ge_offset = b*G. */
   322      secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
   323      secp256k1_scalar_set_b32(&b, nonce32, NULL);
   324      /* The blinding value cannot be zero, as that would mean ge_offset = infinity,
   325       * which secp256k1_gej_add_ge cannot handle. */
   326      secp256k1_scalar_cmov(&b, &secp256k1_scalar_one, secp256k1_scalar_is_zero(&b));
   327      secp256k1_rfc6979_hmac_sha256_finalize(&rng);
   328      secp256k1_ecmult_gen(ctx, &gb, &b);
   329      secp256k1_scalar_negate(&b, &b);
   330      secp256k1_scalar_add(&ctx->scalar_offset, &b, &diff);
   331      secp256k1_ge_set_gej(&ctx->ge_offset, &gb);
   332  
   333      /* Clean up. */
   334      secp256k1_memclear(nonce32, sizeof(nonce32));
   335      secp256k1_scalar_clear(&b);
   336      secp256k1_gej_clear(&gb);
   337      secp256k1_fe_clear(&f);
   338      secp256k1_rfc6979_hmac_sha256_clear(&rng);
   339  }
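
        /* Illustrative sketch (not part of the library): the typical lifecycle of the blinding
         * state. secp256k1_context_randomize in the public API is, at its core, the
         * secp256k1_ecmult_gen_blind call below; passing NULL instead of a seed resets the
         * blinding values to the fixed defaults. The helper name and macro are hypothetical. */
        #ifdef SECP256K1_ECMULT_GEN_BLIND_EXAMPLE
        static void secp256k1_ecmult_gen_blind_example(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
            secp256k1_ecmult_gen_context_build(ctx);  /* installs the fixed default blinding */
            secp256k1_ecmult_gen_blind(ctx, seed32);  /* re-randomize using the caller's 32-byte seed */
            /* ... perform secp256k1_ecmult_gen calls here ... */
            secp256k1_ecmult_gen_context_clear(ctx);  /* wipe the blinding secrets when done */
        }
        #endif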
   340  
   341  #endif /* SECP256K1_ECMULT_GEN_IMPL_H */