github.com/ethereum/go-ethereum@v1.16.1/crypto/secp256k1/libsecp256k1/src/modinv32_impl.h (about)

     1  /***********************************************************************
     2   * Copyright (c) 2020 Peter Dettman                                    *
     3   * Distributed under the MIT software license, see the accompanying    *
     4   * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
     5   **********************************************************************/
     6  
     7  #ifndef SECP256K1_MODINV32_IMPL_H
     8  #define SECP256K1_MODINV32_IMPL_H
     9  
    10  #include "modinv32.h"
    11  
    12  #include "util.h"
    13  
    14  #include <stdlib.h>
    15  
    16  /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
    17   * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
    18   *
    19   * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
    20   * implementation for N=30, using 30-bit signed limbs represented as int32_t.
    21   */
    22  
    23  #ifdef VERIFY
    24  static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
    25  
    26  /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
    27  static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
    28      const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    29      int64_t c = 0;
    30      int i;
    31      for (i = 0; i < 8; ++i) {
    32          if (i < alen) c += (int64_t)a->v[i] * factor;
    33          r->v[i] = (int32_t)c & M30; c >>= 30;
    34      }
    35      if (8 < alen) c += (int64_t)a->v[8] * factor;
    36      VERIFY_CHECK(c == (int32_t)c);
    37      r->v[8] = (int32_t)c;
    38  }
    39  
    40  /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
    41  static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
    42      int i;
    43      secp256k1_modinv32_signed30 am, bm;
    44      secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
    45      secp256k1_modinv32_mul_30(&bm, b, 9, factor);
    46      for (i = 0; i < 8; ++i) {
    47          /* Verify that all but the top limb of a and b are normalized. */
    48          VERIFY_CHECK(am.v[i] >> 30 == 0);
    49          VERIFY_CHECK(bm.v[i] >> 30 == 0);
    50      }
    51      for (i = 8; i >= 0; --i) {
    52          if (am.v[i] < bm.v[i]) return -1;
    53          if (am.v[i] > bm.v[i]) return 1;
    54      }
    55      return 0;
    56  }
    57  #endif
    58  
    59  /* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
    60   * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
    61   * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
    62   * [0,2^30). */
    63  static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) {
    64      const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    65      int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
    66              r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
    67      volatile int32_t cond_add, cond_negate;
    68  
    69  #ifdef VERIFY
    70      /* Verify that all limbs are in range (-2^30,2^30). */
    71      int i;
    72      for (i = 0; i < 9; ++i) {
    73          VERIFY_CHECK(r->v[i] >= -M30);
    74          VERIFY_CHECK(r->v[i] <= M30);
    75      }
    76      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
    77      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
    78  #endif
    79  
    80      /* In a first step, add the modulus if the input is negative, and then negate if requested.
    81       * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
    82       * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
    83       * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
    84       * indeed the behavior of the right shift operator). */
    85      cond_add = r8 >> 31;
    86      r0 += modinfo->modulus.v[0] & cond_add;
    87      r1 += modinfo->modulus.v[1] & cond_add;
    88      r2 += modinfo->modulus.v[2] & cond_add;
    89      r3 += modinfo->modulus.v[3] & cond_add;
    90      r4 += modinfo->modulus.v[4] & cond_add;
    91      r5 += modinfo->modulus.v[5] & cond_add;
    92      r6 += modinfo->modulus.v[6] & cond_add;
    93      r7 += modinfo->modulus.v[7] & cond_add;
    94      r8 += modinfo->modulus.v[8] & cond_add;
    95      cond_negate = sign >> 31;
    96      r0 = (r0 ^ cond_negate) - cond_negate;
    97      r1 = (r1 ^ cond_negate) - cond_negate;
    98      r2 = (r2 ^ cond_negate) - cond_negate;
    99      r3 = (r3 ^ cond_negate) - cond_negate;
   100      r4 = (r4 ^ cond_negate) - cond_negate;
   101      r5 = (r5 ^ cond_negate) - cond_negate;
   102      r6 = (r6 ^ cond_negate) - cond_negate;
   103      r7 = (r7 ^ cond_negate) - cond_negate;
   104      r8 = (r8 ^ cond_negate) - cond_negate;
   105      /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
   106      r1 += r0 >> 30; r0 &= M30;
   107      r2 += r1 >> 30; r1 &= M30;
   108      r3 += r2 >> 30; r2 &= M30;
   109      r4 += r3 >> 30; r3 &= M30;
   110      r5 += r4 >> 30; r4 &= M30;
   111      r6 += r5 >> 30; r5 &= M30;
   112      r7 += r6 >> 30; r6 &= M30;
   113      r8 += r7 >> 30; r7 &= M30;
   114  
   115      /* In a second step add the modulus again if the result is still negative, bringing r to range
   116       * [0,modulus). */
   117      cond_add = r8 >> 31;
   118      r0 += modinfo->modulus.v[0] & cond_add;
   119      r1 += modinfo->modulus.v[1] & cond_add;
   120      r2 += modinfo->modulus.v[2] & cond_add;
   121      r3 += modinfo->modulus.v[3] & cond_add;
   122      r4 += modinfo->modulus.v[4] & cond_add;
   123      r5 += modinfo->modulus.v[5] & cond_add;
   124      r6 += modinfo->modulus.v[6] & cond_add;
   125      r7 += modinfo->modulus.v[7] & cond_add;
   126      r8 += modinfo->modulus.v[8] & cond_add;
   127      /* And propagate again. */
   128      r1 += r0 >> 30; r0 &= M30;
   129      r2 += r1 >> 30; r1 &= M30;
   130      r3 += r2 >> 30; r2 &= M30;
   131      r4 += r3 >> 30; r3 &= M30;
   132      r5 += r4 >> 30; r4 &= M30;
   133      r6 += r5 >> 30; r5 &= M30;
   134      r7 += r6 >> 30; r6 &= M30;
   135      r8 += r7 >> 30; r7 &= M30;
   136  
   137      r->v[0] = r0;
   138      r->v[1] = r1;
   139      r->v[2] = r2;
   140      r->v[3] = r3;
   141      r->v[4] = r4;
   142      r->v[5] = r5;
   143      r->v[6] = r6;
   144      r->v[7] = r7;
   145      r->v[8] = r8;
   146  
   147      VERIFY_CHECK(r0 >> 30 == 0);
   148      VERIFY_CHECK(r1 >> 30 == 0);
   149      VERIFY_CHECK(r2 >> 30 == 0);
   150      VERIFY_CHECK(r3 >> 30 == 0);
   151      VERIFY_CHECK(r4 >> 30 == 0);
   152      VERIFY_CHECK(r5 >> 30 == 0);
   153      VERIFY_CHECK(r6 >> 30 == 0);
   154      VERIFY_CHECK(r7 >> 30 == 0);
   155      VERIFY_CHECK(r8 >> 30 == 0);
   156      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
   157      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
   158  }
   159  
   160  /* Data type for transition matrices (see section 3 of explanation).
   161   *
   162   * t = [ u  v ]
   163   *     [ q  r ]
   164   */
   165  typedef struct {
   166      int32_t u, v, q, r;
   167  } secp256k1_modinv32_trans2x2;
   168  
   169  /* Compute the transition matrix and zeta for 30 divsteps.
   170   *
   171   * Input:  zeta: initial zeta
   172   *         f0:   bottom limb of initial f
   173   *         g0:   bottom limb of initial g
   174   * Output: t: transition matrix
   175   * Return: final zeta
   176   *
   177   * Implements the divsteps_n_matrix function from the explanation.
   178   */
   179  static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
   180      /* u,v,q,r are the elements of the transformation matrix being built up,
   181       * starting with the identity matrix. Semantically they are signed integers
   182       * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
   183       * permits left shifting (which is UB for negative numbers). The range
   184       * being inside [-2^31,2^31) means that casting to signed works correctly.
   185       */
   186      uint32_t u = 1, v = 0, q = 0, r = 1;
   187      volatile uint32_t c1, c2;
   188      uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
   189      int i;
   190  
   191      for (i = 0; i < 30; ++i) {
   192          VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
   193          VERIFY_CHECK((u * f0 + v * g0) == f << i);
   194          VERIFY_CHECK((q * f0 + r * g0) == g << i);
   195          /* Compute conditional masks for (zeta < 0) and for (g & 1). */
   196          c1 = zeta >> 31;
   197          mask1 = c1;
   198          c2 = g & 1;
   199          mask2 = -c2;
   200          /* Compute x,y,z, conditionally negated versions of f,u,v. */
   201          x = (f ^ mask1) - mask1;
   202          y = (u ^ mask1) - mask1;
   203          z = (v ^ mask1) - mask1;
   204          /* Conditionally add x,y,z to g,q,r. */
   205          g += x & mask2;
   206          q += y & mask2;
   207          r += z & mask2;
   208          /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
   209          mask1 &= mask2;
   210          /* Conditionally change zeta into -zeta-2 or zeta-1. */
   211          zeta = (zeta ^ mask1) - 1;
   212          /* Conditionally add g,q,r to f,u,v. */
   213          f += g & mask1;
   214          u += q & mask1;
   215          v += r & mask1;
   216          /* Shifts */
   217          g >>= 1;
   218          u <<= 1;
   219          v <<= 1;
   220          /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
   221          VERIFY_CHECK(zeta >= -601 && zeta <= 601);
   222      }
   223      /* Return data in t and return value. */
   224      t->u = (int32_t)u;
   225      t->v = (int32_t)v;
   226      t->q = (int32_t)q;
   227      t->r = (int32_t)r;
   228      /* The determinant of t must be a power of two. This guarantees that multiplication with t
   229       * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
   230       * will be divided out again). As each divstep's individual matrix has determinant 2, the
   231       * aggregate of 30 of them will have determinant 2^30. */
   232      VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
   233      return zeta;
   234  }
   235  
   236  /* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
   237  static const uint8_t secp256k1_modinv32_inv256[128] = {
   238      0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
   239      0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
   240      0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
   241      0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
   242      0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
   243      0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
   244      0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
   245      0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
   246      0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
   247      0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
   248      0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
   249  };
   250  
   251  /* Compute the transition matrix and eta for 30 divsteps (variable time).
   252   *
   253   * Input:  eta: initial eta
   254   *         f0:  bottom limb of initial f
   255   *         g0:  bottom limb of initial g
   256   * Output: t: transition matrix
   257   * Return: final eta
   258   *
   259   * Implements the divsteps_n_matrix_var function from the explanation.
   260   */
   261  static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
   262      /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
   263      uint32_t u = 1, v = 0, q = 0, r = 1;
   264      uint32_t f = f0, g = g0, m;
   265      uint16_t w;
   266      int i = 30, limit, zeros;
   267  
   268      for (;;) {
   269          /* Use a sentinel bit to count zeros only up to i. */
   270          zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
   271          /* Perform zeros divsteps at once; they all just divide g by two. */
   272          g >>= zeros;
   273          u <<= zeros;
   274          v <<= zeros;
   275          eta -= zeros;
   276          i -= zeros;
   277           /* We're done once we've done 30 divsteps. */
   278          if (i == 0) break;
   279          VERIFY_CHECK((f & 1) == 1);
   280          VERIFY_CHECK((g & 1) == 1);
   281          VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
   282          VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
   283          /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
   284          VERIFY_CHECK(eta >= -751 && eta <= 751);
   285          /* If eta is negative, negate it and replace f,g with g,-f. */
   286          if (eta < 0) {
   287              uint32_t tmp;
   288              eta = -eta;
   289              tmp = f; f = g; g = -tmp;
   290              tmp = u; u = q; q = -tmp;
   291              tmp = v; v = r; r = -tmp;
   292          }
   293          /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
   294           * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
   295           * can be done as its sign will flip once that happens. */
   296          limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
   297          /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
   298          VERIFY_CHECK(limit > 0 && limit <= 30);
   299          m = (UINT32_MAX >> (32 - limit)) & 255U;
   300          /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
   301          w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
   302          /* Do so. */
   303          g += f * w;
   304          q += u * w;
   305          r += v * w;
   306          VERIFY_CHECK((g & m) == 0);
   307      }
   308      /* Return data in t and return value. */
   309      t->u = (int32_t)u;
   310      t->v = (int32_t)v;
   311      t->q = (int32_t)q;
   312      t->r = (int32_t)r;
   313      /* The determinant of t must be a power of two. This guarantees that multiplication with t
   314       * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
   315       * will be divided out again). As each divstep's individual matrix has determinant 2, the
   316       * aggregate of 30 of them will have determinant 2^30. */
   317      VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
   318      return eta;
   319  }
   320  
   321  /* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
   322   * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
   323   * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
   324   *
   325   * Input:        eta: initial eta
   326   *               f0:  bottom limb of initial f
   327   *               g0:  bottom limb of initial g
   328   * Output:       t: transition matrix
   329   * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
   330   *               by applying the returned transformation matrix to it. The other bits of *jacp may
   331   *               change, but are meaningless.
   332   * Return: final eta
   333   */
   334  static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
   335      /* Transformation matrix. */
   336      uint32_t u = 1, v = 0, q = 0, r = 1;
   337      uint32_t f = f0, g = g0, m;
   338      uint16_t w;
   339      int i = 30, limit, zeros;
   340      int jac = *jacp;
   341  
   342      for (;;) {
   343          /* Use a sentinel bit to count zeros only up to i. */
   344          zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
   345          /* Perform zeros divsteps at once; they all just divide g by two. */
   346          g >>= zeros;
   347          u <<= zeros;
   348          v <<= zeros;
   349          eta -= zeros;
   350          i -= zeros;
   351          /* Update the bottom bit of jac: when dividing g by an odd power of 2,
   352           * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
   353          jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
   354          /* We're done once we've done 30 posdivsteps. */
   355          if (i == 0) break;
   356          VERIFY_CHECK((f & 1) == 1);
   357          VERIFY_CHECK((g & 1) == 1);
   358          VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
   359          VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
   360          /* If eta is negative, negate it and replace f,g with g,f. */
   361          if (eta < 0) {
   362              uint32_t tmp;
   363              eta = -eta;
   364              /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
   365               * if both f and g are 3 mod 4. */
   366              jac ^= ((f & g) >> 1);
   367              tmp = f; f = g; g = tmp;
   368              tmp = u; u = q; q = tmp;
   369              tmp = v; v = r; r = tmp;
   370          }
   371          /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
   372           * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
   373           * can be done as its sign will flip once that happens. */
   374          limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
   375          /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
   376          VERIFY_CHECK(limit > 0 && limit <= 30);
   377          m = (UINT32_MAX >> (32 - limit)) & 255U;
   378          /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
   379          w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
   380          /* Do so. */
   381          g += f * w;
   382          q += u * w;
   383          r += v * w;
   384          VERIFY_CHECK((g & m) == 0);
   385      }
   386      /* Return data in t and return value. */
   387      t->u = (int32_t)u;
   388      t->v = (int32_t)v;
   389      t->q = (int32_t)q;
   390      t->r = (int32_t)r;
   391      /* The determinant of t must be a power of two. This guarantees that multiplication with t
   392       * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
   393       * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
   394       * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
   395      VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
   396                   (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
   397      *jacp = jac;
   398      return eta;
   399  }
   400  
   401  /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
   402   *
   403   * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
   404   * (-2^30,2^30).
   405   *
   406   * This implements the update_de function from the explanation.
   407   */
   408  static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) {
   409      const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
   410      const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
   411      int32_t di, ei, md, me, sd, se;
   412      int64_t cd, ce;
   413      int i;
   414      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
   415      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0);  /* d <    modulus */
   416      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
   417      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0);  /* e <    modulus */
   418      VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
   419      VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
   420  
   421      /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
   422      sd = d->v[8] >> 31;
   423      se = e->v[8] >> 31;
   424      md = (u & sd) + (v & se);
   425      me = (q & sd) + (r & se);
   426      /* Begin computing t*[d,e]. */
   427      di = d->v[0];
   428      ei = e->v[0];
   429      cd = (int64_t)u * di + (int64_t)v * ei;
   430      ce = (int64_t)q * di + (int64_t)r * ei;
   431      /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
   432      md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
   433      me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
   434      /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
   435      cd += (int64_t)modinfo->modulus.v[0] * md;
   436      ce += (int64_t)modinfo->modulus.v[0] * me;
   437      /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
   438      VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
   439      VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
   440      /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
   441       * limb i-1 (shifting down by 30 bits). */
   442      for (i = 1; i < 9; ++i) {
   443          di = d->v[i];
   444          ei = e->v[i];
   445          cd += (int64_t)u * di + (int64_t)v * ei;
   446          ce += (int64_t)q * di + (int64_t)r * ei;
   447          cd += (int64_t)modinfo->modulus.v[i] * md;
   448          ce += (int64_t)modinfo->modulus.v[i] * me;
   449          d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
   450          e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
   451      }
   452      /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
   453      d->v[8] = (int32_t)cd;
   454      e->v[8] = (int32_t)ce;
   455  
   456      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
   457      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0);  /* d <    modulus */
   458      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
   459      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0);  /* e <    modulus */
   460  }
   461  
   462  /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
   463   *
   464   * This implements the update_fg function from the explanation.
   465   */
   466  static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
   467      const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
   468      const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
   469      int32_t fi, gi;
   470      int64_t cf, cg;
   471      int i;
   472      /* Start computing t*[f,g]. */
   473      fi = f->v[0];
   474      gi = g->v[0];
   475      cf = (int64_t)u * fi + (int64_t)v * gi;
   476      cg = (int64_t)q * fi + (int64_t)r * gi;
   477      /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
   478      VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
   479      VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
   480      /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
   481       * down by 30 bits). */
   482      for (i = 1; i < 9; ++i) {
   483          fi = f->v[i];
   484          gi = g->v[i];
   485          cf += (int64_t)u * fi + (int64_t)v * gi;
   486          cg += (int64_t)q * fi + (int64_t)r * gi;
   487          f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
   488          g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
   489      }
   490      /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
   491      f->v[8] = (int32_t)cf;
   492      g->v[8] = (int32_t)cg;
   493  }
   494  
   495  /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
   496   *
   497   * Version that operates on a variable number of limbs in f and g.
   498   *
   499   * This implements the update_fg function from the explanation in modinv64_impl.h.
   500   */
   501  static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
   502      const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
   503      const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
   504      int32_t fi, gi;
   505      int64_t cf, cg;
   506      int i;
   507      VERIFY_CHECK(len > 0);
   508      /* Start computing t*[f,g]. */
   509      fi = f->v[0];
   510      gi = g->v[0];
   511      cf = (int64_t)u * fi + (int64_t)v * gi;
   512      cg = (int64_t)q * fi + (int64_t)r * gi;
   513      /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
   514      VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
   515      VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
   516      /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
   517       * down by 30 bits). */
   518      for (i = 1; i < len; ++i) {
   519          fi = f->v[i];
   520          gi = g->v[i];
   521          cf += (int64_t)u * fi + (int64_t)v * gi;
   522          cg += (int64_t)q * fi + (int64_t)r * gi;
   523          f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
   524          g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
   525      }
   526      /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
   527      f->v[len - 1] = (int32_t)cf;
   528      g->v[len - 1] = (int32_t)cg;
   529  }
   530  
   531  /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
   532  static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
   533      /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
   534      secp256k1_modinv32_signed30 d = {{0}};
   535      secp256k1_modinv32_signed30 e = {{1}};
   536      secp256k1_modinv32_signed30 f = modinfo->modulus;
   537      secp256k1_modinv32_signed30 g = *x;
   538      int i;
   539      int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
   540  
   541      /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
   542      for (i = 0; i < 20; ++i) {
   543          /* Compute transition matrix and new zeta after 30 divsteps. */
   544          secp256k1_modinv32_trans2x2 t;
   545          zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
   546          /* Update d,e using that transition matrix. */
   547          secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
   548          /* Update f,g using that transition matrix. */
   549          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
   550          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   551          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
   552          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0);  /* g <  modulus */
   553  
   554          secp256k1_modinv32_update_fg_30(&f, &g, &t);
   555  
   556          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
   557          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   558          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
   559          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0);  /* g <  modulus */
   560      }
   561  
   562      /* At this point sufficient iterations have been performed that g must have reached 0
   563       * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
   564       * values i.e. +/- 1, and d now contains +/- the modular inverse. */
   565  
   566      /* g == 0 */
   567      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
   568      /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
   569      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
   570                   secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
   571                   (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
   572                    secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
   573                    secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0));
   574  
   575      /* Optionally negate d, normalize to [0,modulus), and return it. */
   576      secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
   577      *x = d;
   578  }
   579  
   580  /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
   581  static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
   582      /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
   583      secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
   584      secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
   585      secp256k1_modinv32_signed30 f = modinfo->modulus;
   586      secp256k1_modinv32_signed30 g = *x;
   587  #ifdef VERIFY
   588      int i = 0;
   589  #endif
   590      int j, len = 9;
   591      int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
   592      int32_t cond, fn, gn;
   593  
   594      /* Do iterations of 30 divsteps each until g=0. */
   595      while (1) {
   596          /* Compute transition matrix and new eta after 30 divsteps. */
   597          secp256k1_modinv32_trans2x2 t;
   598          eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
   599          /* Update d,e using that transition matrix. */
   600          secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
   601          /* Update f,g using that transition matrix. */
   602  
   603          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
   604          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   605          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
   606          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
   607  
   608          secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
   609          /* If the bottom limb of g is 0, there is a chance g=0. */
   610          if (g.v[0] == 0) {
   611              cond = 0;
   612              /* Check if all other limbs are also 0. */
   613              for (j = 1; j < len; ++j) {
   614                  cond |= g.v[j];
   615              }
   616              /* If so, we're done. */
   617              if (cond == 0) break;
   618          }
   619  
   620          /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
   621          fn = f.v[len - 1];
   622          gn = g.v[len - 1];
   623          cond = ((int32_t)len - 2) >> 31;
   624          cond |= fn ^ (fn >> 31);
   625          cond |= gn ^ (gn >> 31);
   626          /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
   627          if (cond == 0) {
   628              f.v[len - 2] |= (uint32_t)fn << 30;
   629              g.v[len - 2] |= (uint32_t)gn << 30;
   630              --len;
   631          }
   632  
   633          VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
   634          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
   635          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   636          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
   637          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g <  modulus */
   638      }
   639  
   640      /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
   641       * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
   642  
   643      /* g == 0 */
   644      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
   645      /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
   646      VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
   647                   secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
   648                   (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
   649                    secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
   650                    secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0));
   651  
   652      /* Optionally negate d, normalize to [0,modulus), and return it. */
   653      secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
   654      *x = d;
   655  }
   656  
   657  /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
   658   * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
   659  #ifdef VERIFY
   660  #define JACOBI32_ITERATIONS 25
   661  #else
   662  #define JACOBI32_ITERATIONS 50
   663  #endif
   664  
   665  /* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
   666  static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
   667      /* Start with f=modulus, g=x, eta=-1. */
   668      secp256k1_modinv32_signed30 f = modinfo->modulus;
   669      secp256k1_modinv32_signed30 g = *x;
   670      int j, len = 9;
   671      int32_t eta = -1; /* eta = -delta; delta is initially 1 */
   672      int32_t cond, fn, gn;
   673      int jac = 0;
   674      int count;
   675  
   676      /* The input limbs must all be non-negative. */
   677      VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
   678  
   679      /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
   680       * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
   681       * time out). */
   682      VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
   683  
   684      for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
   685          /* Compute transition matrix and new eta after 30 posdivsteps. */
   686          secp256k1_modinv32_trans2x2 t;
   687          eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
   688          /* Update f,g using that transition matrix. */
   689          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
   690          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   691          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
   692          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
   693  
   694          secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
   695          /* If the bottom limb of f is 1, there is a chance that f=1. */
   696          if (f.v[0] == 1) {
   697              cond = 0;
   698              /* Check if the other limbs are also 0. */
   699              for (j = 1; j < len; ++j) {
   700                  cond |= f.v[j];
   701              }
   702              /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
   703              if (cond == 0) return 1 - 2*(jac & 1);
   704          }
   705  
   706          /* Determine if len>1 and limb (len-1) of both f and g is 0. */
   707          fn = f.v[len - 1];
   708          gn = g.v[len - 1];
   709          cond = ((int32_t)len - 2) >> 31;
   710          cond |= fn;
   711          cond |= gn;
   712          /* If so, reduce length. */
   713          if (cond == 0) --len;
   714  
   715          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
   716          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
   717          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
   718          VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
   719      }
   720  
   721      /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
   722      return 0;
   723  }
   724  
   725  #endif /* SECP256K1_MODINV32_IMPL_H */