github.com/ethereum/go-ethereum@v1.16.1/crypto/secp256k1/libsecp256k1/src/scalar_4x64_impl.h

     1  /***********************************************************************
     2   * Copyright (c) 2013, 2014 Pieter Wuille                              *
     3   * Distributed under the MIT software license, see the accompanying    *
     4   * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
     5   ***********************************************************************/
     6  
     7  #ifndef SECP256K1_SCALAR_REPR_IMPL_H
     8  #define SECP256K1_SCALAR_REPR_IMPL_H
     9  
    10  #include "checkmem.h"
    11  #include "int128.h"
    12  #include "modinv64_impl.h"
    13  #include "util.h"
    14  
    15  /* Limbs of the secp256k1 order. */
    16  #define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
    17  #define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
    18  #define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
    19  #define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
    20  
    21  /* Limbs of 2^256 minus the secp256k1 order. */
    22  #define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
    23  #define SECP256K1_N_C_1 (~SECP256K1_N_1)
    24  #define SECP256K1_N_C_2 (1)
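        /* Because the top 128 bits of n are 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE, the complement
         * 2^256 - n fits in 129 bits, so only three limbs are needed here and SECP256K1_N_C_2 is
         * simply 1. The reductions below rely on the identity 2^256 == SECP256K1_N_C (mod n). */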
    25  
    26  /* Limbs of half the secp256k1 order. */
    27  #define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
    28  #define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
    29  #define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
    30  #define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)
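        /* Written out in full, the constants above correspond to
         *   n    = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
         *   n//2 = 0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF5D576E7357A4501DDFE92F46681B20A0
         * (n//2 is used by secp256k1_scalar_is_high and secp256k1_scalar_half below). */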
    31  
    32  SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) {
    33      r->d[0] = v;
    34      r->d[1] = 0;
    35      r->d[2] = 0;
    36      r->d[3] = 0;
    37  
    38      SECP256K1_SCALAR_VERIFY(r);
    39  }
    40  
    41  SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    42      SECP256K1_SCALAR_VERIFY(a);
    43      VERIFY_CHECK(count > 0 && count <= 32);
    44      VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6);
    45  
    46      return (a->d[offset >> 6] >> (offset & 0x3F)) & (0xFFFFFFFF >> (32 - count));
    47  }
    48  
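        /* As above, but the requested bits may straddle a 64-bit limb boundary, in which case the
         * result is assembled from two adjacent limbs. */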
    49  SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    50      SECP256K1_SCALAR_VERIFY(a);
    51      VERIFY_CHECK(count > 0 && count <= 32);
    52      VERIFY_CHECK(offset + count <= 256);
    53  
    54      if ((offset + count - 1) >> 6 == offset >> 6) {
    55          return secp256k1_scalar_get_bits_limb32(a, offset, count);
    56      } else {
    57          VERIFY_CHECK((offset >> 6) + 1 < 4);
    58          return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & (0xFFFFFFFF >> (32 - count));
    59      }
    60  }
    61  
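        /* Constant-time check whether a >= n. The limbs are compared from most significant to
         * least significant, tracking "definitely below" (no) and "definitely above" (yes) without
         * branching on secret data. */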
    62  SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) {
    63      int yes = 0;
    64      int no = 0;
    65      no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check: SECP256K1_N_3 is the largest possible uint64_t. */
    66      no |= (a->d[2] < SECP256K1_N_2);
    67      yes |= (a->d[2] > SECP256K1_N_2) & ~no;
    68      no |= (a->d[1] < SECP256K1_N_1);
    69      yes |= (a->d[1] > SECP256K1_N_1) & ~no;
    70      yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
    71      return yes;
    72  }
    73  
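        /* Conditionally subtract n from r (when overflow is 1). Subtracting n modulo 2^256 is the
         * same as adding SECP256K1_N_C = 2^256 - n, which is what the additions below do. */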
    74  SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) {
    75      secp256k1_uint128 t;
    76      VERIFY_CHECK(overflow <= 1);
    77  
    78      secp256k1_u128_from_u64(&t, r->d[0]);
    79      secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_0);
    80      r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
    81      secp256k1_u128_accum_u64(&t, r->d[1]);
    82      secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_1);
    83      r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
    84      secp256k1_u128_accum_u64(&t, r->d[2]);
    85      secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_2);
    86      r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
    87      secp256k1_u128_accum_u64(&t, r->d[3]);
    88      r->d[3] = secp256k1_u128_to_u64(&t);
    89  
    90      SECP256K1_SCALAR_VERIFY(r);
    91      return overflow;
    92  }
    93  
    94  static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    95      int overflow;
    96      secp256k1_uint128 t;
    97      SECP256K1_SCALAR_VERIFY(a);
    98      SECP256K1_SCALAR_VERIFY(b);
    99  
   100      secp256k1_u128_from_u64(&t, a->d[0]);
   101      secp256k1_u128_accum_u64(&t, b->d[0]);
   102      r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   103      secp256k1_u128_accum_u64(&t, a->d[1]);
   104      secp256k1_u128_accum_u64(&t, b->d[1]);
   105      r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   106      secp256k1_u128_accum_u64(&t, a->d[2]);
   107      secp256k1_u128_accum_u64(&t, b->d[2]);
   108      r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   109      secp256k1_u128_accum_u64(&t, a->d[3]);
   110      secp256k1_u128_accum_u64(&t, b->d[3]);
   111      r->d[3] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   112      overflow = secp256k1_u128_to_u64(&t) + secp256k1_scalar_check_overflow(r);
   113      VERIFY_CHECK(overflow == 0 || overflow == 1);
   114      secp256k1_scalar_reduce(r, overflow);
   115  
   116      SECP256K1_SCALAR_VERIFY(r);
   117      return overflow;
   118  }
   119  
   120  static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
   121      secp256k1_uint128 t;
   122      volatile int vflag = flag;
   123      SECP256K1_SCALAR_VERIFY(r);
   124      VERIFY_CHECK(bit < 256);
   125  
   126      bit += ((uint32_t) vflag - 1) & 0x100;  /* if flag is zero this forces (bit >> 6) > 3, making every addition below a no-op */
   127      secp256k1_u128_from_u64(&t, r->d[0]);
   128      secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
   129      r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   130      secp256k1_u128_accum_u64(&t, r->d[1]);
   131      secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
   132      r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   133      secp256k1_u128_accum_u64(&t, r->d[2]);
   134      secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
   135      r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   136      secp256k1_u128_accum_u64(&t, r->d[3]);
   137      secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
   138      r->d[3] = secp256k1_u128_to_u64(&t);
   139  
   140      SECP256K1_SCALAR_VERIFY(r);
   141      VERIFY_CHECK(secp256k1_u128_hi_u64(&t) == 0);
   142  }
   143  
   144  static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
   145      int over;
   146      r->d[0] = secp256k1_read_be64(&b32[24]);
   147      r->d[1] = secp256k1_read_be64(&b32[16]);
   148      r->d[2] = secp256k1_read_be64(&b32[8]);
   149      r->d[3] = secp256k1_read_be64(&b32[0]);
   150      over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
   151      if (overflow) {
   152          *overflow = over;
   153      }
   154  
   155      SECP256K1_SCALAR_VERIFY(r);
   156  }
   157  
   158  static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
   159      SECP256K1_SCALAR_VERIFY(a);
   160  
   161      secp256k1_write_be64(&bin[0],  a->d[3]);
   162      secp256k1_write_be64(&bin[8],  a->d[2]);
   163      secp256k1_write_be64(&bin[16], a->d[1]);
   164      secp256k1_write_be64(&bin[24], a->d[0]);
   165  }
   166  
   167  SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
   168      SECP256K1_SCALAR_VERIFY(a);
   169  
   170      return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0;
   171  }
   172  
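        /* Compute r = n - a (or 0 if a is 0). Since ~a = 2^256 - 1 - a, adding n + 1 to ~a gives
         * 2^256 + (n - a); the 2^256 carry falls off the top, and the nonzero mask clears the
         * result when a was zero (so that -0 stays 0 rather than becoming n). */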
   173  static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
   174      uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0);
   175      secp256k1_uint128 t;
   176      SECP256K1_SCALAR_VERIFY(a);
   177  
   178      secp256k1_u128_from_u64(&t, ~a->d[0]);
   179      secp256k1_u128_accum_u64(&t, SECP256K1_N_0 + 1);
   180      r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   181      secp256k1_u128_accum_u64(&t, ~a->d[1]);
   182      secp256k1_u128_accum_u64(&t, SECP256K1_N_1);
   183      r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   184      secp256k1_u128_accum_u64(&t, ~a->d[2]);
   185      secp256k1_u128_accum_u64(&t, SECP256K1_N_2);
   186      r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   187      secp256k1_u128_accum_u64(&t, ~a->d[3]);
   188      secp256k1_u128_accum_u64(&t, SECP256K1_N_3);
   189      r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
   190  
   191      SECP256K1_SCALAR_VERIFY(r);
   192  }
   193  
   194  static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
   195      /* Writing `/` for field division and `//` for integer division, we compute
   196       *
   197       *   a/2 = (a - (a&1))/2 + (a&1)/2
   198       *       = (a >> 1) + (a&1 ?    1/2 : 0)
   199       *       = (a >> 1) + (a&1 ? n//2+1 : 0),
   200       *
   201       * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
   202       * For n//2, we have the constants SECP256K1_N_H_0, ...
   203       *
   204       * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
   205       * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2           = (n-3)//2
   206       * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
   207       * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
   208       */
   209      uint64_t mask = -(uint64_t)(a->d[0] & 1U);
   210      secp256k1_uint128 t;
   211      SECP256K1_SCALAR_VERIFY(a);
   212  
   213      secp256k1_u128_from_u64(&t, (a->d[0] >> 1) | (a->d[1] << 63));
   214      secp256k1_u128_accum_u64(&t, (SECP256K1_N_H_0 + 1U) & mask);
   215      r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   216      secp256k1_u128_accum_u64(&t, (a->d[1] >> 1) | (a->d[2] << 63));
   217      secp256k1_u128_accum_u64(&t, SECP256K1_N_H_1 & mask);
   218      r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   219      secp256k1_u128_accum_u64(&t, (a->d[2] >> 1) | (a->d[3] << 63));
   220      secp256k1_u128_accum_u64(&t, SECP256K1_N_H_2 & mask);
   221      r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
   222      r->d[3] = secp256k1_u128_to_u64(&t) + (a->d[3] >> 1) + (SECP256K1_N_H_3 & mask);
   223  #ifdef VERIFY
   224      /* The line above only computed the bottom 64 bits of r->d[3]; redo the computation
   225       * in full 128 bits to make sure the top 64 bits are indeed zero. */
   226      secp256k1_u128_accum_u64(&t, a->d[3] >> 1);
   227      secp256k1_u128_accum_u64(&t, SECP256K1_N_H_3 & mask);
   228      secp256k1_u128_rshift(&t, 64);
   229      VERIFY_CHECK(secp256k1_u128_to_u64(&t) == 0);
   230  
   231      SECP256K1_SCALAR_VERIFY(r);
   232  #endif
   233  }
   234  
   235  SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
   236      SECP256K1_SCALAR_VERIFY(a);
   237  
   238      return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0;
   239  }
   240  
   241  static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
   242      int yes = 0;
   243      int no = 0;
   244      SECP256K1_SCALAR_VERIFY(a);
   245  
   246      no |= (a->d[3] < SECP256K1_N_H_3);
   247      yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
   248      no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check. */
   249      no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
   250      yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
   251      yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
   252      return yes;
   253  }
   254  
   255  static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
   256      /* If flag = 0, mask = 00...00 and this is a no-op;
   257       * if flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate. */
   258      volatile int vflag = flag;
   259      uint64_t mask = -vflag;
   260      uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
   261      secp256k1_uint128 t;
   262      SECP256K1_SCALAR_VERIFY(r);
   263  
   264      secp256k1_u128_from_u64(&t, r->d[0] ^ mask);
   265      secp256k1_u128_accum_u64(&t, (SECP256K1_N_0 + 1) & mask);
   266      r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   267      secp256k1_u128_accum_u64(&t, r->d[1] ^ mask);
   268      secp256k1_u128_accum_u64(&t, SECP256K1_N_1 & mask);
   269      r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   270      secp256k1_u128_accum_u64(&t, r->d[2] ^ mask);
   271      secp256k1_u128_accum_u64(&t, SECP256K1_N_2 & mask);
   272      r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
   273      secp256k1_u128_accum_u64(&t, r->d[3] ^ mask);
   274      secp256k1_u128_accum_u64(&t, SECP256K1_N_3 & mask);
   275      r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
   276  
   277      SECP256K1_SCALAR_VERIFY(r);
   278      return 2 * (mask == 0) - 1;
   279  }
   280  
   281  /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
   282  
   283  /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
   284  #define muladd(a,b) { \
   285      uint64_t tl, th; \
   286      { \
   287          secp256k1_uint128 t; \
   288          secp256k1_u128_mul(&t, a, b); \
   289          th = secp256k1_u128_hi_u64(&t);  /* at most 0xFFFFFFFFFFFFFFFE */ \
   290          tl = secp256k1_u128_to_u64(&t); \
   291      } \
   292      c0 += tl;                 /* overflow is handled on the next line */ \
   293      th += (c0 < tl);          /* at most 0xFFFFFFFFFFFFFFFF */ \
   294      c1 += th;                 /* overflow is handled on the next line */ \
   295      c2 += (c1 < th);          /* never overflows by contract (verified in the next line) */ \
   296      VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
   297  }
   298  
   299  /** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
   300  #define muladd_fast(a,b) { \
   301      uint64_t tl, th; \
   302      { \
   303          secp256k1_uint128 t; \
   304          secp256k1_u128_mul(&t, a, b); \
   305          th = secp256k1_u128_hi_u64(&t);  /* at most 0xFFFFFFFFFFFFFFFE */ \
   306          tl = secp256k1_u128_to_u64(&t); \
   307      } \
   308      c0 += tl;                 /* overflow is handled on the next line */ \
   309      th += (c0 < tl);          /* at most 0xFFFFFFFFFFFFFFFF */ \
   310      c1 += th;                 /* never overflows by contract (verified in the next line) */ \
   311      VERIFY_CHECK(c1 >= th); \
   312  }
   313  
   314  /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
   315  #define sumadd(a) { \
   316      unsigned int over; \
   317      c0 += (a);                  /* overflow is handled on the next line */ \
   318      over = (c0 < (a));         \
   319      c1 += over;                 /* overflow is handled on the next line */ \
   320      c2 += (c1 < over);          /* never overflows by contract */ \
   321  }
   322  
   323  /** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
   324  #define sumadd_fast(a) { \
   325      c0 += (a);                 /* overflow is handled on the next line */ \
   326      c1 += (c0 < (a));          /* never overflows by contract (verified in the next line) */ \
   327      VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
   328      VERIFY_CHECK(c2 == 0); \
   329  }
   330  
   331  /** Extract the lowest 64 bits of (c0,c1,c2) into n, and shift the number right by 64 bits. */
   332  #define extract(n) { \
   333      (n) = c0; \
   334      c0 = c1; \
   335      c1 = c2; \
   336      c2 = 0; \
   337  }
   338  
   339  /** Extract the lowest 64 bits of (c0,c1,c2) into n, and shift the number right by 64 bits. c2 is required to be zero. */
   340  #define extract_fast(n) { \
   341      (n) = c0; \
   342      c0 = c1; \
   343      c1 = 0; \
   344      VERIFY_CHECK(c2 == 0); \
   345  }
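        /* The macros above implement a small column accumulator (c0,c1,c2) for schoolbook
         * multiplication and reduction: for output limb k, every partial product or term belonging
         * to column k is added with muladd/sumadd, then extract(l[k]) stores the low 64 bits and
         * shifts the accumulator down. The *_fast variants omit the third carry limb in places
         * where the column provably cannot need it. */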
   346  
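        /* Reduce a 512-bit number l modulo n in three passes, repeatedly folding the high limbs
         * back in via 2^256 == SECP256K1_N_C (mod n): 512 -> 385 bits, 385 -> 258 bits, and
         * 258 -> 256 bits, followed by a final conditional subtraction of n. */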
   347  static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l) {
   348  #ifdef USE_ASM_X86_64
   349      /* Reduce 512 bits into 385. */
   350      uint64_t m0, m1, m2, m3, m4, m5, m6;
   351      uint64_t p0, p1, p2, p3, p4;
   352      uint64_t c;
   353  
   354      __asm__ __volatile__(
   355      /* Preload. */
   356      "movq 32(%%rsi), %%r11\n"
   357      "movq 40(%%rsi), %%r12\n"
   358      "movq 48(%%rsi), %%r13\n"
   359      "movq 56(%%rsi), %%r14\n"
   360      /* Initialize r8,r9,r10 */
   361      "movq 0(%%rsi), %%r8\n"
   362      "xorq %%r9, %%r9\n"
   363      "xorq %%r10, %%r10\n"
   364      /* (r8,r9) += n0 * c0 */
   365      "movq %8, %%rax\n"
   366      "mulq %%r11\n"
   367      "addq %%rax, %%r8\n"
   368      "adcq %%rdx, %%r9\n"
   369      /* extract m0 */
   370      "movq %%r8, %q0\n"
   371      "xorq %%r8, %%r8\n"
   372      /* (r9,r10) += l1 */
   373      "addq 8(%%rsi), %%r9\n"
   374      "adcq $0, %%r10\n"
   375      /* (r9,r10,r8) += n1 * c0 */
   376      "movq %8, %%rax\n"
   377      "mulq %%r12\n"
   378      "addq %%rax, %%r9\n"
   379      "adcq %%rdx, %%r10\n"
   380      "adcq $0, %%r8\n"
   381      /* (r9,r10,r8) += n0 * c1 */
   382      "movq %9, %%rax\n"
   383      "mulq %%r11\n"
   384      "addq %%rax, %%r9\n"
   385      "adcq %%rdx, %%r10\n"
   386      "adcq $0, %%r8\n"
   387      /* extract m1 */
   388      "movq %%r9, %q1\n"
   389      "xorq %%r9, %%r9\n"
   390      /* (r10,r8,r9) += l2 */
   391      "addq 16(%%rsi), %%r10\n"
   392      "adcq $0, %%r8\n"
   393      "adcq $0, %%r9\n"
   394      /* (r10,r8,r9) += n2 * c0 */
   395      "movq %8, %%rax\n"
   396      "mulq %%r13\n"
   397      "addq %%rax, %%r10\n"
   398      "adcq %%rdx, %%r8\n"
   399      "adcq $0, %%r9\n"
   400      /* (r10,r8,r9) += n1 * c1 */
   401      "movq %9, %%rax\n"
   402      "mulq %%r12\n"
   403      "addq %%rax, %%r10\n"
   404      "adcq %%rdx, %%r8\n"
   405      "adcq $0, %%r9\n"
   406      /* (r10,r8,r9) += n0 */
   407      "addq %%r11, %%r10\n"
   408      "adcq $0, %%r8\n"
   409      "adcq $0, %%r9\n"
   410      /* extract m2 */
   411      "movq %%r10, %q2\n"
   412      "xorq %%r10, %%r10\n"
   413      /* (r8,r9,r10) += l3 */
   414      "addq 24(%%rsi), %%r8\n"
   415      "adcq $0, %%r9\n"
   416      "adcq $0, %%r10\n"
   417      /* (r8,r9,r10) += n3 * c0 */
   418      "movq %8, %%rax\n"
   419      "mulq %%r14\n"
   420      "addq %%rax, %%r8\n"
   421      "adcq %%rdx, %%r9\n"
   422      "adcq $0, %%r10\n"
   423      /* (r8,r9,r10) += n2 * c1 */
   424      "movq %9, %%rax\n"
   425      "mulq %%r13\n"
   426      "addq %%rax, %%r8\n"
   427      "adcq %%rdx, %%r9\n"
   428      "adcq $0, %%r10\n"
   429      /* (r8,r9,r10) += n1 */
   430      "addq %%r12, %%r8\n"
   431      "adcq $0, %%r9\n"
   432      "adcq $0, %%r10\n"
   433      /* extract m3 */
   434      "movq %%r8, %q3\n"
   435      "xorq %%r8, %%r8\n"
   436      /* (r9,r10,r8) += n3 * c1 */
   437      "movq %9, %%rax\n"
   438      "mulq %%r14\n"
   439      "addq %%rax, %%r9\n"
   440      "adcq %%rdx, %%r10\n"
   441      "adcq $0, %%r8\n"
   442      /* (r9,r10,r8) += n2 */
   443      "addq %%r13, %%r9\n"
   444      "adcq $0, %%r10\n"
   445      "adcq $0, %%r8\n"
   446      /* extract m4 */
   447      "movq %%r9, %q4\n"
   448      /* (r10,r8) += n3 */
   449      "addq %%r14, %%r10\n"
   450      "adcq $0, %%r8\n"
   451      /* extract m5 */
   452      "movq %%r10, %q5\n"
   453      /* extract m6 */
   454      "movq %%r8, %q6\n"
   455      : "=&g"(m0), "=&g"(m1), "=&g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
   456      : "S"(l), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
   457      : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
   458  
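            /* Inline asm is opaque to MemorySanitizer, so mark its outputs as initialized. */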
   459      SECP256K1_CHECKMEM_MSAN_DEFINE(&m0, sizeof(m0));
   460      SECP256K1_CHECKMEM_MSAN_DEFINE(&m1, sizeof(m1));
   461      SECP256K1_CHECKMEM_MSAN_DEFINE(&m2, sizeof(m2));
   462      SECP256K1_CHECKMEM_MSAN_DEFINE(&m3, sizeof(m3));
   463      SECP256K1_CHECKMEM_MSAN_DEFINE(&m4, sizeof(m4));
   464      SECP256K1_CHECKMEM_MSAN_DEFINE(&m5, sizeof(m5));
   465      SECP256K1_CHECKMEM_MSAN_DEFINE(&m6, sizeof(m6));
   466  
   467      /* Reduce 385 bits into 258. */
   468      __asm__ __volatile__(
   469      /* Preload */
   470      "movq %q9, %%r11\n"
   471      "movq %q10, %%r12\n"
   472      "movq %q11, %%r13\n"
   473      /* Initialize (r8,r9,r10) */
   474      "movq %q5, %%r8\n"
   475      "xorq %%r9, %%r9\n"
   476      "xorq %%r10, %%r10\n"
   477      /* (r8,r9) += m4 * c0 */
   478      "movq %12, %%rax\n"
   479      "mulq %%r11\n"
   480      "addq %%rax, %%r8\n"
   481      "adcq %%rdx, %%r9\n"
   482      /* extract p0 */
   483      "movq %%r8, %q0\n"
   484      "xorq %%r8, %%r8\n"
   485      /* (r9,r10) += m1 */
   486      "addq %q6, %%r9\n"
   487      "adcq $0, %%r10\n"
   488      /* (r9,r10,r8) += m5 * c0 */
   489      "movq %12, %%rax\n"
   490      "mulq %%r12\n"
   491      "addq %%rax, %%r9\n"
   492      "adcq %%rdx, %%r10\n"
   493      "adcq $0, %%r8\n"
   494      /* (r9,r10,r8) += m4 * c1 */
   495      "movq %13, %%rax\n"
   496      "mulq %%r11\n"
   497      "addq %%rax, %%r9\n"
   498      "adcq %%rdx, %%r10\n"
   499      "adcq $0, %%r8\n"
   500      /* extract p1 */
   501      "movq %%r9, %q1\n"
   502      "xorq %%r9, %%r9\n"
   503      /* (r10,r8,r9) += m2 */
   504      "addq %q7, %%r10\n"
   505      "adcq $0, %%r8\n"
   506      "adcq $0, %%r9\n"
   507      /* (r10,r8,r9) += m6 * c0 */
   508      "movq %12, %%rax\n"
   509      "mulq %%r13\n"
   510      "addq %%rax, %%r10\n"
   511      "adcq %%rdx, %%r8\n"
   512      "adcq $0, %%r9\n"
   513      /* (r10,r8,r9) += m5 * c1 */
   514      "movq %13, %%rax\n"
   515      "mulq %%r12\n"
   516      "addq %%rax, %%r10\n"
   517      "adcq %%rdx, %%r8\n"
   518      "adcq $0, %%r9\n"
   519      /* (r10,r8,r9) += m4 */
   520      "addq %%r11, %%r10\n"
   521      "adcq $0, %%r8\n"
   522      "adcq $0, %%r9\n"
   523      /* extract p2 */
   524      "movq %%r10, %q2\n"
   525      /* (r8,r9) += m3 */
   526      "addq %q8, %%r8\n"
   527      "adcq $0, %%r9\n"
   528      /* (r8,r9) += m6 * c1 */
   529      "movq %13, %%rax\n"
   530      "mulq %%r13\n"
   531      "addq %%rax, %%r8\n"
   532      "adcq %%rdx, %%r9\n"
   533      /* (r8,r9) += m5 */
   534      "addq %%r12, %%r8\n"
   535      "adcq $0, %%r9\n"
   536      /* extract p3 */
   537      "movq %%r8, %q3\n"
   538      /* (r9) += m6 */
   539      "addq %%r13, %%r9\n"
   540      /* extract p4 */
   541      "movq %%r9, %q4\n"
   542      : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
   543      : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
   544      : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
   545  
   546      SECP256K1_CHECKMEM_MSAN_DEFINE(&p0, sizeof(p0));
   547      SECP256K1_CHECKMEM_MSAN_DEFINE(&p1, sizeof(p1));
   548      SECP256K1_CHECKMEM_MSAN_DEFINE(&p2, sizeof(p2));
   549      SECP256K1_CHECKMEM_MSAN_DEFINE(&p3, sizeof(p3));
   550      SECP256K1_CHECKMEM_MSAN_DEFINE(&p4, sizeof(p4));
   551  
   552      /* Reduce 258 bits into 256. */
   553      __asm__ __volatile__(
   554      /* Preload */
   555      "movq %q5, %%r10\n"
   556      /* (rax,rdx) = p4 * c0 */
   557      "movq %7, %%rax\n"
   558      "mulq %%r10\n"
   559      /* (rax,rdx) += p0 */
   560      "addq %q1, %%rax\n"
   561      "adcq $0, %%rdx\n"
   562      /* extract r0 */
   563      "movq %%rax, 0(%q6)\n"
   564      /* Move to (r8,r9) */
   565      "movq %%rdx, %%r8\n"
   566      "xorq %%r9, %%r9\n"
   567      /* (r8,r9) += p1 */
   568      "addq %q2, %%r8\n"
   569      "adcq $0, %%r9\n"
   570      /* (r8,r9) += p4 * c1 */
   571      "movq %8, %%rax\n"
   572      "mulq %%r10\n"
   573      "addq %%rax, %%r8\n"
   574      "adcq %%rdx, %%r9\n"
   575      /* Extract r1 */
   576      "movq %%r8, 8(%q6)\n"
   577      "xorq %%r8, %%r8\n"
   578      /* (r9,r8) += p4 */
   579      "addq %%r10, %%r9\n"
   580      "adcq $0, %%r8\n"
   581      /* (r9,r8) += p2 */
   582      "addq %q3, %%r9\n"
   583      "adcq $0, %%r8\n"
   584      /* Extract r2 */
   585      "movq %%r9, 16(%q6)\n"
   586      "xorq %%r9, %%r9\n"
   587      /* (r8,r9) += p3 */
   588      "addq %q4, %%r8\n"
   589      "adcq $0, %%r9\n"
   590      /* Extract r3 */
   591      "movq %%r8, 24(%q6)\n"
   592      /* Extract c */
   593      "movq %%r9, %q0\n"
   594      : "=g"(c)
   595      : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
   596      : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
   597  
   598      SECP256K1_CHECKMEM_MSAN_DEFINE(r, sizeof(*r));
   599      SECP256K1_CHECKMEM_MSAN_DEFINE(&c, sizeof(c));
   600  
   601  #else
   602      secp256k1_uint128 c128;
   603      uint64_t c, c0, c1, c2;
   604      uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
   605      uint64_t m0, m1, m2, m3, m4, m5;
   606      uint32_t m6;
   607      uint64_t p0, p1, p2, p3;
   608      uint32_t p4;
   609  
   610      /* Reduce 512 bits into 385. */
   611      /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
   612      c0 = l[0]; c1 = 0; c2 = 0;
   613      muladd_fast(n0, SECP256K1_N_C_0);
   614      extract_fast(m0);
   615      sumadd_fast(l[1]);
   616      muladd(n1, SECP256K1_N_C_0);
   617      muladd(n0, SECP256K1_N_C_1);
   618      extract(m1);
   619      sumadd(l[2]);
   620      muladd(n2, SECP256K1_N_C_0);
   621      muladd(n1, SECP256K1_N_C_1);
   622      sumadd(n0);
   623      extract(m2);
   624      sumadd(l[3]);
   625      muladd(n3, SECP256K1_N_C_0);
   626      muladd(n2, SECP256K1_N_C_1);
   627      sumadd(n1);
   628      extract(m3);
   629      muladd(n3, SECP256K1_N_C_1);
   630      sumadd(n2);
   631      extract(m4);
   632      sumadd_fast(n3);
   633      extract_fast(m5);
   634      VERIFY_CHECK(c0 <= 1);
   635      m6 = c0;
   636  
   637      /* Reduce 385 bits into 258. */
   638      /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
   639      c0 = m0; c1 = 0; c2 = 0;
   640      muladd_fast(m4, SECP256K1_N_C_0);
   641      extract_fast(p0);
   642      sumadd_fast(m1);
   643      muladd(m5, SECP256K1_N_C_0);
   644      muladd(m4, SECP256K1_N_C_1);
   645      extract(p1);
   646      sumadd(m2);
   647      muladd(m6, SECP256K1_N_C_0);
   648      muladd(m5, SECP256K1_N_C_1);
   649      sumadd(m4);
   650      extract(p2);
   651      sumadd_fast(m3);
   652      muladd_fast(m6, SECP256K1_N_C_1);
   653      sumadd_fast(m5);
   654      extract_fast(p3);
   655      p4 = c0 + m6;
   656      VERIFY_CHECK(p4 <= 2);
   657  
   658      /* Reduce 258 bits into 256. */
   659      /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
   660      secp256k1_u128_from_u64(&c128, p0);
   661      secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_0, p4);
   662      r->d[0] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
   663      secp256k1_u128_accum_u64(&c128, p1);
   664      secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_1, p4);
   665      r->d[1] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
   666      secp256k1_u128_accum_u64(&c128, p2);
   667      secp256k1_u128_accum_u64(&c128, p4);
   668      r->d[2] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
   669      secp256k1_u128_accum_u64(&c128, p3);
   670      r->d[3] = secp256k1_u128_to_u64(&c128);
   671      c = secp256k1_u128_hi_u64(&c128);
   672  #endif
   673  
   674      /* Final reduction of r. */
   675      secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
   676  }
   677  
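        /* Compute the full 512-bit product l8 = a * b using 64x64->128-bit schoolbook
         * multiplication (one column of partial products per output limb). */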
   678  static void secp256k1_scalar_mul_512(uint64_t *l8, const secp256k1_scalar *a, const secp256k1_scalar *b) {
   679  #ifdef USE_ASM_X86_64
   680      const uint64_t *pb = b->d;
   681      __asm__ __volatile__(
   682      /* Preload */
   683      "movq 0(%%rdi), %%r15\n"
   684      "movq 8(%%rdi), %%rbx\n"
   685      "movq 16(%%rdi), %%rcx\n"
   686      "movq 0(%%rdx), %%r11\n"
   687      "movq 8(%%rdx), %%r12\n"
   688      "movq 16(%%rdx), %%r13\n"
   689      "movq 24(%%rdx), %%r14\n"
   690      /* (rax,rdx) = a0 * b0 */
   691      "movq %%r15, %%rax\n"
   692      "mulq %%r11\n"
   693      /* Extract l8[0] */
   694      "movq %%rax, 0(%%rsi)\n"
   695      /* (r8,r9,r10) = (rdx) */
   696      "movq %%rdx, %%r8\n"
   697      "xorq %%r9, %%r9\n"
   698      "xorq %%r10, %%r10\n"
   699      /* (r8,r9,r10) += a0 * b1 */
   700      "movq %%r15, %%rax\n"
   701      "mulq %%r12\n"
   702      "addq %%rax, %%r8\n"
   703      "adcq %%rdx, %%r9\n"
   704      "adcq $0, %%r10\n"
   705      /* (r8,r9,r10) += a1 * b0 */
   706      "movq %%rbx, %%rax\n"
   707      "mulq %%r11\n"
   708      "addq %%rax, %%r8\n"
   709      "adcq %%rdx, %%r9\n"
   710      "adcq $0, %%r10\n"
   711      /* Extract l8[1] */
   712      "movq %%r8, 8(%%rsi)\n"
   713      "xorq %%r8, %%r8\n"
   714      /* (r9,r10,r8) += a0 * b2 */
   715      "movq %%r15, %%rax\n"
   716      "mulq %%r13\n"
   717      "addq %%rax, %%r9\n"
   718      "adcq %%rdx, %%r10\n"
   719      "adcq $0, %%r8\n"
   720      /* (r9,r10,r8) += a1 * b1 */
   721      "movq %%rbx, %%rax\n"
   722      "mulq %%r12\n"
   723      "addq %%rax, %%r9\n"
   724      "adcq %%rdx, %%r10\n"
   725      "adcq $0, %%r8\n"
   726      /* (r9,r10,r8) += a2 * b0 */
   727      "movq %%rcx, %%rax\n"
   728      "mulq %%r11\n"
   729      "addq %%rax, %%r9\n"
   730      "adcq %%rdx, %%r10\n"
   731      "adcq $0, %%r8\n"
   732      /* Extract l8[2] */
   733      "movq %%r9, 16(%%rsi)\n"
   734      "xorq %%r9, %%r9\n"
   735      /* (r10,r8,r9) += a0 * b3 */
   736      "movq %%r15, %%rax\n"
   737      "mulq %%r14\n"
   738      "addq %%rax, %%r10\n"
   739      "adcq %%rdx, %%r8\n"
   740      "adcq $0, %%r9\n"
   741      /* Preload a3 */
   742      "movq 24(%%rdi), %%r15\n"
   743      /* (r10,r8,r9) += a1 * b2 */
   744      "movq %%rbx, %%rax\n"
   745      "mulq %%r13\n"
   746      "addq %%rax, %%r10\n"
   747      "adcq %%rdx, %%r8\n"
   748      "adcq $0, %%r9\n"
   749      /* (r10,r8,r9) += a2 * b1 */
   750      "movq %%rcx, %%rax\n"
   751      "mulq %%r12\n"
   752      "addq %%rax, %%r10\n"
   753      "adcq %%rdx, %%r8\n"
   754      "adcq $0, %%r9\n"
   755      /* (r10,r8,r9) += a3 * b0 */
   756      "movq %%r15, %%rax\n"
   757      "mulq %%r11\n"
   758      "addq %%rax, %%r10\n"
   759      "adcq %%rdx, %%r8\n"
   760      "adcq $0, %%r9\n"
   761      /* Extract l8[3] */
   762      "movq %%r10, 24(%%rsi)\n"
   763      "xorq %%r10, %%r10\n"
   764      /* (r8,r9,r10) += a1 * b3 */
   765      "movq %%rbx, %%rax\n"
   766      "mulq %%r14\n"
   767      "addq %%rax, %%r8\n"
   768      "adcq %%rdx, %%r9\n"
   769      "adcq $0, %%r10\n"
   770      /* (r8,r9,r10) += a2 * b2 */
   771      "movq %%rcx, %%rax\n"
   772      "mulq %%r13\n"
   773      "addq %%rax, %%r8\n"
   774      "adcq %%rdx, %%r9\n"
   775      "adcq $0, %%r10\n"
   776      /* (r8,r9,r10) += a3 * b1 */
   777      "movq %%r15, %%rax\n"
   778      "mulq %%r12\n"
   779      "addq %%rax, %%r8\n"
   780      "adcq %%rdx, %%r9\n"
   781      "adcq $0, %%r10\n"
   782      /* Extract l8[4] */
   783      "movq %%r8, 32(%%rsi)\n"
   784      "xorq %%r8, %%r8\n"
   785      /* (r9,r10,r8) += a2 * b3 */
   786      "movq %%rcx, %%rax\n"
   787      "mulq %%r14\n"
   788      "addq %%rax, %%r9\n"
   789      "adcq %%rdx, %%r10\n"
   790      "adcq $0, %%r8\n"
   791      /* (r9,r10,r8) += a3 * b2 */
   792      "movq %%r15, %%rax\n"
   793      "mulq %%r13\n"
   794      "addq %%rax, %%r9\n"
   795      "adcq %%rdx, %%r10\n"
   796      "adcq $0, %%r8\n"
   797      /* Extract l8[5] */
   798      "movq %%r9, 40(%%rsi)\n"
   799      /* (r10,r8) += a3 * b3 */
   800      "movq %%r15, %%rax\n"
   801      "mulq %%r14\n"
   802      "addq %%rax, %%r10\n"
   803      "adcq %%rdx, %%r8\n"
   804      /* Extract l8[6] */
   805      "movq %%r10, 48(%%rsi)\n"
   806      /* Extract l8[7] */
   807      "movq %%r8, 56(%%rsi)\n"
   808      : "+d"(pb)
   809      : "S"(l8), "D"(a->d)
   810      : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
   811  
   812      SECP256K1_CHECKMEM_MSAN_DEFINE(l8, sizeof(*l8) * 8);
   813  
   814  #else
   815      /* 160 bit accumulator. */
   816      uint64_t c0 = 0, c1 = 0;
   817      uint32_t c2 = 0;
   818  
   819      /* l8[0..7] = a[0..3] * b[0..3]. */
   820      muladd_fast(a->d[0], b->d[0]);
   821      extract_fast(l8[0]);
   822      muladd(a->d[0], b->d[1]);
   823      muladd(a->d[1], b->d[0]);
   824      extract(l8[1]);
   825      muladd(a->d[0], b->d[2]);
   826      muladd(a->d[1], b->d[1]);
   827      muladd(a->d[2], b->d[0]);
   828      extract(l8[2]);
   829      muladd(a->d[0], b->d[3]);
   830      muladd(a->d[1], b->d[2]);
   831      muladd(a->d[2], b->d[1]);
   832      muladd(a->d[3], b->d[0]);
   833      extract(l8[3]);
   834      muladd(a->d[1], b->d[3]);
   835      muladd(a->d[2], b->d[2]);
   836      muladd(a->d[3], b->d[1]);
   837      extract(l8[4]);
   838      muladd(a->d[2], b->d[3]);
   839      muladd(a->d[3], b->d[2]);
   840      extract(l8[5]);
   841      muladd_fast(a->d[3], b->d[3]);
   842      extract_fast(l8[6]);
   843      VERIFY_CHECK(c1 == 0);
   844      l8[7] = c0;
   845  #endif
   846  }
   847  
   848  #undef sumadd
   849  #undef sumadd_fast
   850  #undef muladd
   851  #undef muladd_fast
   852  #undef extract
   853  #undef extract_fast
   854  
   855  static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
   856      uint64_t l[8];
   857      SECP256K1_SCALAR_VERIFY(a);
   858      SECP256K1_SCALAR_VERIFY(b);
   859  
   860      secp256k1_scalar_mul_512(l, a, b);
   861      secp256k1_scalar_reduce_512(r, l);
   862  
   863      SECP256K1_SCALAR_VERIFY(r);
   864  }
   865  
   866  static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
   867      SECP256K1_SCALAR_VERIFY(k);
   868  
   869      r1->d[0] = k->d[0];
   870      r1->d[1] = k->d[1];
   871      r1->d[2] = 0;
   872      r1->d[3] = 0;
   873      r2->d[0] = k->d[2];
   874      r2->d[1] = k->d[3];
   875      r2->d[2] = 0;
   876      r2->d[3] = 0;
   877  
   878      SECP256K1_SCALAR_VERIFY(r1);
   879      SECP256K1_SCALAR_VERIFY(r2);
   880  }
   881  
   882  SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
   883      SECP256K1_SCALAR_VERIFY(a);
   884      SECP256K1_SCALAR_VERIFY(b);
   885  
   886      return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
   887  }
   888  
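        /* Compute (a * b) >> shift for shift >= 256, rounded to the nearest integer: take the
         * 512-bit product, shift it right by `shift` bits, and use cadd_bit to add the highest
         * discarded bit (rounding ties upward). */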
   889  SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
   890      uint64_t l[8];
   891      unsigned int shiftlimbs;
   892      unsigned int shiftlow;
   893      unsigned int shifthigh;
   894      SECP256K1_SCALAR_VERIFY(a);
   895      SECP256K1_SCALAR_VERIFY(b);
   896      VERIFY_CHECK(shift >= 256);
   897  
   898      secp256k1_scalar_mul_512(l, a, b);
   899      shiftlimbs = shift >> 6;
   900      shiftlow = shift & 0x3F;
   901      shifthigh = 64 - shiftlow;
   902      r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
   903      r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
   904      r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
   905      r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
   906      secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1);
   907  
   908      SECP256K1_SCALAR_VERIFY(r);
   909  }
   910  
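        /* Constant-time conditional move: r = flag ? a : r. The masks are derived from a volatile
         * copy of flag to discourage the compiler from reintroducing a branch. */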
   911  static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
   912      uint64_t mask0, mask1;
   913      volatile int vflag = flag;
   914      SECP256K1_SCALAR_VERIFY(a);
   915      SECP256K1_CHECKMEM_CHECK_VERIFY(r->d, sizeof(r->d));
   916  
   917      mask0 = vflag + ~((uint64_t)0);
   918      mask1 = ~mask0;
   919      r->d[0] = (r->d[0] & mask0) | (a->d[0] & mask1);
   920      r->d[1] = (r->d[1] & mask0) | (a->d[1] & mask1);
   921      r->d[2] = (r->d[2] & mask0) | (a->d[2] & mask1);
   922      r->d[3] = (r->d[3] & mask0) | (a->d[3] & mask1);
   923  
   924      SECP256K1_SCALAR_VERIFY(r);
   925  }
   926  
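        /* Conversions between the 4x64-bit scalar representation and the signed-62 representation
         * (five 62-bit limbs) used by the modular inversion code. */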
   927  static void secp256k1_scalar_from_signed62(secp256k1_scalar *r, const secp256k1_modinv64_signed62 *a) {
   928      const uint64_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4];
   929  
   930      /* The output from secp256k1_modinv64{_var} should be normalized to range [0,modulus), and
   931       * have limbs in [0,2^62). The modulus is < 2^256, so the top limb must be below 2^(256-62*4).
   932       */
   933      VERIFY_CHECK(a0 >> 62 == 0);
   934      VERIFY_CHECK(a1 >> 62 == 0);
   935      VERIFY_CHECK(a2 >> 62 == 0);
   936      VERIFY_CHECK(a3 >> 62 == 0);
   937      VERIFY_CHECK(a4 >> 8 == 0);
   938  
   939      r->d[0] = a0      | a1 << 62;
   940      r->d[1] = a1 >> 2 | a2 << 60;
   941      r->d[2] = a2 >> 4 | a3 << 58;
   942      r->d[3] = a3 >> 6 | a4 << 56;
   943  
   944      SECP256K1_SCALAR_VERIFY(r);
   945  }
   946  
   947  static void secp256k1_scalar_to_signed62(secp256k1_modinv64_signed62 *r, const secp256k1_scalar *a) {
   948      const uint64_t M62 = UINT64_MAX >> 2;
   949      const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
   950      SECP256K1_SCALAR_VERIFY(a);
   951  
   952      r->v[0] =  a0                   & M62;
   953      r->v[1] = (a0 >> 62 | a1 <<  2) & M62;
   954      r->v[2] = (a1 >> 60 | a2 <<  4) & M62;
   955      r->v[3] = (a2 >> 58 | a3 <<  6) & M62;
   956      r->v[4] =  a3 >> 56;
   957  }
   958  
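        /* Modulus information for modinv64: the scalar order n in signed62 form and, per the
         * modinv64 interface, its multiplicative inverse modulo 2^62. */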
   959  static const secp256k1_modinv64_modinfo secp256k1_const_modinfo_scalar = {
   960      {{0x3FD25E8CD0364141LL, 0x2ABB739ABD2280EELL, -0x15LL, 0, 256}},
   961      0x34F20099AA774EC1LL
   962  };
   963  
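        /* Modular inverse of x modulo n, computed via the signed-62 representation and modinv64
         * (constant time); secp256k1_scalar_inverse_var below uses the variable-time variant. */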
   964  static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
   965      secp256k1_modinv64_signed62 s;
   966  #ifdef VERIFY
   967      int zero_in = secp256k1_scalar_is_zero(x);
   968  #endif
   969      SECP256K1_SCALAR_VERIFY(x);
   970  
   971      secp256k1_scalar_to_signed62(&s, x);
   972      secp256k1_modinv64(&s, &secp256k1_const_modinfo_scalar);
   973      secp256k1_scalar_from_signed62(r, &s);
   974  
   975      SECP256K1_SCALAR_VERIFY(r);
   976      VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
   977  }
   978  
   979  static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
   980      secp256k1_modinv64_signed62 s;
   981  #ifdef VERIFY
   982      int zero_in = secp256k1_scalar_is_zero(x);
   983  #endif
   984      SECP256K1_SCALAR_VERIFY(x);
   985  
   986      secp256k1_scalar_to_signed62(&s, x);
   987      secp256k1_modinv64_var(&s, &secp256k1_const_modinfo_scalar);
   988      secp256k1_scalar_from_signed62(r, &s);
   989  
   990      SECP256K1_SCALAR_VERIFY(r);
   991      VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
   992  }
   993  
   994  SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
   995      SECP256K1_SCALAR_VERIFY(a);
   996  
   997      return !(a->d[0] & 1);
   998  }
   999  
  1000  #endif /* SECP256K1_SCALAR_REPR_IMPL_H */