github.com/ethereumproject/go-ethereum@v5.5.2+incompatible/crypto/secp256k1/libsecp256k1/src/field_10x26_impl.h (about)

     1  /**********************************************************************
     2   * Copyright (c) 2013, 2014 Pieter Wuille                             *
     3   * Distributed under the MIT software license, see the accompanying   *
     4   * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
     5   **********************************************************************/
     6  
     7  #ifndef _SECP256K1_FIELD_REPR_IMPL_H_
     8  #define _SECP256K1_FIELD_REPR_IMPL_H_
     9  
    10  #include <stdio.h>
    11  #include <string.h>
    12  #include "util.h"
    13  #include "num.h"
    14  #include "field.h"
    15  
    16  #ifdef VERIFY
    17  static void secp256k1_fe_verify(const secp256k1_fe *a) {
    18      const uint32_t *d = a->n;
    19      int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
    20      r &= (d[0] <= 0x3FFFFFFUL * m);
    21      r &= (d[1] <= 0x3FFFFFFUL * m);
    22      r &= (d[2] <= 0x3FFFFFFUL * m);
    23      r &= (d[3] <= 0x3FFFFFFUL * m);
    24      r &= (d[4] <= 0x3FFFFFFUL * m);
    25      r &= (d[5] <= 0x3FFFFFFUL * m);
    26      r &= (d[6] <= 0x3FFFFFFUL * m);
    27      r &= (d[7] <= 0x3FFFFFFUL * m);
    28      r &= (d[8] <= 0x3FFFFFFUL * m);
    29      r &= (d[9] <= 0x03FFFFFUL * m);
    30      r &= (a->magnitude >= 0);
    31      r &= (a->magnitude <= 32);
    32      if (a->normalized) {
    33          r &= (a->magnitude <= 1);
    34          if (r && (d[9] == 0x03FFFFFUL)) {
    35              uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
    36              if (mid == 0x3FFFFFFUL) {
    37                  r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
    38              }
    39          }
    40      }
    41      VERIFY_CHECK(r == 1);
    42  }
    43  #else
    44  static void secp256k1_fe_verify(const secp256k1_fe *a) {
    45      (void)a;
    46  }
    47  #endif
    48  
    49  static void secp256k1_fe_normalize(secp256k1_fe *r) {
    50      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
    51               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
    52  
    53      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    54      uint32_t m;
    55      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
    56  
    57      /* The first pass ensures the magnitude is 1, ... */
    58      t0 += x * 0x3D1UL; t1 += (x << 6);
    59      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    60      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    61      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    62      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    63      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    64      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    65      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    66      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    67      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
    68  
    69      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    70      VERIFY_CHECK(t9 >> 23 == 0);
    71  
    72      /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    73      x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
    74          & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
    75  
    76      /* Apply the final reduction (for constant-time behaviour, we do it always) */
    77      t0 += x * 0x3D1UL; t1 += (x << 6);
    78      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    79      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    80      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    81      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    82      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    83      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    84      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    85      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    86      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
    87  
    88      /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
    89      VERIFY_CHECK(t9 >> 22 == x);
    90  
    91      /* Mask off the possible multiple of 2^256 from the final reduction */
    92      t9 &= 0x03FFFFFUL;
    93  
    94      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    95      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
    96  
    97  #ifdef VERIFY
    98      r->magnitude = 1;
    99      r->normalized = 1;
   100      secp256k1_fe_verify(r);
   101  #endif
   102  }
   103  
   104  static void secp256k1_fe_normalize_weak(secp256k1_fe *r) {
   105      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   106               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   107  
   108      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   109      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   110  
   111      /* The first pass ensures the magnitude is 1, ... */
   112      t0 += x * 0x3D1UL; t1 += (x << 6);
   113      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   114      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   115      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
   116      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
   117      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
   118      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
   119      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
   120      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
   121      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
   122  
   123      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   124      VERIFY_CHECK(t9 >> 23 == 0);
   125  
   126      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
   127      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
   128  
   129  #ifdef VERIFY
   130      r->magnitude = 1;
   131      secp256k1_fe_verify(r);
   132  #endif
   133  }
   134  
   135  static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
   136      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   137               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   138  
   139      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   140      uint32_t m;
   141      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   142  
   143      /* The first pass ensures the magnitude is 1, ... */
   144      t0 += x * 0x3D1UL; t1 += (x << 6);
   145      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   146      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   147      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
   148      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
   149      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
   150      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
   151      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
   152      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
   153      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
   154  
   155      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   156      VERIFY_CHECK(t9 >> 23 == 0);
   157  
   158      /* At most a single final reduction is needed; check if the value is >= the field characteristic */
   159      x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
   160          & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
   161  
   162      if (x) {
   163          t0 += 0x3D1UL; t1 += (x << 6);
   164          t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   165          t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   166          t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
   167          t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
   168          t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
   169          t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
   170          t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
   171          t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
   172          t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
   173  
   174          /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
   175          VERIFY_CHECK(t9 >> 22 == x);
   176  
   177          /* Mask off the possible multiple of 2^256 from the final reduction */
   178          t9 &= 0x03FFFFFUL;
   179      }
   180  
   181      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
   182      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
   183  
   184  #ifdef VERIFY
   185      r->magnitude = 1;
   186      r->normalized = 1;
   187      secp256k1_fe_verify(r);
   188  #endif
   189  }
   190  
   191  static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
   192      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   193               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   194  
   195      /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
   196      uint32_t z0, z1;
   197  
   198      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   199      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   200  
   201      /* The first pass ensures the magnitude is 1, ... */
   202      t0 += x * 0x3D1UL; t1 += (x << 6);
   203      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
   204      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
   205      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
   206      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
   207      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
   208      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
   209      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
   210      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
   211      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
   212                                           z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
   213  
   214      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   215      VERIFY_CHECK(t9 >> 23 == 0);
   216  
   217      return (z0 == 0) | (z1 == 0x3FFFFFFUL);
   218  }
   219  
   220  static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
   221      uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
   222      uint32_t z0, z1;
   223      uint32_t x;
   224  
   225      t0 = r->n[0];
   226      t9 = r->n[9];
   227  
   228      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   229      x = t9 >> 22;
   230  
   231      /* The first pass ensures the magnitude is 1, ... */
   232      t0 += x * 0x3D1UL;
   233  
   234      /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
   235      z0 = t0 & 0x3FFFFFFUL;
   236      z1 = z0 ^ 0x3D0UL;
   237  
   238      /* Fast return path should catch the majority of cases */
   239      if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
   240          return 0;
   241      }
   242  
   243      t1 = r->n[1];
   244      t2 = r->n[2];
   245      t3 = r->n[3];
   246      t4 = r->n[4];
   247      t5 = r->n[5];
   248      t6 = r->n[6];
   249      t7 = r->n[7];
   250      t8 = r->n[8];
   251  
   252      t9 &= 0x03FFFFFUL;
   253      t1 += (x << 6);
   254  
   255      t1 += (t0 >> 26);
   256      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
   257      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
   258      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
   259      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
   260      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
   261      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
   262      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
   263      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
   264                                           z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
   265  
   266      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   267      VERIFY_CHECK(t9 >> 23 == 0);
   268  
   269      return (z0 == 0) | (z1 == 0x3FFFFFFUL);
   270  }
   271  
   272  SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) {
   273      r->n[0] = a;
   274      r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
   275  #ifdef VERIFY
   276      r->magnitude = 1;
   277      r->normalized = 1;
   278      secp256k1_fe_verify(r);
   279  #endif
   280  }
   281  
   282  SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) {
   283      const uint32_t *t = a->n;
   284  #ifdef VERIFY
   285      VERIFY_CHECK(a->normalized);
   286      secp256k1_fe_verify(a);
   287  #endif
   288      return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
   289  }
   290  
   291  SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) {
   292  #ifdef VERIFY
   293      VERIFY_CHECK(a->normalized);
   294      secp256k1_fe_verify(a);
   295  #endif
   296      return a->n[0] & 1;
   297  }
   298  
   299  SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) {
   300      int i;
   301  #ifdef VERIFY
   302      a->magnitude = 0;
   303      a->normalized = 1;
   304  #endif
   305      for (i=0; i<10; i++) {
   306          a->n[i] = 0;
   307      }
   308  }
   309  
   310  static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
   311      int i;
   312  #ifdef VERIFY
   313      VERIFY_CHECK(a->normalized);
   314      VERIFY_CHECK(b->normalized);
   315      secp256k1_fe_verify(a);
   316      secp256k1_fe_verify(b);
   317  #endif
   318      for (i = 9; i >= 0; i--) {
   319          if (a->n[i] > b->n[i]) {
   320              return 1;
   321          }
   322          if (a->n[i] < b->n[i]) {
   323              return -1;
   324          }
   325      }
   326      return 0;
   327  }
   328  
   329  static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
   330      int i;
   331      r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
   332      r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
   333      for (i=0; i<32; i++) {
   334          int j;
   335          for (j=0; j<4; j++) {
   336              int limb = (8*i+2*j)/26;
   337              int shift = (8*i+2*j)%26;
   338              r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
   339          }
   340      }
   341      if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
   342          return 0;
   343      }
   344  #ifdef VERIFY
   345      r->magnitude = 1;
   346      r->normalized = 1;
   347      secp256k1_fe_verify(r);
   348  #endif
   349      return 1;
   350  }
   351  
   352  /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
   353  static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
   354      int i;
   355  #ifdef VERIFY
   356      VERIFY_CHECK(a->normalized);
   357      secp256k1_fe_verify(a);
   358  #endif
   359      for (i=0; i<32; i++) {
   360          int j;
   361          int c = 0;
   362          for (j=0; j<4; j++) {
   363              int limb = (8*i+2*j)/26;
   364              int shift = (8*i+2*j)%26;
   365              c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
   366          }
   367          r[31-i] = c;
   368      }
   369  }
   370  
   371  SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {
   372  #ifdef VERIFY
   373      VERIFY_CHECK(a->magnitude <= m);
   374      secp256k1_fe_verify(a);
   375  #endif
   376      r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
   377      r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
   378      r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
   379      r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
   380      r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
   381      r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
   382      r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
   383      r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
   384      r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
   385      r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
   386  #ifdef VERIFY
   387      r->magnitude = m + 1;
   388      r->normalized = 0;
   389      secp256k1_fe_verify(r);
   390  #endif
   391  }
   392  
   393  SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe *r, int a) {
   394      r->n[0] *= a;
   395      r->n[1] *= a;
   396      r->n[2] *= a;
   397      r->n[3] *= a;
   398      r->n[4] *= a;
   399      r->n[5] *= a;
   400      r->n[6] *= a;
   401      r->n[7] *= a;
   402      r->n[8] *= a;
   403      r->n[9] *= a;
   404  #ifdef VERIFY
   405      r->magnitude *= a;
   406      r->normalized = 0;
   407      secp256k1_fe_verify(r);
   408  #endif
   409  }
   410  
   411  SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) {
   412  #ifdef VERIFY
   413      secp256k1_fe_verify(a);
   414  #endif
   415      r->n[0] += a->n[0];
   416      r->n[1] += a->n[1];
   417      r->n[2] += a->n[2];
   418      r->n[3] += a->n[3];
   419      r->n[4] += a->n[4];
   420      r->n[5] += a->n[5];
   421      r->n[6] += a->n[6];
   422      r->n[7] += a->n[7];
   423      r->n[8] += a->n[8];
   424      r->n[9] += a->n[9];
   425  #ifdef VERIFY
   426      r->magnitude += a->magnitude;
   427      r->normalized = 0;
   428      secp256k1_fe_verify(r);
   429  #endif
   430  }
   431  
   432  #ifdef VERIFY
   433  #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
   434  #else
   435  #define VERIFY_BITS(x, n) do { } while(0)
   436  #endif
   437  
   438  SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
   439      uint64_t c, d;
   440      uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
   441      uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
   442      const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
   443  
   444      VERIFY_BITS(a[0], 30);
   445      VERIFY_BITS(a[1], 30);
   446      VERIFY_BITS(a[2], 30);
   447      VERIFY_BITS(a[3], 30);
   448      VERIFY_BITS(a[4], 30);
   449      VERIFY_BITS(a[5], 30);
   450      VERIFY_BITS(a[6], 30);
   451      VERIFY_BITS(a[7], 30);
   452      VERIFY_BITS(a[8], 30);
   453      VERIFY_BITS(a[9], 26);
   454      VERIFY_BITS(b[0], 30);
   455      VERIFY_BITS(b[1], 30);
   456      VERIFY_BITS(b[2], 30);
   457      VERIFY_BITS(b[3], 30);
   458      VERIFY_BITS(b[4], 30);
   459      VERIFY_BITS(b[5], 30);
   460      VERIFY_BITS(b[6], 30);
   461      VERIFY_BITS(b[7], 30);
   462      VERIFY_BITS(b[8], 30);
   463      VERIFY_BITS(b[9], 26);
   464  
   465      /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
   466       *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
   467       *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
   468       */
   469  
   470      d  = (uint64_t)a[0] * b[9]
   471         + (uint64_t)a[1] * b[8]
   472         + (uint64_t)a[2] * b[7]
   473         + (uint64_t)a[3] * b[6]
   474         + (uint64_t)a[4] * b[5]
   475         + (uint64_t)a[5] * b[4]
   476         + (uint64_t)a[6] * b[3]
   477         + (uint64_t)a[7] * b[2]
   478         + (uint64_t)a[8] * b[1]
   479         + (uint64_t)a[9] * b[0];
   480      /* VERIFY_BITS(d, 64); */
   481      /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   482      t9 = d & M; d >>= 26;
   483      VERIFY_BITS(t9, 26);
   484      VERIFY_BITS(d, 38);
   485      /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   486  
   487      c  = (uint64_t)a[0] * b[0];
   488      VERIFY_BITS(c, 60);
   489      /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
   490      d += (uint64_t)a[1] * b[9]
   491         + (uint64_t)a[2] * b[8]
   492         + (uint64_t)a[3] * b[7]
   493         + (uint64_t)a[4] * b[6]
   494         + (uint64_t)a[5] * b[5]
   495         + (uint64_t)a[6] * b[4]
   496         + (uint64_t)a[7] * b[3]
   497         + (uint64_t)a[8] * b[2]
   498         + (uint64_t)a[9] * b[1];
   499      VERIFY_BITS(d, 63);
   500      /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   501      u0 = d & M; d >>= 26; c += u0 * R0;
   502      VERIFY_BITS(u0, 26);
   503      VERIFY_BITS(d, 37);
   504      VERIFY_BITS(c, 61);
   505      /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   506      t0 = c & M; c >>= 26; c += u0 * R1;
   507      VERIFY_BITS(t0, 26);
   508      VERIFY_BITS(c, 37);
   509      /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   510      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   511  
   512      c += (uint64_t)a[0] * b[1]
   513         + (uint64_t)a[1] * b[0];
   514      VERIFY_BITS(c, 62);
   515      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
   516      d += (uint64_t)a[2] * b[9]
   517         + (uint64_t)a[3] * b[8]
   518         + (uint64_t)a[4] * b[7]
   519         + (uint64_t)a[5] * b[6]
   520         + (uint64_t)a[6] * b[5]
   521         + (uint64_t)a[7] * b[4]
   522         + (uint64_t)a[8] * b[3]
   523         + (uint64_t)a[9] * b[2];
   524      VERIFY_BITS(d, 63);
   525      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   526      u1 = d & M; d >>= 26; c += u1 * R0;
   527      VERIFY_BITS(u1, 26);
   528      VERIFY_BITS(d, 37);
   529      VERIFY_BITS(c, 63);
   530      /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   531      t1 = c & M; c >>= 26; c += u1 * R1;
   532      VERIFY_BITS(t1, 26);
   533      VERIFY_BITS(c, 38);
   534      /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   535      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   536  
   537      c += (uint64_t)a[0] * b[2]
   538         + (uint64_t)a[1] * b[1]
   539         + (uint64_t)a[2] * b[0];
   540      VERIFY_BITS(c, 62);
   541      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   542      d += (uint64_t)a[3] * b[9]
   543         + (uint64_t)a[4] * b[8]
   544         + (uint64_t)a[5] * b[7]
   545         + (uint64_t)a[6] * b[6]
   546         + (uint64_t)a[7] * b[5]
   547         + (uint64_t)a[8] * b[4]
   548         + (uint64_t)a[9] * b[3];
   549      VERIFY_BITS(d, 63);
   550      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   551      u2 = d & M; d >>= 26; c += u2 * R0;
   552      VERIFY_BITS(u2, 26);
   553      VERIFY_BITS(d, 37);
   554      VERIFY_BITS(c, 63);
   555      /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   556      t2 = c & M; c >>= 26; c += u2 * R1;
   557      VERIFY_BITS(t2, 26);
   558      VERIFY_BITS(c, 38);
   559      /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   560      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   561  
   562      c += (uint64_t)a[0] * b[3]
   563         + (uint64_t)a[1] * b[2]
   564         + (uint64_t)a[2] * b[1]
   565         + (uint64_t)a[3] * b[0];
   566      VERIFY_BITS(c, 63);
   567      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   568      d += (uint64_t)a[4] * b[9]
   569         + (uint64_t)a[5] * b[8]
   570         + (uint64_t)a[6] * b[7]
   571         + (uint64_t)a[7] * b[6]
   572         + (uint64_t)a[8] * b[5]
   573         + (uint64_t)a[9] * b[4];
   574      VERIFY_BITS(d, 63);
   575      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   576      u3 = d & M; d >>= 26; c += u3 * R0;
   577      VERIFY_BITS(u3, 26);
   578      VERIFY_BITS(d, 37);
   579      /* VERIFY_BITS(c, 64); */
   580      /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   581      t3 = c & M; c >>= 26; c += u3 * R1;
   582      VERIFY_BITS(t3, 26);
   583      VERIFY_BITS(c, 39);
   584      /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   585      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   586  
   587      c += (uint64_t)a[0] * b[4]
   588         + (uint64_t)a[1] * b[3]
   589         + (uint64_t)a[2] * b[2]
   590         + (uint64_t)a[3] * b[1]
   591         + (uint64_t)a[4] * b[0];
   592      VERIFY_BITS(c, 63);
   593      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   594      d += (uint64_t)a[5] * b[9]
   595         + (uint64_t)a[6] * b[8]
   596         + (uint64_t)a[7] * b[7]
   597         + (uint64_t)a[8] * b[6]
   598         + (uint64_t)a[9] * b[5];
   599      VERIFY_BITS(d, 62);
   600      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   601      u4 = d & M; d >>= 26; c += u4 * R0;
   602      VERIFY_BITS(u4, 26);
   603      VERIFY_BITS(d, 36);
   604      /* VERIFY_BITS(c, 64); */
   605      /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   606      t4 = c & M; c >>= 26; c += u4 * R1;
   607      VERIFY_BITS(t4, 26);
   608      VERIFY_BITS(c, 39);
   609      /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   610      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   611  
   612      c += (uint64_t)a[0] * b[5]
   613         + (uint64_t)a[1] * b[4]
   614         + (uint64_t)a[2] * b[3]
   615         + (uint64_t)a[3] * b[2]
   616         + (uint64_t)a[4] * b[1]
   617         + (uint64_t)a[5] * b[0];
   618      VERIFY_BITS(c, 63);
   619      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   620      d += (uint64_t)a[6] * b[9]
   621         + (uint64_t)a[7] * b[8]
   622         + (uint64_t)a[8] * b[7]
   623         + (uint64_t)a[9] * b[6];
   624      VERIFY_BITS(d, 62);
   625      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   626      u5 = d & M; d >>= 26; c += u5 * R0;
   627      VERIFY_BITS(u5, 26);
   628      VERIFY_BITS(d, 36);
   629      /* VERIFY_BITS(c, 64); */
   630      /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   631      t5 = c & M; c >>= 26; c += u5 * R1;
   632      VERIFY_BITS(t5, 26);
   633      VERIFY_BITS(c, 39);
   634      /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   635      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   636  
   637      c += (uint64_t)a[0] * b[6]
   638         + (uint64_t)a[1] * b[5]
   639         + (uint64_t)a[2] * b[4]
   640         + (uint64_t)a[3] * b[3]
   641         + (uint64_t)a[4] * b[2]
   642         + (uint64_t)a[5] * b[1]
   643         + (uint64_t)a[6] * b[0];
   644      VERIFY_BITS(c, 63);
   645      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   646      d += (uint64_t)a[7] * b[9]
   647         + (uint64_t)a[8] * b[8]
   648         + (uint64_t)a[9] * b[7];
   649      VERIFY_BITS(d, 61);
   650      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   651      u6 = d & M; d >>= 26; c += u6 * R0;
   652      VERIFY_BITS(u6, 26);
   653      VERIFY_BITS(d, 35);
   654      /* VERIFY_BITS(c, 64); */
   655      /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   656      t6 = c & M; c >>= 26; c += u6 * R1;
   657      VERIFY_BITS(t6, 26);
   658      VERIFY_BITS(c, 39);
   659      /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   660      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   661  
   662      c += (uint64_t)a[0] * b[7]
   663         + (uint64_t)a[1] * b[6]
   664         + (uint64_t)a[2] * b[5]
   665         + (uint64_t)a[3] * b[4]
   666         + (uint64_t)a[4] * b[3]
   667         + (uint64_t)a[5] * b[2]
   668         + (uint64_t)a[6] * b[1]
   669         + (uint64_t)a[7] * b[0];
   670      /* VERIFY_BITS(c, 64); */
   671      VERIFY_CHECK(c <= 0x8000007C00000007ULL);
   672      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   673      d += (uint64_t)a[8] * b[9]
   674         + (uint64_t)a[9] * b[8];
   675      VERIFY_BITS(d, 58);
   676      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   677      u7 = d & M; d >>= 26; c += u7 * R0;
   678      VERIFY_BITS(u7, 26);
   679      VERIFY_BITS(d, 32);
   680      /* VERIFY_BITS(c, 64); */
   681      VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
   682      /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   683      t7 = c & M; c >>= 26; c += u7 * R1;
   684      VERIFY_BITS(t7, 26);
   685      VERIFY_BITS(c, 38);
   686      /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   687      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   688  
   689      c += (uint64_t)a[0] * b[8]
   690         + (uint64_t)a[1] * b[7]
   691         + (uint64_t)a[2] * b[6]
   692         + (uint64_t)a[3] * b[5]
   693         + (uint64_t)a[4] * b[4]
   694         + (uint64_t)a[5] * b[3]
   695         + (uint64_t)a[6] * b[2]
   696         + (uint64_t)a[7] * b[1]
   697         + (uint64_t)a[8] * b[0];
   698      /* VERIFY_BITS(c, 64); */
   699      VERIFY_CHECK(c <= 0x9000007B80000008ULL);
   700      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   701      d += (uint64_t)a[9] * b[9];
   702      VERIFY_BITS(d, 57);
   703      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   704      u8 = d & M; d >>= 26; c += u8 * R0;
   705      VERIFY_BITS(u8, 26);
   706      VERIFY_BITS(d, 31);
   707      /* VERIFY_BITS(c, 64); */
   708      VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
   709      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   710  
   711      r[3] = t3;
   712      VERIFY_BITS(r[3], 26);
   713      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   714      r[4] = t4;
   715      VERIFY_BITS(r[4], 26);
   716      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   717      r[5] = t5;
   718      VERIFY_BITS(r[5], 26);
   719      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   720      r[6] = t6;
   721      VERIFY_BITS(r[6], 26);
   722      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   723      r[7] = t7;
   724      VERIFY_BITS(r[7], 26);
   725      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   726  
   727      r[8] = c & M; c >>= 26; c += u8 * R1;
   728      VERIFY_BITS(r[8], 26);
   729      VERIFY_BITS(c, 39);
   730      /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   731      /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   732      c   += d * R0 + t9;
   733      VERIFY_BITS(c, 45);
   734      /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   735      r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
   736      VERIFY_BITS(r[9], 22);
   737      VERIFY_BITS(c, 46);
   738      /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   739      /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   740      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   741  
   742      d    = c * (R0 >> 4) + t0;
   743      VERIFY_BITS(d, 56);
   744      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   745      r[0] = d & M; d >>= 26;
   746      VERIFY_BITS(r[0], 26);
   747      VERIFY_BITS(d, 30);
   748      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   749      d   += c * (R1 >> 4) + t1;
   750      VERIFY_BITS(d, 53);
   751      VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
   752      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   753      /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   754      r[1] = d & M; d >>= 26;
   755      VERIFY_BITS(r[1], 26);
   756      VERIFY_BITS(d, 27);
   757      VERIFY_CHECK(d <= 0x4000000ULL);
   758      /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   759      d   += t2;
   760      VERIFY_BITS(d, 27);
   761      /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   762      r[2] = d;
   763      VERIFY_BITS(r[2], 27);
   764      /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   765  }
   766  
   767  SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
            /* Squares the ten-limb value a[0..9] and writes ten result limbs to
             * r[0..9], folding the upper half of the product back into the lower
             * limbs with the constants R0/R1 (see the shorthand note below).
             *
             * a: input limbs, least significant first. The checks below bound
             *    a[0..8] to 30 bits and a[9] to 26 bits (VERIFY_BITS is defined
             *    outside this view; presumably it asserts "fits in n bits").
             * r: output limbs. Per the final checks, r[9] fits in 22 bits, r[2] in
             *    27 bits and every other limb in 26 bits.
             *
             * Every read of a[] happens before the first store to r[], so r and a
             * may refer to the same array.
             */
   768      uint64_t c, d;
   769      uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
   770      uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
            /* M masks one 26-bit limb. R0 and R1 are the low and high limb of the
             * folding constant: R1*2^26 + R0 = 0x1000003D10. */
   771      const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
   772  
   773      VERIFY_BITS(a[0], 30);
   774      VERIFY_BITS(a[1], 30);
   775      VERIFY_BITS(a[2], 30);
   776      VERIFY_BITS(a[3], 30);
   777      VERIFY_BITS(a[4], 30);
   778      VERIFY_BITS(a[5], 30);
   779      VERIFY_BITS(a[6], 30);
   780      VERIFY_BITS(a[7], 30);
   781      VERIFY_BITS(a[8], 30);
   782      VERIFY_BITS(a[9], 26);
   783  
   784      /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
   785       *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
   786       *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
   787       */
   788  
            /* Each product a[i]*a[j] with i != j occurs twice in the square, so one
             * factor is doubled instead of listing the term twice. The doubling is
             * done in 32 bits before widening to 64; with the input bounds checked
             * above, a[i]*2 stays below 2^31 and cannot wrap. */
   789      d  = (uint64_t)(a[0]*2) * a[9]
   790         + (uint64_t)(a[1]*2) * a[8]
   791         + (uint64_t)(a[2]*2) * a[7]
   792         + (uint64_t)(a[3]*2) * a[6]
   793         + (uint64_t)(a[4]*2) * a[5];
   794      /* VERIFY_BITS(d, 64); */
   795      /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   796      t9 = d & M; d >>= 26;
   797      VERIFY_BITS(t9, 26);
   798      VERIFY_BITS(d, 38);
   799      /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   800  
   801      c  = (uint64_t)a[0] * a[0];
   802      VERIFY_BITS(c, 60);
   803      /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
   804      d += (uint64_t)(a[1]*2) * a[9]
   805         + (uint64_t)(a[2]*2) * a[8]
   806         + (uint64_t)(a[3]*2) * a[7]
   807         + (uint64_t)(a[4]*2) * a[6]
   808         + (uint64_t)a[5] * a[5];
   809      VERIFY_BITS(d, 63);
   810      /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
            /* Fold step, repeated for u0..u8: the low 26 bits of d sit ten limbs
             * above the limb held in c, so they are added to c scaled by R0 here and
             * scaled by R1 one limb higher (after c has been shifted down). */
   811      u0 = d & M; d >>= 26; c += u0 * R0;
   812      VERIFY_BITS(u0, 26);
   813      VERIFY_BITS(d, 37);
   814      VERIFY_BITS(c, 61);
   815      /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   816      t0 = c & M; c >>= 26; c += u0 * R1;
   817      VERIFY_BITS(t0, 26);
   818      VERIFY_BITS(c, 37);
   819      /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   820      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   821  
   822      c += (uint64_t)(a[0]*2) * a[1];
   823      VERIFY_BITS(c, 62);
   824      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
   825      d += (uint64_t)(a[2]*2) * a[9]
   826         + (uint64_t)(a[3]*2) * a[8]
   827         + (uint64_t)(a[4]*2) * a[7]
   828         + (uint64_t)(a[5]*2) * a[6];
   829      VERIFY_BITS(d, 63);
   830      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   831      u1 = d & M; d >>= 26; c += u1 * R0;
   832      VERIFY_BITS(u1, 26);
   833      VERIFY_BITS(d, 37);
   834      VERIFY_BITS(c, 63);
   835      /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   836      t1 = c & M; c >>= 26; c += u1 * R1;
   837      VERIFY_BITS(t1, 26);
   838      VERIFY_BITS(c, 38);
   839      /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   840      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   841  
   842      c += (uint64_t)(a[0]*2) * a[2]
   843         + (uint64_t)a[1] * a[1];
   844      VERIFY_BITS(c, 62);
   845      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   846      d += (uint64_t)(a[3]*2) * a[9]
   847         + (uint64_t)(a[4]*2) * a[8]
   848         + (uint64_t)(a[5]*2) * a[7]
   849         + (uint64_t)a[6] * a[6];
   850      VERIFY_BITS(d, 63);
   851      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   852      u2 = d & M; d >>= 26; c += u2 * R0;
   853      VERIFY_BITS(u2, 26);
   854      VERIFY_BITS(d, 37);
   855      VERIFY_BITS(c, 63);
   856      /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   857      t2 = c & M; c >>= 26; c += u2 * R1;
   858      VERIFY_BITS(t2, 26);
   859      VERIFY_BITS(c, 38);
   860      /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   861      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   862  
   863      c += (uint64_t)(a[0]*2) * a[3]
   864         + (uint64_t)(a[1]*2) * a[2];
   865      VERIFY_BITS(c, 63);
   866      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   867      d += (uint64_t)(a[4]*2) * a[9]
   868         + (uint64_t)(a[5]*2) * a[8]
   869         + (uint64_t)(a[6]*2) * a[7];
   870      VERIFY_BITS(d, 63);
   871      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   872      u3 = d & M; d >>= 26; c += u3 * R0;
   873      VERIFY_BITS(u3, 26);
   874      VERIFY_BITS(d, 37);
   875      /* VERIFY_BITS(c, 64); */
   876      /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   877      t3 = c & M; c >>= 26; c += u3 * R1;
   878      VERIFY_BITS(t3, 26);
   879      VERIFY_BITS(c, 39);
   880      /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   881      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   882  
   883      c += (uint64_t)(a[0]*2) * a[4]
   884         + (uint64_t)(a[1]*2) * a[3]
   885         + (uint64_t)a[2] * a[2];
   886      VERIFY_BITS(c, 63);
   887      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   888      d += (uint64_t)(a[5]*2) * a[9]
   889         + (uint64_t)(a[6]*2) * a[8]
   890         + (uint64_t)a[7] * a[7];
   891      VERIFY_BITS(d, 62);
   892      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   893      u4 = d & M; d >>= 26; c += u4 * R0;
   894      VERIFY_BITS(u4, 26);
   895      VERIFY_BITS(d, 36);
   896      /* VERIFY_BITS(c, 64); */
   897      /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   898      t4 = c & M; c >>= 26; c += u4 * R1;
   899      VERIFY_BITS(t4, 26);
   900      VERIFY_BITS(c, 39);
   901      /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   902      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   903  
   904      c += (uint64_t)(a[0]*2) * a[5]
   905         + (uint64_t)(a[1]*2) * a[4]
   906         + (uint64_t)(a[2]*2) * a[3];
   907      VERIFY_BITS(c, 63);
   908      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   909      d += (uint64_t)(a[6]*2) * a[9]
   910         + (uint64_t)(a[7]*2) * a[8];
   911      VERIFY_BITS(d, 62);
   912      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   913      u5 = d & M; d >>= 26; c += u5 * R0;
   914      VERIFY_BITS(u5, 26);
   915      VERIFY_BITS(d, 36);
   916      /* VERIFY_BITS(c, 64); */
   917      /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   918      t5 = c & M; c >>= 26; c += u5 * R1;
   919      VERIFY_BITS(t5, 26);
   920      VERIFY_BITS(c, 39);
   921      /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   922      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   923  
   924      c += (uint64_t)(a[0]*2) * a[6]
   925         + (uint64_t)(a[1]*2) * a[5]
   926         + (uint64_t)(a[2]*2) * a[4]
   927         + (uint64_t)a[3] * a[3];
   928      VERIFY_BITS(c, 63);
   929      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   930      d += (uint64_t)(a[7]*2) * a[9]
   931         + (uint64_t)a[8] * a[8];
   932      VERIFY_BITS(d, 61);
   933      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   934      u6 = d & M; d >>= 26; c += u6 * R0;
   935      VERIFY_BITS(u6, 26);
   936      VERIFY_BITS(d, 35);
   937      /* VERIFY_BITS(c, 64); */
   938      /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   939      t6 = c & M; c >>= 26; c += u6 * R1;
   940      VERIFY_BITS(t6, 26);
   941      VERIFY_BITS(c, 39);
   942      /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   943      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   944  
   945      c += (uint64_t)(a[0]*2) * a[7]
   946         + (uint64_t)(a[1]*2) * a[6]
   947         + (uint64_t)(a[2]*2) * a[5]
   948         + (uint64_t)(a[3]*2) * a[4];
   949      /* VERIFY_BITS(c, 64); */
   950      VERIFY_CHECK(c <= 0x8000007C00000007ULL);
   951      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   952      d += (uint64_t)(a[8]*2) * a[9];
   953      VERIFY_BITS(d, 58);
   954      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   955      u7 = d & M; d >>= 26; c += u7 * R0;
   956      VERIFY_BITS(u7, 26);
   957      VERIFY_BITS(d, 32);
   958      /* VERIFY_BITS(c, 64); */
   959      VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
   960      /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   961      t7 = c & M; c >>= 26; c += u7 * R1;
   962      VERIFY_BITS(t7, 26);
   963      VERIFY_BITS(c, 38);
   964      /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   965      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   966  
   967      c += (uint64_t)(a[0]*2) * a[8]
   968         + (uint64_t)(a[1]*2) * a[7]
   969         + (uint64_t)(a[2]*2) * a[6]
   970         + (uint64_t)(a[3]*2) * a[5]
   971         + (uint64_t)a[4] * a[4];
   972      /* VERIFY_BITS(c, 64); */
   973      VERIFY_CHECK(c <= 0x9000007B80000008ULL);
   974      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   975      d += (uint64_t)a[9] * a[9];
   976      VERIFY_BITS(d, 57);
   977      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   978      u8 = d & M; d >>= 26; c += u8 * R0;
   979      VERIFY_BITS(u8, 26);
   980      VERIFY_BITS(d, 31);
   981      /* VERIFY_BITS(c, 64); */
   982      VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
   983      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   984  
            /* All input limbs have been consumed; from here on only r[] is written.
             * Limbs 3..7 are already final and are stored as they are. */
   985      r[3] = t3;
   986      VERIFY_BITS(r[3], 26);
   987      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   988      r[4] = t4;
   989      VERIFY_BITS(r[4], 26);
   990      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   991      r[5] = t5;
   992      VERIFY_BITS(r[5], 26);
   993      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   994      r[6] = t6;
   995      VERIFY_BITS(r[6], 26);
   996      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   997      r[7] = t7;
   998      VERIFY_BITS(r[7], 26);
   999      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1000  
  1001      r[8] = c & M; c >>= 26; c += u8 * R1;
  1002      VERIFY_BITS(r[8], 26);
  1003      VERIFY_BITS(c, 39);
  1004      /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1005      /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1006      c   += d * R0 + t9;
  1007      VERIFY_BITS(c, 45);
  1008      /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
            /* The top limb keeps only 22 bits (9*26 + 22 = 256). Whatever lies above
             * them stays in c and is folded into limbs 0 and 1 below, using the
             * folding constants shifted right by the 4 missing bits (26 - 22). */
  1009      r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
  1010      VERIFY_BITS(r[9], 22);
  1011      VERIFY_BITS(c, 46);
  1012      /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1013      /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1014      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1015  
  1016      d    = c * (R0 >> 4) + t0;
  1017      VERIFY_BITS(d, 56);
  1018      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1019      r[0] = d & M; d >>= 26;
  1020      VERIFY_BITS(r[0], 26);
  1021      VERIFY_BITS(d, 30);
  1022      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1023      d   += c * (R1 >> 4) + t1;
  1024      VERIFY_BITS(d, 53);
  1025      VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
  1026      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1027      /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1028      r[1] = d & M; d >>= 26;
  1029      VERIFY_BITS(r[1], 26);
  1030      VERIFY_BITS(d, 27);
  1031      VERIFY_CHECK(d <= 0x4000000ULL);
  1032      /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1033      d   += t2;
  1034      VERIFY_BITS(d, 27);
  1035      /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
            /* The carry chain stops here: r[2] is stored without masking, so it may
             * hold one bit more than a 26-bit limb (checked as 27 bits below). */
  1036      r[2] = d;
  1037      VERIFY_BITS(r[2], 27);
  1038      /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
  1039  }
  1040  
  1041  
  1042  static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
  1043  #ifdef VERIFY
  1044      VERIFY_CHECK(a->magnitude <= 8);
  1045      VERIFY_CHECK(b->magnitude <= 8);
  1046      secp256k1_fe_verify(a);
  1047      secp256k1_fe_verify(b);
  1048      VERIFY_CHECK(r != b);
  1049  #endif
  1050      secp256k1_fe_mul_inner(r->n, a->n, b->n);
  1051  #ifdef VERIFY
  1052      r->magnitude = 1;
  1053      r->normalized = 0;
  1054      secp256k1_fe_verify(r);
  1055  #endif
  1056  }
  1057  
  1058  static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
  1059  #ifdef VERIFY
  1060      VERIFY_CHECK(a->magnitude <= 8);
  1061      secp256k1_fe_verify(a);
  1062  #endif
  1063      secp256k1_fe_sqr_inner(r->n, a->n);
  1064  #ifdef VERIFY
  1065      r->magnitude = 1;
  1066      r->normalized = 0;
  1067      secp256k1_fe_verify(r);
  1068  #endif
  1069  }
  1070  
  1071  static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
  1072      uint32_t mask0, mask1;
  1073      mask0 = flag + ~((uint32_t)0);
  1074      mask1 = ~mask0;
  1075      r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
  1076      r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
  1077      r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
  1078      r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
  1079      r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
  1080      r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
  1081      r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
  1082      r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
  1083      r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
  1084      r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
  1085  #ifdef VERIFY
  1086      if (a->magnitude > r->magnitude) {
  1087          r->magnitude = a->magnitude;
  1088      }
  1089      r->normalized &= a->normalized;
  1090  #endif
  1091  }
  1092  
  1093  static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
  1094      uint32_t mask0, mask1;
  1095      mask0 = flag + ~((uint32_t)0);
  1096      mask1 = ~mask0;
  1097      r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
  1098      r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
  1099      r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
  1100      r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
  1101      r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
  1102      r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
  1103      r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
  1104      r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
  1105  }
  1106  
  1107  static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
  1108  #ifdef VERIFY
  1109      VERIFY_CHECK(a->normalized);
  1110  #endif
  1111      r->n[0] = a->n[0] | a->n[1] << 26;
  1112      r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
  1113      r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
  1114      r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
  1115      r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
  1116      r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
  1117      r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
  1118      r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
  1119  }
  1120  
  1121  static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
  1122      r->n[0] = a->n[0] & 0x3FFFFFFUL;
  1123      r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
  1124      r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
  1125      r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
  1126      r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
  1127      r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
  1128      r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
  1129      r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
  1130      r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
  1131      r->n[9] = a->n[7] >> 10;
  1132  #ifdef VERIFY
  1133      r->magnitude = 1;
  1134      r->normalized = 1;
  1135  #endif
  1136  }
  1137  
  1138  #endif