github.com/verovm/record-replay@v1.9.7/crypto/secp256k1/libsecp256k1/src/field_10x26_impl.h (about)

     1  /**********************************************************************
     2   * Copyright (c) 2013, 2014 Pieter Wuille                             *
     3   * Distributed under the MIT software license, see the accompanying   *
     4   * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
     5   **********************************************************************/
     6  
     7  #ifndef _SECP256K1_FIELD_REPR_IMPL_H_
     8  #define _SECP256K1_FIELD_REPR_IMPL_H_
     9  
    10  #include "util.h"
    11  #include "num.h"
    12  #include "field.h"
    13  
    14  #ifdef VERIFY
    15  static void secp256k1_fe_verify(const secp256k1_fe *a) {
    16      const uint32_t *d = a->n;
    17      int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
    18      r &= (d[0] <= 0x3FFFFFFUL * m);
    19      r &= (d[1] <= 0x3FFFFFFUL * m);
    20      r &= (d[2] <= 0x3FFFFFFUL * m);
    21      r &= (d[3] <= 0x3FFFFFFUL * m);
    22      r &= (d[4] <= 0x3FFFFFFUL * m);
    23      r &= (d[5] <= 0x3FFFFFFUL * m);
    24      r &= (d[6] <= 0x3FFFFFFUL * m);
    25      r &= (d[7] <= 0x3FFFFFFUL * m);
    26      r &= (d[8] <= 0x3FFFFFFUL * m);
    27      r &= (d[9] <= 0x03FFFFFUL * m);
    28      r &= (a->magnitude >= 0);
    29      r &= (a->magnitude <= 32);
    30      if (a->normalized) {
    31          r &= (a->magnitude <= 1);
    32          if (r && (d[9] == 0x03FFFFFUL)) {
    33              uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
    34              if (mid == 0x3FFFFFFUL) {
    35                  r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
    36              }
    37          }
    38      }
    39      VERIFY_CHECK(r == 1);
    40  }
    41  #endif
    42  
    43  static void secp256k1_fe_normalize(secp256k1_fe *r) {
    44      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
    45               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
    46  
    47      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    48      uint32_t m;
    49      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
    50  
    51      /* The first pass ensures the magnitude is 1, ... */
    52      t0 += x * 0x3D1UL; t1 += (x << 6);
    53      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    54      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    55      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    56      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    57      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    58      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    59      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    60      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    61      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
    62  
    63      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    64      VERIFY_CHECK(t9 >> 23 == 0);
    65  
    66      /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    67      x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
    68          & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
    69  
    70      /* Apply the final reduction (for constant-time behaviour, we do it always) */
    71      t0 += x * 0x3D1UL; t1 += (x << 6);
    72      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    73      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    74      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    75      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    76      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    77      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    78      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    79      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    80      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
    81  
    82      /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
    83      VERIFY_CHECK(t9 >> 22 == x);
    84  
    85      /* Mask off the possible multiple of 2^256 from the final reduction */
    86      t9 &= 0x03FFFFFUL;
    87  
    88      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    89      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
    90  
    91  #ifdef VERIFY
    92      r->magnitude = 1;
    93      r->normalized = 1;
    94      secp256k1_fe_verify(r);
    95  #endif
    96  }
    97  
    98  static void secp256k1_fe_normalize_weak(secp256k1_fe *r) {
    99      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   100               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   101  
   102      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   103      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   104  
   105      /* The first pass ensures the magnitude is 1, ... */
   106      t0 += x * 0x3D1UL; t1 += (x << 6);
   107      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   108      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   109      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
   110      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
   111      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
   112      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
   113      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
   114      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
   115      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
   116  
   117      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   118      VERIFY_CHECK(t9 >> 23 == 0);
   119  
   120      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
   121      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
   122  
   123  #ifdef VERIFY
   124      r->magnitude = 1;
   125      secp256k1_fe_verify(r);
   126  #endif
   127  }
   128  
   129  static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
   130      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   131               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   132  
   133      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   134      uint32_t m;
   135      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   136  
   137      /* The first pass ensures the magnitude is 1, ... */
   138      t0 += x * 0x3D1UL; t1 += (x << 6);
   139      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   140      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   141      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
   142      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
   143      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
   144      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
   145      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
   146      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
   147      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
   148  
   149      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   150      VERIFY_CHECK(t9 >> 23 == 0);
   151  
   152      /* At most a single final reduction is needed; check if the value is >= the field characteristic */
   153      x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
   154          & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
   155  
   156      if (x) {
   157          t0 += 0x3D1UL; t1 += (x << 6);
   158          t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
   159          t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
   160          t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
   161          t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
   162          t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
   163          t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
   164          t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
   165          t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
   166          t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
   167  
   168          /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
   169          VERIFY_CHECK(t9 >> 22 == x);
   170  
   171          /* Mask off the possible multiple of 2^256 from the final reduction */
   172          t9 &= 0x03FFFFFUL;
   173      }
   174  
   175      r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
   176      r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
   177  
   178  #ifdef VERIFY
   179      r->magnitude = 1;
   180      r->normalized = 1;
   181      secp256k1_fe_verify(r);
   182  #endif
   183  }
   184  
   185  static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
   186      uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   187               t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   188  
   189      /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
   190      uint32_t z0, z1;
   191  
   192      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   193      uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
   194  
   195      /* The first pass ensures the magnitude is 1, ... */
   196      t0 += x * 0x3D1UL; t1 += (x << 6);
   197      t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
   198      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
   199      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
   200      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
   201      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
   202      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
   203      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
   204      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
   205      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
   206                                           z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
   207  
   208      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   209      VERIFY_CHECK(t9 >> 23 == 0);
   210  
   211      return (z0 == 0) | (z1 == 0x3FFFFFFUL);
   212  }
   213  
   214  static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
   215      uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
   216      uint32_t z0, z1;
   217      uint32_t x;
   218  
   219      t0 = r->n[0];
   220      t9 = r->n[9];
   221  
   222      /* Reduce t9 at the start so there will be at most a single carry from the first pass */
   223      x = t9 >> 22;
   224  
   225      /* The first pass ensures the magnitude is 1, ... */
   226      t0 += x * 0x3D1UL;
   227  
   228      /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
   229      z0 = t0 & 0x3FFFFFFUL;
   230      z1 = z0 ^ 0x3D0UL;
   231  
   232      /* Fast return path should catch the majority of cases */
   233      if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
   234          return 0;
   235      }
   236  
   237      t1 = r->n[1];
   238      t2 = r->n[2];
   239      t3 = r->n[3];
   240      t4 = r->n[4];
   241      t5 = r->n[5];
   242      t6 = r->n[6];
   243      t7 = r->n[7];
   244      t8 = r->n[8];
   245  
   246      t9 &= 0x03FFFFFUL;
   247      t1 += (x << 6);
   248  
   249      t1 += (t0 >> 26);
   250      t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
   251      t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
   252      t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
   253      t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
   254      t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
   255      t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
   256      t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
   257      t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
   258                                           z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
   259  
   260      /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
   261      VERIFY_CHECK(t9 >> 23 == 0);
   262  
   263      return (z0 == 0) | (z1 == 0x3FFFFFFUL);
   264  }
   265  
   266  SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) {
   267      r->n[0] = a;
   268      r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
   269  #ifdef VERIFY
   270      r->magnitude = 1;
   271      r->normalized = 1;
   272      secp256k1_fe_verify(r);
   273  #endif
   274  }
   275  
   276  SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) {
   277      const uint32_t *t = a->n;
   278  #ifdef VERIFY
   279      VERIFY_CHECK(a->normalized);
   280      secp256k1_fe_verify(a);
   281  #endif
   282      return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
   283  }
   284  
   285  SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) {
   286  #ifdef VERIFY
   287      VERIFY_CHECK(a->normalized);
   288      secp256k1_fe_verify(a);
   289  #endif
   290      return a->n[0] & 1;
   291  }
   292  
   293  SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) {
   294      int i;
   295  #ifdef VERIFY
   296      a->magnitude = 0;
   297      a->normalized = 1;
   298  #endif
   299      for (i=0; i<10; i++) {
   300          a->n[i] = 0;
   301      }
   302  }
   303  
   304  static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
   305      int i;
   306  #ifdef VERIFY
   307      VERIFY_CHECK(a->normalized);
   308      VERIFY_CHECK(b->normalized);
   309      secp256k1_fe_verify(a);
   310      secp256k1_fe_verify(b);
   311  #endif
   312      for (i = 9; i >= 0; i--) {
   313          if (a->n[i] > b->n[i]) {
   314              return 1;
   315          }
   316          if (a->n[i] < b->n[i]) {
   317              return -1;
   318          }
   319      }
   320      return 0;
   321  }
   322  
   323  static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
   324      int i;
   325      r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
   326      r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
   327      for (i=0; i<32; i++) {
   328          int j;
   329          for (j=0; j<4; j++) {
   330              int limb = (8*i+2*j)/26;
   331              int shift = (8*i+2*j)%26;
   332              r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
   333          }
   334      }
   335      if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
   336          return 0;
   337      }
   338  #ifdef VERIFY
   339      r->magnitude = 1;
   340      r->normalized = 1;
   341      secp256k1_fe_verify(r);
   342  #endif
   343      return 1;
   344  }
   345  
   346  /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
   347  static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
   348      int i;
   349  #ifdef VERIFY
   350      VERIFY_CHECK(a->normalized);
   351      secp256k1_fe_verify(a);
   352  #endif
   353      for (i=0; i<32; i++) {
   354          int j;
   355          int c = 0;
   356          for (j=0; j<4; j++) {
   357              int limb = (8*i+2*j)/26;
   358              int shift = (8*i+2*j)%26;
   359              c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
   360          }
   361          r[31-i] = c;
   362      }
   363  }
   364  
   365  SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {
   366  #ifdef VERIFY
   367      VERIFY_CHECK(a->magnitude <= m);
   368      secp256k1_fe_verify(a);
   369  #endif
   370      r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
   371      r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
   372      r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
   373      r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
   374      r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
   375      r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
   376      r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
   377      r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
   378      r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
   379      r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
   380  #ifdef VERIFY
   381      r->magnitude = m + 1;
   382      r->normalized = 0;
   383      secp256k1_fe_verify(r);
   384  #endif
   385  }
   386  
   387  SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe *r, int a) {
   388      r->n[0] *= a;
   389      r->n[1] *= a;
   390      r->n[2] *= a;
   391      r->n[3] *= a;
   392      r->n[4] *= a;
   393      r->n[5] *= a;
   394      r->n[6] *= a;
   395      r->n[7] *= a;
   396      r->n[8] *= a;
   397      r->n[9] *= a;
   398  #ifdef VERIFY
   399      r->magnitude *= a;
   400      r->normalized = 0;
   401      secp256k1_fe_verify(r);
   402  #endif
   403  }
   404  
   405  SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) {
   406  #ifdef VERIFY
   407      secp256k1_fe_verify(a);
   408  #endif
   409      r->n[0] += a->n[0];
   410      r->n[1] += a->n[1];
   411      r->n[2] += a->n[2];
   412      r->n[3] += a->n[3];
   413      r->n[4] += a->n[4];
   414      r->n[5] += a->n[5];
   415      r->n[6] += a->n[6];
   416      r->n[7] += a->n[7];
   417      r->n[8] += a->n[8];
   418      r->n[9] += a->n[9];
   419  #ifdef VERIFY
   420      r->magnitude += a->magnitude;
   421      r->normalized = 0;
   422      secp256k1_fe_verify(r);
   423  #endif
   424  }
   425  
   426  #if defined(USE_EXTERNAL_ASM)
   427  
/* External assembler implementation.
 * Only the prototypes are given in this branch: when USE_EXTERNAL_ASM is
 * defined, the definitions of these two routines have to be supplied outside
 * this file. Both operate on raw limb arrays; in secp256k1_fe_mul_inner the
 * second operand b is restrict-qualified. */
void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);
   431  
   432  #else
   433  
   434  #ifdef VERIFY
   435  #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
   436  #else
   437  #define VERIFY_BITS(x, n) do { } while(0)
   438  #endif
   439  
   440  SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
   441      uint64_t c, d;
   442      uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
   443      uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
   444      const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
   445  
   446      VERIFY_BITS(a[0], 30);
   447      VERIFY_BITS(a[1], 30);
   448      VERIFY_BITS(a[2], 30);
   449      VERIFY_BITS(a[3], 30);
   450      VERIFY_BITS(a[4], 30);
   451      VERIFY_BITS(a[5], 30);
   452      VERIFY_BITS(a[6], 30);
   453      VERIFY_BITS(a[7], 30);
   454      VERIFY_BITS(a[8], 30);
   455      VERIFY_BITS(a[9], 26);
   456      VERIFY_BITS(b[0], 30);
   457      VERIFY_BITS(b[1], 30);
   458      VERIFY_BITS(b[2], 30);
   459      VERIFY_BITS(b[3], 30);
   460      VERIFY_BITS(b[4], 30);
   461      VERIFY_BITS(b[5], 30);
   462      VERIFY_BITS(b[6], 30);
   463      VERIFY_BITS(b[7], 30);
   464      VERIFY_BITS(b[8], 30);
   465      VERIFY_BITS(b[9], 26);
   466  
   467      /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
   468       *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
   469       *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
   470       */
   471  
   472      d  = (uint64_t)a[0] * b[9]
   473         + (uint64_t)a[1] * b[8]
   474         + (uint64_t)a[2] * b[7]
   475         + (uint64_t)a[3] * b[6]
   476         + (uint64_t)a[4] * b[5]
   477         + (uint64_t)a[5] * b[4]
   478         + (uint64_t)a[6] * b[3]
   479         + (uint64_t)a[7] * b[2]
   480         + (uint64_t)a[8] * b[1]
   481         + (uint64_t)a[9] * b[0];
   482      /* VERIFY_BITS(d, 64); */
   483      /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   484      t9 = d & M; d >>= 26;
   485      VERIFY_BITS(t9, 26);
   486      VERIFY_BITS(d, 38);
   487      /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
   488  
   489      c  = (uint64_t)a[0] * b[0];
   490      VERIFY_BITS(c, 60);
   491      /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
   492      d += (uint64_t)a[1] * b[9]
   493         + (uint64_t)a[2] * b[8]
   494         + (uint64_t)a[3] * b[7]
   495         + (uint64_t)a[4] * b[6]
   496         + (uint64_t)a[5] * b[5]
   497         + (uint64_t)a[6] * b[4]
   498         + (uint64_t)a[7] * b[3]
   499         + (uint64_t)a[8] * b[2]
   500         + (uint64_t)a[9] * b[1];
   501      VERIFY_BITS(d, 63);
   502      /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   503      u0 = d & M; d >>= 26; c += u0 * R0;
   504      VERIFY_BITS(u0, 26);
   505      VERIFY_BITS(d, 37);
   506      VERIFY_BITS(c, 61);
   507      /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   508      t0 = c & M; c >>= 26; c += u0 * R1;
   509      VERIFY_BITS(t0, 26);
   510      VERIFY_BITS(c, 37);
   511      /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   512      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
   513  
   514      c += (uint64_t)a[0] * b[1]
   515         + (uint64_t)a[1] * b[0];
   516      VERIFY_BITS(c, 62);
   517      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
   518      d += (uint64_t)a[2] * b[9]
   519         + (uint64_t)a[3] * b[8]
   520         + (uint64_t)a[4] * b[7]
   521         + (uint64_t)a[5] * b[6]
   522         + (uint64_t)a[6] * b[5]
   523         + (uint64_t)a[7] * b[4]
   524         + (uint64_t)a[8] * b[3]
   525         + (uint64_t)a[9] * b[2];
   526      VERIFY_BITS(d, 63);
   527      /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   528      u1 = d & M; d >>= 26; c += u1 * R0;
   529      VERIFY_BITS(u1, 26);
   530      VERIFY_BITS(d, 37);
   531      VERIFY_BITS(c, 63);
   532      /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   533      t1 = c & M; c >>= 26; c += u1 * R1;
   534      VERIFY_BITS(t1, 26);
   535      VERIFY_BITS(c, 38);
   536      /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   537      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
   538  
   539      c += (uint64_t)a[0] * b[2]
   540         + (uint64_t)a[1] * b[1]
   541         + (uint64_t)a[2] * b[0];
   542      VERIFY_BITS(c, 62);
   543      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   544      d += (uint64_t)a[3] * b[9]
   545         + (uint64_t)a[4] * b[8]
   546         + (uint64_t)a[5] * b[7]
   547         + (uint64_t)a[6] * b[6]
   548         + (uint64_t)a[7] * b[5]
   549         + (uint64_t)a[8] * b[4]
   550         + (uint64_t)a[9] * b[3];
   551      VERIFY_BITS(d, 63);
   552      /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   553      u2 = d & M; d >>= 26; c += u2 * R0;
   554      VERIFY_BITS(u2, 26);
   555      VERIFY_BITS(d, 37);
   556      VERIFY_BITS(c, 63);
   557      /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   558      t2 = c & M; c >>= 26; c += u2 * R1;
   559      VERIFY_BITS(t2, 26);
   560      VERIFY_BITS(c, 38);
   561      /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   562      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
   563  
   564      c += (uint64_t)a[0] * b[3]
   565         + (uint64_t)a[1] * b[2]
   566         + (uint64_t)a[2] * b[1]
   567         + (uint64_t)a[3] * b[0];
   568      VERIFY_BITS(c, 63);
   569      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   570      d += (uint64_t)a[4] * b[9]
   571         + (uint64_t)a[5] * b[8]
   572         + (uint64_t)a[6] * b[7]
   573         + (uint64_t)a[7] * b[6]
   574         + (uint64_t)a[8] * b[5]
   575         + (uint64_t)a[9] * b[4];
   576      VERIFY_BITS(d, 63);
   577      /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   578      u3 = d & M; d >>= 26; c += u3 * R0;
   579      VERIFY_BITS(u3, 26);
   580      VERIFY_BITS(d, 37);
   581      /* VERIFY_BITS(c, 64); */
   582      /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   583      t3 = c & M; c >>= 26; c += u3 * R1;
   584      VERIFY_BITS(t3, 26);
   585      VERIFY_BITS(c, 39);
   586      /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   587      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
   588  
   589      c += (uint64_t)a[0] * b[4]
   590         + (uint64_t)a[1] * b[3]
   591         + (uint64_t)a[2] * b[2]
   592         + (uint64_t)a[3] * b[1]
   593         + (uint64_t)a[4] * b[0];
   594      VERIFY_BITS(c, 63);
   595      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   596      d += (uint64_t)a[5] * b[9]
   597         + (uint64_t)a[6] * b[8]
   598         + (uint64_t)a[7] * b[7]
   599         + (uint64_t)a[8] * b[6]
   600         + (uint64_t)a[9] * b[5];
   601      VERIFY_BITS(d, 62);
   602      /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   603      u4 = d & M; d >>= 26; c += u4 * R0;
   604      VERIFY_BITS(u4, 26);
   605      VERIFY_BITS(d, 36);
   606      /* VERIFY_BITS(c, 64); */
   607      /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   608      t4 = c & M; c >>= 26; c += u4 * R1;
   609      VERIFY_BITS(t4, 26);
   610      VERIFY_BITS(c, 39);
   611      /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   612      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
   613  
   614      c += (uint64_t)a[0] * b[5]
   615         + (uint64_t)a[1] * b[4]
   616         + (uint64_t)a[2] * b[3]
   617         + (uint64_t)a[3] * b[2]
   618         + (uint64_t)a[4] * b[1]
   619         + (uint64_t)a[5] * b[0];
   620      VERIFY_BITS(c, 63);
   621      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   622      d += (uint64_t)a[6] * b[9]
   623         + (uint64_t)a[7] * b[8]
   624         + (uint64_t)a[8] * b[7]
   625         + (uint64_t)a[9] * b[6];
   626      VERIFY_BITS(d, 62);
   627      /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   628      u5 = d & M; d >>= 26; c += u5 * R0;
   629      VERIFY_BITS(u5, 26);
   630      VERIFY_BITS(d, 36);
   631      /* VERIFY_BITS(c, 64); */
   632      /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   633      t5 = c & M; c >>= 26; c += u5 * R1;
   634      VERIFY_BITS(t5, 26);
   635      VERIFY_BITS(c, 39);
   636      /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   637      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
   638  
   639      c += (uint64_t)a[0] * b[6]
   640         + (uint64_t)a[1] * b[5]
   641         + (uint64_t)a[2] * b[4]
   642         + (uint64_t)a[3] * b[3]
   643         + (uint64_t)a[4] * b[2]
   644         + (uint64_t)a[5] * b[1]
   645         + (uint64_t)a[6] * b[0];
   646      VERIFY_BITS(c, 63);
   647      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   648      d += (uint64_t)a[7] * b[9]
   649         + (uint64_t)a[8] * b[8]
   650         + (uint64_t)a[9] * b[7];
   651      VERIFY_BITS(d, 61);
   652      /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   653      u6 = d & M; d >>= 26; c += u6 * R0;
   654      VERIFY_BITS(u6, 26);
   655      VERIFY_BITS(d, 35);
   656      /* VERIFY_BITS(c, 64); */
   657      /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   658      t6 = c & M; c >>= 26; c += u6 * R1;
   659      VERIFY_BITS(t6, 26);
   660      VERIFY_BITS(c, 39);
   661      /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   662      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
   663  
   664      c += (uint64_t)a[0] * b[7]
   665         + (uint64_t)a[1] * b[6]
   666         + (uint64_t)a[2] * b[5]
   667         + (uint64_t)a[3] * b[4]
   668         + (uint64_t)a[4] * b[3]
   669         + (uint64_t)a[5] * b[2]
   670         + (uint64_t)a[6] * b[1]
   671         + (uint64_t)a[7] * b[0];
   672      /* VERIFY_BITS(c, 64); */
   673      VERIFY_CHECK(c <= 0x8000007C00000007ULL);
   674      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   675      d += (uint64_t)a[8] * b[9]
   676         + (uint64_t)a[9] * b[8];
   677      VERIFY_BITS(d, 58);
   678      /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   679      u7 = d & M; d >>= 26; c += u7 * R0;
   680      VERIFY_BITS(u7, 26);
   681      VERIFY_BITS(d, 32);
   682      /* VERIFY_BITS(c, 64); */
   683      VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
   684      /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   685      t7 = c & M; c >>= 26; c += u7 * R1;
   686      VERIFY_BITS(t7, 26);
   687      VERIFY_BITS(c, 38);
   688      /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   689      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
   690  
   691      c += (uint64_t)a[0] * b[8]
   692         + (uint64_t)a[1] * b[7]
   693         + (uint64_t)a[2] * b[6]
   694         + (uint64_t)a[3] * b[5]
   695         + (uint64_t)a[4] * b[4]
   696         + (uint64_t)a[5] * b[3]
   697         + (uint64_t)a[6] * b[2]
   698         + (uint64_t)a[7] * b[1]
   699         + (uint64_t)a[8] * b[0];
   700      /* VERIFY_BITS(c, 64); */
   701      VERIFY_CHECK(c <= 0x9000007B80000008ULL);
   702      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   703      d += (uint64_t)a[9] * b[9];
   704      VERIFY_BITS(d, 57);
   705      /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   706      u8 = d & M; d >>= 26; c += u8 * R0;
   707      VERIFY_BITS(u8, 26);
   708      VERIFY_BITS(d, 31);
   709      /* VERIFY_BITS(c, 64); */
   710      VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
   711      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   712  
   713      r[3] = t3;
   714      VERIFY_BITS(r[3], 26);
   715      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   716      r[4] = t4;
   717      VERIFY_BITS(r[4], 26);
   718      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   719      r[5] = t5;
   720      VERIFY_BITS(r[5], 26);
   721      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   722      r[6] = t6;
   723      VERIFY_BITS(r[6], 26);
   724      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   725      r[7] = t7;
   726      VERIFY_BITS(r[7], 26);
   727      /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   728  
   729      r[8] = c & M; c >>= 26; c += u8 * R1;
   730      VERIFY_BITS(r[8], 26);
   731      VERIFY_BITS(c, 39);
   732      /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   733      /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   734      c   += d * R0 + t9;
   735      VERIFY_BITS(c, 45);
   736      /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   737      r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
   738      VERIFY_BITS(r[9], 22);
   739      VERIFY_BITS(c, 46);
   740      /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   741      /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   742      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   743  
   744      d    = c * (R0 >> 4) + t0;
   745      VERIFY_BITS(d, 56);
   746      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   747      r[0] = d & M; d >>= 26;
   748      VERIFY_BITS(r[0], 26);
   749      VERIFY_BITS(d, 30);
   750      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   751      d   += c * (R1 >> 4) + t1;
   752      VERIFY_BITS(d, 53);
   753      VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
   754      /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   755      /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   756      r[1] = d & M; d >>= 26;
   757      VERIFY_BITS(r[1], 26);
   758      VERIFY_BITS(d, 27);
   759      VERIFY_CHECK(d <= 0x4000000ULL);
   760      /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   761      d   += t2;
   762      VERIFY_BITS(d, 27);
   763      /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   764      r[2] = d;
   765      VERIFY_BITS(r[2], 27);
   766      /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   767  }
   768  
/**
 * Square a field element stored as ten limbs.
 *
 * Writes to r[0..9] a limb representation of a value congruent to a*a under
 * the "mod n" bracket notation explained in the comment further down; the
 * last invariant in the body is [r9 ... r0] = [p18 ... p0].
 *
 * In:  a  ten limbs. The VERIFY_BITS checks at the top expect a[0..8] to fit
 *         in 30 bits and a[9] in 26 bits.
 * Out: r  ten limbs. Per the VERIFY_BITS checks on the stores, r[2] may use
 *         up to 27 bits, r[9] fits in 22 bits, and the other limbs fit in
 *         26 bits.
 *
 * Every read of a[] happens before the first store to r[], so r and a may
 * point to the same array.
 *
 * NOTE(review): VERIFY_BITS and VERIFY_CHECK are defined outside this block;
 * they presumably assert the stated bound only in VERIFY builds - confirm.
 */
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
    /* c accumulates the low product columns (p0..p8), d the high ones (p9..p18). */
    uint64_t c, d;
    /* u<k>: 26-bit pieces peeled off d that get folded into c through R0/R1. */
    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    /* t<k>: finished 26-bit limbs held back until all reads of a[] are done. */
    uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
    /* M masks one 26-bit limb. R0 and R1 are the fold constants from the
     * notation comment below: a value x ten limbs up equals x*R1 one limb up
     * plus x*R0 in place. */
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;

    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
    VERIFY_BITS(a[3], 30);
    VERIFY_BITS(a[4], 30);
    VERIFY_BITS(a[5], 30);
    VERIFY_BITS(a[6], 30);
    VERIFY_BITS(a[7], 30);
    VERIFY_BITS(a[8], 30);
    VERIFY_BITS(a[9], 26);

    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
     */

    /* Each cross term a[i]*a[j] with i != j occurs twice in px, so one factor
     * is doubled and the pair is multiplied once. The doubling is done in
     * 32 bits before widening; with the 30-bit bound on a[0..8] checked above
     * it cannot wrap. Diagonal terms a[i]*a[i] are added undoubled. */
    d  = (uint64_t)(a[0]*2) * a[9]
       + (uint64_t)(a[1]*2) * a[8]
       + (uint64_t)(a[2]*2) * a[7]
       + (uint64_t)(a[3]*2) * a[6]
       + (uint64_t)(a[4]*2) * a[5];
    /* VERIFY_BITS(d, 64); */
    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    t9 = d & M; d >>= 26;
    VERIFY_BITS(t9, 26);
    VERIFY_BITS(d, 38);
    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */

    /* From here each round k (0..8) follows the same pattern: add column pk
     * to c and column p(k+10) to d, peel the low 26 bits of d into uk, fold
     * uk*R0 into c, emit the low 26 bits of c as limb k, then carry and fold
     * uk*R1 into the next limb. */
    c  = (uint64_t)a[0] * a[0];
    VERIFY_BITS(c, 60);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
    d += (uint64_t)(a[1]*2) * a[9]
       + (uint64_t)(a[2]*2) * a[8]
       + (uint64_t)(a[3]*2) * a[7]
       + (uint64_t)(a[4]*2) * a[6]
       + (uint64_t)a[5] * a[5];
    VERIFY_BITS(d, 63);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    u0 = d & M; d >>= 26; c += u0 * R0;
    VERIFY_BITS(u0, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 61);
    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    t0 = c & M; c >>= 26; c += u0 * R1;
    VERIFY_BITS(t0, 26);
    VERIFY_BITS(c, 37);
    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */

    c += (uint64_t)(a[0]*2) * a[1];
    VERIFY_BITS(c, 62);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
    d += (uint64_t)(a[2]*2) * a[9]
       + (uint64_t)(a[3]*2) * a[8]
       + (uint64_t)(a[4]*2) * a[7]
       + (uint64_t)(a[5]*2) * a[6];
    VERIFY_BITS(d, 63);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    u1 = d & M; d >>= 26; c += u1 * R0;
    VERIFY_BITS(u1, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    t1 = c & M; c >>= 26; c += u1 * R1;
    VERIFY_BITS(t1, 26);
    VERIFY_BITS(c, 38);
    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[2]
       + (uint64_t)a[1] * a[1];
    VERIFY_BITS(c, 62);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    d += (uint64_t)(a[3]*2) * a[9]
       + (uint64_t)(a[4]*2) * a[8]
       + (uint64_t)(a[5]*2) * a[7]
       + (uint64_t)a[6] * a[6];
    VERIFY_BITS(d, 63);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    u2 = d & M; d >>= 26; c += u2 * R0;
    VERIFY_BITS(u2, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    t2 = c & M; c >>= 26; c += u2 * R1;
    VERIFY_BITS(t2, 26);
    VERIFY_BITS(c, 38);
    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[3]
       + (uint64_t)(a[1]*2) * a[2];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    d += (uint64_t)(a[4]*2) * a[9]
       + (uint64_t)(a[5]*2) * a[8]
       + (uint64_t)(a[6]*2) * a[7];
    VERIFY_BITS(d, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    u3 = d & M; d >>= 26; c += u3 * R0;
    VERIFY_BITS(u3, 26);
    VERIFY_BITS(d, 37);
    /* VERIFY_BITS(c, 64); */
    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    t3 = c & M; c >>= 26; c += u3 * R1;
    VERIFY_BITS(t3, 26);
    VERIFY_BITS(c, 39);
    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[4]
       + (uint64_t)(a[1]*2) * a[3]
       + (uint64_t)a[2] * a[2];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[5]*2) * a[9]
       + (uint64_t)(a[6]*2) * a[8]
       + (uint64_t)a[7] * a[7];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    u4 = d & M; d >>= 26; c += u4 * R0;
    VERIFY_BITS(u4, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    t4 = c & M; c >>= 26; c += u4 * R1;
    VERIFY_BITS(t4, 26);
    VERIFY_BITS(c, 39);
    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[5]
       + (uint64_t)(a[1]*2) * a[4]
       + (uint64_t)(a[2]*2) * a[3];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[6]*2) * a[9]
       + (uint64_t)(a[7]*2) * a[8];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    u5 = d & M; d >>= 26; c += u5 * R0;
    VERIFY_BITS(u5, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    t5 = c & M; c >>= 26; c += u5 * R1;
    VERIFY_BITS(t5, 26);
    VERIFY_BITS(c, 39);
    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[6]
       + (uint64_t)(a[1]*2) * a[5]
       + (uint64_t)(a[2]*2) * a[4]
       + (uint64_t)a[3] * a[3];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[7]*2) * a[9]
       + (uint64_t)a[8] * a[8];
    VERIFY_BITS(d, 61);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    u6 = d & M; d >>= 26; c += u6 * R0;
    VERIFY_BITS(u6, 26);
    VERIFY_BITS(d, 35);
    /* VERIFY_BITS(c, 64); */
    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    t6 = c & M; c >>= 26; c += u6 * R1;
    VERIFY_BITS(t6, 26);
    VERIFY_BITS(c, 39);
    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[7]
       + (uint64_t)(a[1]*2) * a[6]
       + (uint64_t)(a[2]*2) * a[5]
       + (uint64_t)(a[3]*2) * a[4];
    /* VERIFY_BITS(c, 64); */
    /* c may now use all 64 bits, so an explicit upper bound is checked instead
     * of a bit count. */
    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[8]*2) * a[9];
    VERIFY_BITS(d, 58);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    u7 = d & M; d >>= 26; c += u7 * R0;
    VERIFY_BITS(u7, 26);
    VERIFY_BITS(d, 32);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    t7 = c & M; c >>= 26; c += u7 * R1;
    VERIFY_BITS(t7, 26);
    VERIFY_BITS(c, 38);
    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[8]
       + (uint64_t)(a[1]*2) * a[7]
       + (uint64_t)(a[2]*2) * a[6]
       + (uint64_t)(a[3]*2) * a[5]
       + (uint64_t)a[4] * a[4];
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* Last read of a[]: the top column p18 is the single diagonal term. */
    d += (uint64_t)a[9] * a[9];
    VERIFY_BITS(d, 57);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    u8 = d & M; d >>= 26; c += u8 * R0;
    VERIFY_BITS(u8, 26);
    VERIFY_BITS(d, 31);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    /* No a[] element is read past this point, so the stores to r[] below are
     * safe even when r and a are the same array. */
    r[3] = t3;
    VERIFY_BITS(r[3], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[4] = t4;
    VERIFY_BITS(r[4], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[5] = t5;
    VERIFY_BITS(r[5], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[6] = t6;
    VERIFY_BITS(r[6], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[7] = t7;
    VERIFY_BITS(r[7], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    r[8] = c & M; c >>= 26; c += u8 * R1;
    VERIFY_BITS(r[8], 26);
    VERIFY_BITS(c, 39);
    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* Fold what is left of d together with the t9 limb saved at the start. */
    c   += d * R0 + t9;
    VERIFY_BITS(c, 45);
    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* The top limb keeps only 22 bits (M >> 4), giving 9*26 + 22 = 256 bits in
     * total. Because the carry out of it sits 4 bits lower than a full-limb
     * carry, R1 is pre-shifted left by 4 here and the wrap-around constants
     * below are shifted right by 4. */
    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    VERIFY_BITS(r[9], 22);
    VERIFY_BITS(c, 46);
    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    /* Wrap the carry out of the top limb back into limbs 0 and 1; d is reused
     * as the accumulator for this final pass. */
    d    = c * (R0 >> 4) + t0;
    VERIFY_BITS(d, 56);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[0] = d & M; d >>= 26;
    VERIFY_BITS(r[0], 26);
    VERIFY_BITS(d, 30);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d   += c * (R1 >> 4) + t1;
    VERIFY_BITS(d, 53);
    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[1] = d & M; d >>= 26;
    VERIFY_BITS(r[1], 26);
    VERIFY_BITS(d, 27);
    VERIFY_CHECK(d <= 0x4000000ULL);
    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d   += t2;
    VERIFY_BITS(d, 27);
    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* The carry chain stops here: r[2] is stored unmasked and may therefore be
     * one bit wider than the other limbs. */
    r[2] = d;
    VERIFY_BITS(r[2], 27);
    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}
  1042  #endif
  1043  
  1044  static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
  1045  #ifdef VERIFY
  1046      VERIFY_CHECK(a->magnitude <= 8);
  1047      VERIFY_CHECK(b->magnitude <= 8);
  1048      secp256k1_fe_verify(a);
  1049      secp256k1_fe_verify(b);
  1050      VERIFY_CHECK(r != b);
  1051  #endif
  1052      secp256k1_fe_mul_inner(r->n, a->n, b->n);
  1053  #ifdef VERIFY
  1054      r->magnitude = 1;
  1055      r->normalized = 0;
  1056      secp256k1_fe_verify(r);
  1057  #endif
  1058  }
  1059  
  1060  static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
  1061  #ifdef VERIFY
  1062      VERIFY_CHECK(a->magnitude <= 8);
  1063      secp256k1_fe_verify(a);
  1064  #endif
  1065      secp256k1_fe_sqr_inner(r->n, a->n);
  1066  #ifdef VERIFY
  1067      r->magnitude = 1;
  1068      r->normalized = 0;
  1069      secp256k1_fe_verify(r);
  1070  #endif
  1071  }
  1072  
  1073  static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
  1074      uint32_t mask0, mask1;
  1075      mask0 = flag + ~((uint32_t)0);
  1076      mask1 = ~mask0;
  1077      r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
  1078      r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
  1079      r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
  1080      r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
  1081      r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
  1082      r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
  1083      r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
  1084      r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
  1085      r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
  1086      r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
  1087  #ifdef VERIFY
  1088      if (a->magnitude > r->magnitude) {
  1089          r->magnitude = a->magnitude;
  1090      }
  1091      r->normalized &= a->normalized;
  1092  #endif
  1093  }
  1094  
  1095  static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
  1096      uint32_t mask0, mask1;
  1097      mask0 = flag + ~((uint32_t)0);
  1098      mask1 = ~mask0;
  1099      r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
  1100      r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
  1101      r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
  1102      r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
  1103      r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
  1104      r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
  1105      r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
  1106      r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
  1107  }
  1108  
  1109  static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
  1110  #ifdef VERIFY
  1111      VERIFY_CHECK(a->normalized);
  1112  #endif
  1113      r->n[0] = a->n[0] | a->n[1] << 26;
  1114      r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
  1115      r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
  1116      r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
  1117      r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
  1118      r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
  1119      r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
  1120      r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
  1121  }
  1122  
  1123  static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
  1124      r->n[0] = a->n[0] & 0x3FFFFFFUL;
  1125      r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
  1126      r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
  1127      r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
  1128      r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
  1129      r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
  1130      r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
  1131      r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
  1132      r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
  1133      r->n[9] = a->n[7] >> 10;
  1134  #ifdef VERIFY
  1135      r->magnitude = 1;
  1136      r->normalized = 1;
  1137  #endif
  1138  }
  1139  
  1140  #endif