github.com/guiltylotus/go-ethereum@v1.9.7/crypto/secp256k1/libsecp256k1/src/field_10x26_impl.h

/**********************************************************************
 * Copyright (c) 2013, 2014 Pieter Wuille                             *
 * Distributed under the MIT software license, see the accompanying   *
 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/

#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
#define _SECP256K1_FIELD_REPR_IMPL_H_

#include "util.h"
#include "num.h"
#include "field.h"

#ifdef VERIFY
static void secp256k1_fe_verify(const secp256k1_fe *a) {
    const uint32_t *d = a->n;
    int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
    r &= (d[0] <= 0x3FFFFFFUL * m);
    r &= (d[1] <= 0x3FFFFFFUL * m);
    r &= (d[2] <= 0x3FFFFFFUL * m);
    r &= (d[3] <= 0x3FFFFFFUL * m);
    r &= (d[4] <= 0x3FFFFFFUL * m);
    r &= (d[5] <= 0x3FFFFFFUL * m);
    r &= (d[6] <= 0x3FFFFFFUL * m);
    r &= (d[7] <= 0x3FFFFFFUL * m);
    r &= (d[8] <= 0x3FFFFFFUL * m);
    r &= (d[9] <= 0x03FFFFFUL * m);
    r &= (a->magnitude >= 0);
    r &= (a->magnitude <= 32);
    if (a->normalized) {
        r &= (a->magnitude <= 1);
        if (r && (d[9] == 0x03FFFFFUL)) {
            uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
            if (mid == 0x3FFFFFFUL) {
                r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
            }
        }
    }
    VERIFY_CHECK(r == 1);
}
#endif

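/* Note on the representation used throughout this file: a field element is
 * stored as ten uint32_t limbs n[0..9] with value
 * n[0] + n[1]*2^26 + n[2]*2^52 + ... + n[9]*2^234, i.e. 26 bits per limb
 * except for the top limb, which holds the remaining 22 bits.  The field
 * prime is p = 2^256 - 2^32 - 977 = 2^256 - 0x1000003D1.  The 'magnitude'
 * bounds how far each limb may exceed its nominal maximum (a factor of up
 * to 2*magnitude, as checked by secp256k1_fe_verify above), and
 * 'normalized' means magnitude <= 1 and the value is fully reduced below p. */
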
static void secp256k1_fe_normalize(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t m;
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));

    /* Apply the final reduction (for constant-time behaviour, we do it always) */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

    /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
    VERIFY_CHECK(t9 >> 22 == x);

    /* Mask off the possible multiple of 2^256 from the final reduction */
    t9 &= 0x03FFFFFUL;

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

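/* Why the carry is folded back as x*0x3D1 into t0 and x<<6 into t1:
 * 2^256 mod p = 0x1000003D1, and in 26-bit limbs
 * 0x1000003D1 = (0x40 << 26) + 0x3D1, so a carry x out of bit 256 re-enters
 * the representation as x*0x3D1 at limb 0 and x*0x40 at limb 1.  The ">= p"
 * test above works the same way: the value is at least p exactly when the
 * limbs t2..t9 are at their maximum and adding 0x3D1 to t0 and 0x40 to t1
 * would propagate a carry out of bit 256. */
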
static void secp256k1_fe_normalize_weak(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    secp256k1_fe_verify(r);
#endif
}

static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t m;
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));

    if (x) {
        t0 += 0x3D1UL; t1 += (x << 6);
        t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
        t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
        t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
        t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
        t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
        t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
        t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
        t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
        t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

        /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
        VERIFY_CHECK(t9 >> 22 == x);

        /* Mask off the possible multiple of 2^256 from the final reduction */
        t9 &= 0x03FFFFFUL;
    }

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    uint32_t z0, z1;

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
    z0 |= t9; z1 &= t9 ^ 0x3C00000UL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
}

static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
    uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    uint32_t z0, z1;
    uint32_t x;

    t0 = r->n[0];
    t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    x = t9 >> 22;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL;

    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    z0 = t0 & 0x3FFFFFFUL;
    z1 = z0 ^ 0x3D0UL;

    /* Fast return path should catch the majority of cases */
    if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
        return 0;
    }

    t1 = r->n[1];
    t2 = r->n[2];
    t3 = r->n[3];
    t4 = r->n[4];
    t5 = r->n[5];
    t6 = r->n[6];
    t7 = r->n[7];
    t8 = r->n[8];

    t9 &= 0x03FFFFFUL;
    t1 += (x << 6);

    t1 += (t0 >> 26);
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
    z0 |= t9; z1 &= t9 ^ 0x3C00000UL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
}

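/* How the zero test works: after the first pass the raw integer is congruent
 * to the input, so a value of zero mod p shows up as a raw value of 0 or of
 * p itself.  z0 is the OR of all limbs and is zero only for the raw value 0;
 * z1 ANDs each limb XORed with a mask that turns the corresponding limb of p
 * (0x3FFFC2F, 0x3FFFFBF, seven times 0x3FFFFFF, 0x3FFFF F at the top) into
 * all ones, so it equals 0x3FFFFFF only for the raw value p.  The _var
 * variant checks the lowest limb first and returns early, which is faster on
 * average but not constant time. */
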
SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) {
    r->n[0] = a;
    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) {
    const uint32_t *t = a->n;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
}

SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    return a->n[0] & 1;
}

SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) {
    int i;
#ifdef VERIFY
    a->magnitude = 0;
    a->normalized = 1;
#endif
    for (i=0; i<10; i++) {
        a->n[i] = 0;
    }
}

static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
    int i;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    VERIFY_CHECK(b->normalized);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
#endif
    for (i = 9; i >= 0; i--) {
        if (a->n[i] > b->n[i]) {
            return 1;
        }
        if (a->n[i] < b->n[i]) {
            return -1;
        }
    }
    return 0;
}

static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
    int i;
    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
    r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
    for (i=0; i<32; i++) {
        int j;
        for (j=0; j<4; j++) {
            int limb = (8*i+2*j)/26;
            int shift = (8*i+2*j)%26;
            r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
        }
    }
    if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
        return 0;
    }
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
    return 1;
}

/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
    int i;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    for (i=0; i<32; i++) {
        int j;
        int c = 0;
        for (j=0; j<4; j++) {
            int limb = (8*i+2*j)/26;
            int shift = (8*i+2*j)%26;
            c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
        }
        r[31-i] = c;
    }
}

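/* Packing example for set_b32/get_b32: bit k of the 256-bit big-endian
 * integer (k = 0 for the least significant bit, found in byte a[31]) lives
 * in limb k/26 at bit position k%26.  The loops move two bits at a time, so
 * for byte index i (counted from the least significant end) and bit pair j
 * the global position is 8*i + 2*j; e.g. bit 30 lands in limb 1 at
 * position 4.  secp256k1_fe_set_b32 additionally rejects encodings that are
 * >= p, returning 0, using the same overflow test as secp256k1_fe_normalize. */
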
SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= m);
    secp256k1_fe_verify(a);
#endif
    r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
    r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
    r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
    r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
    r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
    r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
    r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
    r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
    r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
    r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
#ifdef VERIFY
    r->magnitude = m + 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe *r, int a) {
    r->n[0] *= a;
    r->n[1] *= a;
    r->n[2] *= a;
    r->n[3] *= a;
    r->n[4] *= a;
    r->n[5] *= a;
    r->n[6] *= a;
    r->n[7] *= a;
    r->n[8] *= a;
    r->n[9] *= a;
#ifdef VERIFY
    r->magnitude *= a;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) {
#ifdef VERIFY
    secp256k1_fe_verify(a);
#endif
    r->n[0] += a->n[0];
    r->n[1] += a->n[1];
    r->n[2] += a->n[2];
    r->n[3] += a->n[3];
    r->n[4] += a->n[4];
    r->n[5] += a->n[5];
    r->n[6] += a->n[6];
    r->n[7] += a->n[7];
    r->n[8] += a->n[8];
    r->n[9] += a->n[9];
#ifdef VERIFY
    r->magnitude += a->magnitude;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

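/* Negation works because the per-limb constants 0x3FFFC2F, 0x3FFFFBF,
 * 0x3FFFFFF (seven times) and 0x3FFFFF are exactly the limbs of p, so the
 * result is 2*(m+1)*p - a, which is congruent to -a mod p.  Since a has
 * magnitude at most m, every limb subtraction stays non-negative, and the
 * result has magnitude m + 1.  Similarly, secp256k1_fe_mul_int scales the
 * magnitude by its argument and secp256k1_fe_add adds the two magnitudes;
 * callers must keep the total within the bounds enforced by
 * secp256k1_fe_verify (and within the magnitude-8 limit of
 * secp256k1_fe_mul/secp256k1_fe_sqr below). */
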
#if defined(USE_EXTERNAL_ASM)

/* External assembler implementation */
void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);

#else

#ifdef VERIFY
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#else
#define VERIFY_BITS(x, n) do { } while(0)
#endif

SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
    uint64_t c, d;
    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;

    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
    VERIFY_BITS(a[3], 30);
    VERIFY_BITS(a[4], 30);
    VERIFY_BITS(a[5], 30);
    VERIFY_BITS(a[6], 30);
    VERIFY_BITS(a[7], 30);
    VERIFY_BITS(a[8], 30);
    VERIFY_BITS(a[9], 26);
    VERIFY_BITS(b[0], 30);
    VERIFY_BITS(b[1], 30);
    VERIFY_BITS(b[2], 30);
    VERIFY_BITS(b[3], 30);
    VERIFY_BITS(b[4], 30);
    VERIFY_BITS(b[5], 30);
    VERIFY_BITS(b[6], 30);
    VERIFY_BITS(b[7], 30);
    VERIFY_BITS(b[8], 30);
    VERIFY_BITS(b[9], 26);

    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
     */
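    /* Why [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]: an entry at position 10
     * carries weight 2^(26*10) = 2^260, and 2^260 = 2^4 * 2^256 is congruent
     * to 2^4 * 0x1000003D1 = 0x1000003D10 (mod p).  Split into 26-bit limbs,
     * 0x1000003D10 = (0x400 << 26) + 0x3D10 = (R1 << 26) + R0, so a value x
     * at position 10 folds down to x*R1 at position 1 and x*R0 at position 0.
     * This is the reduction applied repeatedly below. */
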
    d = (uint64_t)a[0] * b[9]
      + (uint64_t)a[1] * b[8]
      + (uint64_t)a[2] * b[7]
      + (uint64_t)a[3] * b[6]
      + (uint64_t)a[4] * b[5]
      + (uint64_t)a[5] * b[4]
      + (uint64_t)a[6] * b[3]
      + (uint64_t)a[7] * b[2]
      + (uint64_t)a[8] * b[1]
      + (uint64_t)a[9] * b[0];
    /* VERIFY_BITS(d, 64); */
    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    t9 = d & M; d >>= 26;
    VERIFY_BITS(t9, 26);
    VERIFY_BITS(d, 38);
    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */

    c = (uint64_t)a[0] * b[0];
    VERIFY_BITS(c, 60);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
    d += (uint64_t)a[1] * b[9]
       + (uint64_t)a[2] * b[8]
       + (uint64_t)a[3] * b[7]
       + (uint64_t)a[4] * b[6]
       + (uint64_t)a[5] * b[5]
       + (uint64_t)a[6] * b[4]
       + (uint64_t)a[7] * b[3]
       + (uint64_t)a[8] * b[2]
       + (uint64_t)a[9] * b[1];
    VERIFY_BITS(d, 63);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    u0 = d & M; d >>= 26; c += u0 * R0;
    VERIFY_BITS(u0, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 61);
    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    t0 = c & M; c >>= 26; c += u0 * R1;
    VERIFY_BITS(t0, 26);
    VERIFY_BITS(c, 37);
    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */

    c += (uint64_t)a[0] * b[1]
       + (uint64_t)a[1] * b[0];
    VERIFY_BITS(c, 62);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
    d += (uint64_t)a[2] * b[9]
       + (uint64_t)a[3] * b[8]
       + (uint64_t)a[4] * b[7]
       + (uint64_t)a[5] * b[6]
       + (uint64_t)a[6] * b[5]
       + (uint64_t)a[7] * b[4]
       + (uint64_t)a[8] * b[3]
       + (uint64_t)a[9] * b[2];
    VERIFY_BITS(d, 63);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    u1 = d & M; d >>= 26; c += u1 * R0;
    VERIFY_BITS(u1, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    t1 = c & M; c >>= 26; c += u1 * R1;
    VERIFY_BITS(t1, 26);
    VERIFY_BITS(c, 38);
    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */

    c += (uint64_t)a[0] * b[2]
       + (uint64_t)a[1] * b[1]
       + (uint64_t)a[2] * b[0];
    VERIFY_BITS(c, 62);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    d += (uint64_t)a[3] * b[9]
       + (uint64_t)a[4] * b[8]
       + (uint64_t)a[5] * b[7]
       + (uint64_t)a[6] * b[6]
       + (uint64_t)a[7] * b[5]
       + (uint64_t)a[8] * b[4]
       + (uint64_t)a[9] * b[3];
    VERIFY_BITS(d, 63);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    u2 = d & M; d >>= 26; c += u2 * R0;
    VERIFY_BITS(u2, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    t2 = c & M; c >>= 26; c += u2 * R1;
    VERIFY_BITS(t2, 26);
    VERIFY_BITS(c, 38);
    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */

    c += (uint64_t)a[0] * b[3]
       + (uint64_t)a[1] * b[2]
       + (uint64_t)a[2] * b[1]
       + (uint64_t)a[3] * b[0];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    d += (uint64_t)a[4] * b[9]
       + (uint64_t)a[5] * b[8]
       + (uint64_t)a[6] * b[7]
       + (uint64_t)a[7] * b[6]
       + (uint64_t)a[8] * b[5]
       + (uint64_t)a[9] * b[4];
    VERIFY_BITS(d, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    u3 = d & M; d >>= 26; c += u3 * R0;
    VERIFY_BITS(u3, 26);
    VERIFY_BITS(d, 37);
    /* VERIFY_BITS(c, 64); */
    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    t3 = c & M; c >>= 26; c += u3 * R1;
    VERIFY_BITS(t3, 26);
    VERIFY_BITS(c, 39);
    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */

    c += (uint64_t)a[0] * b[4]
       + (uint64_t)a[1] * b[3]
       + (uint64_t)a[2] * b[2]
       + (uint64_t)a[3] * b[1]
       + (uint64_t)a[4] * b[0];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[5] * b[9]
       + (uint64_t)a[6] * b[8]
       + (uint64_t)a[7] * b[7]
       + (uint64_t)a[8] * b[6]
       + (uint64_t)a[9] * b[5];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    u4 = d & M; d >>= 26; c += u4 * R0;
    VERIFY_BITS(u4, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    t4 = c & M; c >>= 26; c += u4 * R1;
    VERIFY_BITS(t4, 26);
    VERIFY_BITS(c, 39);
    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */

    c += (uint64_t)a[0] * b[5]
       + (uint64_t)a[1] * b[4]
       + (uint64_t)a[2] * b[3]
       + (uint64_t)a[3] * b[2]
       + (uint64_t)a[4] * b[1]
       + (uint64_t)a[5] * b[0];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[6] * b[9]
       + (uint64_t)a[7] * b[8]
       + (uint64_t)a[8] * b[7]
       + (uint64_t)a[9] * b[6];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    u5 = d & M; d >>= 26; c += u5 * R0;
    VERIFY_BITS(u5, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    t5 = c & M; c >>= 26; c += u5 * R1;
    VERIFY_BITS(t5, 26);
    VERIFY_BITS(c, 39);
    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)a[0] * b[6]
       + (uint64_t)a[1] * b[5]
       + (uint64_t)a[2] * b[4]
       + (uint64_t)a[3] * b[3]
       + (uint64_t)a[4] * b[2]
       + (uint64_t)a[5] * b[1]
       + (uint64_t)a[6] * b[0];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[7] * b[9]
       + (uint64_t)a[8] * b[8]
       + (uint64_t)a[9] * b[7];
    VERIFY_BITS(d, 61);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    u6 = d & M; d >>= 26; c += u6 * R0;
    VERIFY_BITS(u6, 26);
    VERIFY_BITS(d, 35);
    /* VERIFY_BITS(c, 64); */
    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    t6 = c & M; c >>= 26; c += u6 * R1;
    VERIFY_BITS(t6, 26);
    VERIFY_BITS(c, 39);
    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)a[0] * b[7]
       + (uint64_t)a[1] * b[6]
       + (uint64_t)a[2] * b[5]
       + (uint64_t)a[3] * b[4]
       + (uint64_t)a[4] * b[3]
       + (uint64_t)a[5] * b[2]
       + (uint64_t)a[6] * b[1]
       + (uint64_t)a[7] * b[0];
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[8] * b[9]
       + (uint64_t)a[9] * b[8];
    VERIFY_BITS(d, 58);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    u7 = d & M; d >>= 26; c += u7 * R0;
    VERIFY_BITS(u7, 26);
    VERIFY_BITS(d, 32);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    t7 = c & M; c >>= 26; c += u7 * R1;
    VERIFY_BITS(t7, 26);
    VERIFY_BITS(c, 38);
    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)a[0] * b[8]
       + (uint64_t)a[1] * b[7]
       + (uint64_t)a[2] * b[6]
       + (uint64_t)a[3] * b[5]
       + (uint64_t)a[4] * b[4]
       + (uint64_t)a[5] * b[3]
       + (uint64_t)a[6] * b[2]
       + (uint64_t)a[7] * b[1]
       + (uint64_t)a[8] * b[0];
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[9] * b[9];
    VERIFY_BITS(d, 57);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    u8 = d & M; d >>= 26; c += u8 * R0;
    VERIFY_BITS(u8, 26);
    VERIFY_BITS(d, 31);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    r[3] = t3;
    VERIFY_BITS(r[3], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[4] = t4;
    VERIFY_BITS(r[4], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[5] = t5;
    VERIFY_BITS(r[5], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[6] = t6;
    VERIFY_BITS(r[6], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[7] = t7;
    VERIFY_BITS(r[7], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    r[8] = c & M; c >>= 26; c += u8 * R1;
    VERIFY_BITS(r[8], 26);
    VERIFY_BITS(c, 39);
    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    c += d * R0 + t9;
    VERIFY_BITS(c, 45);
    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    VERIFY_BITS(r[9], 22);
    VERIFY_BITS(c, 46);
    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    d = c * (R0 >> 4) + t0;
    VERIFY_BITS(d, 56);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[0] = d & M; d >>= 26;
    VERIFY_BITS(r[0], 26);
    VERIFY_BITS(d, 30);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += c * (R1 >> 4) + t1;
    VERIFY_BITS(d, 53);
    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[1] = d & M; d >>= 26;
    VERIFY_BITS(r[1], 26);
    VERIFY_BITS(d, 27);
    VERIFY_CHECK(d <= 0x4000000ULL);
    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += t2;
    VERIFY_BITS(d, 27);
    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[2] = d;
    VERIFY_BITS(r[2], 27);
    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}

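/* In the tail above, r[9] keeps only 22 bits (M >> 4 = 0x3FFFFF), so after
 * the shift the remaining carry c represents the part of the product at
 * bit 256 and above.  It is folded back into the two lowest limbs with
 * c*(R0 >> 4) = c*0x3D1 and c*(R1 >> 4) = c*0x40, the same
 * 2^256 = 0x1000003D1 (mod p) identity used by the normalization routines.
 * The wrappers below record the result as having magnitude 1. */
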
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
    uint64_t c, d;
    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;

    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
    VERIFY_BITS(a[3], 30);
    VERIFY_BITS(a[4], 30);
    VERIFY_BITS(a[5], 30);
    VERIFY_BITS(a[6], 30);
    VERIFY_BITS(a[7], 30);
    VERIFY_BITS(a[8], 30);
    VERIFY_BITS(a[9], 26);

    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
     */
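    /* The schedule below mirrors secp256k1_fe_mul_inner, but each
     * off-diagonal product a[i]*a[j] (i < j) is computed once and doubled via
     * (a[i]*2) * a[j], while the diagonal terms a[i]*a[i] are added once.
     * This roughly halves the number of 32x32->64-bit multiplications; the
     * doubling is safe in 32 bits because the inputs are at most 30 bits, as
     * asserted above. */
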
    d = (uint64_t)(a[0]*2) * a[9]
      + (uint64_t)(a[1]*2) * a[8]
      + (uint64_t)(a[2]*2) * a[7]
      + (uint64_t)(a[3]*2) * a[6]
      + (uint64_t)(a[4]*2) * a[5];
    /* VERIFY_BITS(d, 64); */
    /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    t9 = d & M; d >>= 26;
    VERIFY_BITS(t9, 26);
    VERIFY_BITS(d, 38);
    /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */

    c = (uint64_t)a[0] * a[0];
    VERIFY_BITS(c, 60);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
    d += (uint64_t)(a[1]*2) * a[9]
       + (uint64_t)(a[2]*2) * a[8]
       + (uint64_t)(a[3]*2) * a[7]
       + (uint64_t)(a[4]*2) * a[6]
       + (uint64_t)a[5] * a[5];
    VERIFY_BITS(d, 63);
    /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    u0 = d & M; d >>= 26; c += u0 * R0;
    VERIFY_BITS(u0, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 61);
    /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    t0 = c & M; c >>= 26; c += u0 * R1;
    VERIFY_BITS(t0, 26);
    VERIFY_BITS(c, 37);
    /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */

    c += (uint64_t)(a[0]*2) * a[1];
    VERIFY_BITS(c, 62);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
    d += (uint64_t)(a[2]*2) * a[9]
       + (uint64_t)(a[3]*2) * a[8]
       + (uint64_t)(a[4]*2) * a[7]
       + (uint64_t)(a[5]*2) * a[6];
    VERIFY_BITS(d, 63);
    /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    u1 = d & M; d >>= 26; c += u1 * R0;
    VERIFY_BITS(u1, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    t1 = c & M; c >>= 26; c += u1 * R1;
    VERIFY_BITS(t1, 26);
    VERIFY_BITS(c, 38);
    /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[2]
       + (uint64_t)a[1] * a[1];
    VERIFY_BITS(c, 62);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    d += (uint64_t)(a[3]*2) * a[9]
       + (uint64_t)(a[4]*2) * a[8]
       + (uint64_t)(a[5]*2) * a[7]
       + (uint64_t)a[6] * a[6];
    VERIFY_BITS(d, 63);
    /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    u2 = d & M; d >>= 26; c += u2 * R0;
    VERIFY_BITS(u2, 26);
    VERIFY_BITS(d, 37);
    VERIFY_BITS(c, 63);
    /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    t2 = c & M; c >>= 26; c += u2 * R1;
    VERIFY_BITS(t2, 26);
    VERIFY_BITS(c, 38);
    /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[3]
       + (uint64_t)(a[1]*2) * a[2];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    d += (uint64_t)(a[4]*2) * a[9]
       + (uint64_t)(a[5]*2) * a[8]
       + (uint64_t)(a[6]*2) * a[7];
    VERIFY_BITS(d, 63);
    /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    u3 = d & M; d >>= 26; c += u3 * R0;
    VERIFY_BITS(u3, 26);
    VERIFY_BITS(d, 37);
    /* VERIFY_BITS(c, 64); */
    /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    t3 = c & M; c >>= 26; c += u3 * R1;
    VERIFY_BITS(t3, 26);
    VERIFY_BITS(c, 39);
    /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[4]
       + (uint64_t)(a[1]*2) * a[3]
       + (uint64_t)a[2] * a[2];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[5]*2) * a[9]
       + (uint64_t)(a[6]*2) * a[8]
       + (uint64_t)a[7] * a[7];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    u4 = d & M; d >>= 26; c += u4 * R0;
    VERIFY_BITS(u4, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    t4 = c & M; c >>= 26; c += u4 * R1;
    VERIFY_BITS(t4, 26);
    VERIFY_BITS(c, 39);
    /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[5]
       + (uint64_t)(a[1]*2) * a[4]
       + (uint64_t)(a[2]*2) * a[3];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[6]*2) * a[9]
       + (uint64_t)(a[7]*2) * a[8];
    VERIFY_BITS(d, 62);
    /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    u5 = d & M; d >>= 26; c += u5 * R0;
    VERIFY_BITS(u5, 26);
    VERIFY_BITS(d, 36);
    /* VERIFY_BITS(c, 64); */
    /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    t5 = c & M; c >>= 26; c += u5 * R1;
    VERIFY_BITS(t5, 26);
    VERIFY_BITS(c, 39);
    /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[6]
       + (uint64_t)(a[1]*2) * a[5]
       + (uint64_t)(a[2]*2) * a[4]
       + (uint64_t)a[3] * a[3];
    VERIFY_BITS(c, 63);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[7]*2) * a[9]
       + (uint64_t)a[8] * a[8];
    VERIFY_BITS(d, 61);
    /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    u6 = d & M; d >>= 26; c += u6 * R0;
    VERIFY_BITS(u6, 26);
    VERIFY_BITS(d, 35);
    /* VERIFY_BITS(c, 64); */
    /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    t6 = c & M; c >>= 26; c += u6 * R1;
    VERIFY_BITS(t6, 26);
    VERIFY_BITS(c, 39);
    /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[7]
       + (uint64_t)(a[1]*2) * a[6]
       + (uint64_t)(a[2]*2) * a[5]
       + (uint64_t)(a[3]*2) * a[4];
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x8000007C00000007ULL);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)(a[8]*2) * a[9];
    VERIFY_BITS(d, 58);
    /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    u7 = d & M; d >>= 26; c += u7 * R0;
    VERIFY_BITS(u7, 26);
    VERIFY_BITS(d, 32);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
    /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    t7 = c & M; c >>= 26; c += u7 * R1;
    VERIFY_BITS(t7, 26);
    VERIFY_BITS(c, 38);
    /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */

    c += (uint64_t)(a[0]*2) * a[8]
       + (uint64_t)(a[1]*2) * a[7]
       + (uint64_t)(a[2]*2) * a[6]
       + (uint64_t)(a[3]*2) * a[5]
       + (uint64_t)a[4] * a[4];
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000007B80000008ULL);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += (uint64_t)a[9] * a[9];
    VERIFY_BITS(d, 57);
    /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    u8 = d & M; d >>= 26; c += u8 * R0;
    VERIFY_BITS(u8, 26);
    VERIFY_BITS(d, 31);
    /* VERIFY_BITS(c, 64); */
    VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    r[3] = t3;
    VERIFY_BITS(r[3], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[4] = t4;
    VERIFY_BITS(r[4], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[5] = t5;
    VERIFY_BITS(r[5], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[6] = t6;
    VERIFY_BITS(r[6], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[7] = t7;
    VERIFY_BITS(r[7], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    r[8] = c & M; c >>= 26; c += u8 * R1;
    VERIFY_BITS(r[8], 26);
    VERIFY_BITS(c, 39);
    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    c += d * R0 + t9;
    VERIFY_BITS(c, 45);
    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    VERIFY_BITS(r[9], 22);
    VERIFY_BITS(c, 46);
    /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */

    d = c * (R0 >> 4) + t0;
    VERIFY_BITS(d, 56);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[0] = d & M; d >>= 26;
    VERIFY_BITS(r[0], 26);
    VERIFY_BITS(d, 30);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += c * (R1 >> 4) + t1;
    VERIFY_BITS(d, 53);
    VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
    /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[1] = d & M; d >>= 26;
    VERIFY_BITS(r[1], 26);
    VERIFY_BITS(d, 27);
    VERIFY_CHECK(d <= 0x4000000ULL);
    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d += t2;
    VERIFY_BITS(d, 27);
    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[2] = d;
    VERIFY_BITS(r[2], 27);
    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}
#endif

static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    VERIFY_CHECK(b->magnitude <= 8);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
    VERIFY_CHECK(r != b);
#endif
    secp256k1_fe_mul_inner(r->n, a->n, b->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    secp256k1_fe_verify(a);
#endif
    secp256k1_fe_sqr_inner(r->n, a->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

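/* secp256k1_fe_mul and secp256k1_fe_sqr accept inputs of magnitude up to 8
 * and produce a magnitude-1 (but not normalized) result; for fe_mul the
 * output must not alias b (see the VERIFY_CHECK above).  A minimal usage
 * sketch, kept as a comment, with hypothetical 32-byte inputs in1 and in2
 * (illustrative only, not part of this file):
 *
 *     secp256k1_fe x, y, z;
 *     unsigned char out32[32];
 *     if (!secp256k1_fe_set_b32(&x, in1) || !secp256k1_fe_set_b32(&y, in2)) {
 *         // handle encodings >= p
 *     }
 *     secp256k1_fe_sqr(&z, &x);        // z = x^2, magnitude 1
 *     secp256k1_fe_mul(&z, &z, &y);    // z = x^2 * y; output aliases a, not b
 *     secp256k1_fe_normalize(&z);      // reduce to the canonical value
 *     secp256k1_fe_get_b32(out32, &z); // requires a normalized input
 */
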
static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
#ifdef VERIFY
    if (a->magnitude > r->magnitude) {
        r->magnitude = a->magnitude;
    }
    r->normalized &= a->normalized;
#endif
}

static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
}

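/* secp256k1_fe_cmov and secp256k1_fe_storage_cmov are constant-time
 * conditional moves: mask0 = flag + ~0u is all ones when flag is 0 and zero
 * when flag is 1, so r is either kept or overwritten with a, limb by limb,
 * without a data-dependent branch.  The secp256k1_fe_storage form handled
 * below packs the same 256 bits into eight 32-bit words; converting to it
 * requires a normalized input, and converting back always yields a
 * normalized, magnitude-1 element. */
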
static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
#endif
    r->n[0] = a->n[0] | a->n[1] << 26;
    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
}

static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
    r->n[0] = a->n[0] & 0x3FFFFFFUL;
    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
    r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
    r->n[9] = a->n[7] >> 10;
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
#endif
}

#endif