github.com/ethereumproject/go-ethereum@v5.5.2+incompatible/crypto/secp256k1/libsecp256k1/src/field_10x26_impl.h (about) 1 /********************************************************************** 2 * Copyright (c) 2013, 2014 Pieter Wuille * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.* 5 **********************************************************************/ 6 7 #ifndef _SECP256K1_FIELD_REPR_IMPL_H_ 8 #define _SECP256K1_FIELD_REPR_IMPL_H_ 9 10 #include <stdio.h> 11 #include <string.h> 12 #include "util.h" 13 #include "num.h" 14 #include "field.h" 15 16 #ifdef VERIFY 17 static void secp256k1_fe_verify(const secp256k1_fe *a) { 18 const uint32_t *d = a->n; 19 int m = a->normalized ? 1 : 2 * a->magnitude, r = 1; 20 r &= (d[0] <= 0x3FFFFFFUL * m); 21 r &= (d[1] <= 0x3FFFFFFUL * m); 22 r &= (d[2] <= 0x3FFFFFFUL * m); 23 r &= (d[3] <= 0x3FFFFFFUL * m); 24 r &= (d[4] <= 0x3FFFFFFUL * m); 25 r &= (d[5] <= 0x3FFFFFFUL * m); 26 r &= (d[6] <= 0x3FFFFFFUL * m); 27 r &= (d[7] <= 0x3FFFFFFUL * m); 28 r &= (d[8] <= 0x3FFFFFFUL * m); 29 r &= (d[9] <= 0x03FFFFFUL * m); 30 r &= (a->magnitude >= 0); 31 r &= (a->magnitude <= 32); 32 if (a->normalized) { 33 r &= (a->magnitude <= 1); 34 if (r && (d[9] == 0x03FFFFFUL)) { 35 uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2]; 36 if (mid == 0x3FFFFFFUL) { 37 r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); 38 } 39 } 40 } 41 VERIFY_CHECK(r == 1); 42 } 43 #else 44 static void secp256k1_fe_verify(const secp256k1_fe *a) { 45 (void)a; 46 } 47 #endif 48 49 static void secp256k1_fe_normalize(secp256k1_fe *r) { 50 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 51 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 52 53 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 54 uint32_t m; 55 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 56 57 /* 
The first pass ensures the magnitude is 1, ... */ 58 t0 += x * 0x3D1UL; t1 += (x << 6); 59 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 60 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 61 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 62 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 63 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 64 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 65 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 66 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 67 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 68 69 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 70 VERIFY_CHECK(t9 >> 23 == 0); 71 72 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 73 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 74 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 75 76 /* Apply the final reduction (for constant-time behaviour, we do it always) */ 77 t0 += x * 0x3D1UL; t1 += (x << 6); 78 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 79 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 80 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 81 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 82 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 83 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 84 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 85 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 86 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 87 88 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 89 VERIFY_CHECK(t9 >> 22 == x); 90 91 /* Mask off the possible multiple of 2^256 from the final reduction */ 92 t9 &= 0x03FFFFFUL; 93 94 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 95 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 96 97 #ifdef VERIFY 98 r->magnitude = 1; 99 r->normalized = 1; 100 secp256k1_fe_verify(r); 101 #endif 102 } 103 104 static void secp256k1_fe_normalize_weak(secp256k1_fe *r) { 105 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 106 t5 = 
r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 107 108 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 109 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 110 111 /* The first pass ensures the magnitude is 1, ... */ 112 t0 += x * 0x3D1UL; t1 += (x << 6); 113 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 114 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 115 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 116 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 117 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 118 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 119 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 120 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 121 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 122 123 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 124 VERIFY_CHECK(t9 >> 23 == 0); 125 126 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 127 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 128 129 #ifdef VERIFY 130 r->magnitude = 1; 131 secp256k1_fe_verify(r); 132 #endif 133 } 134 135 static void secp256k1_fe_normalize_var(secp256k1_fe *r) { 136 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 137 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 138 139 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 140 uint32_t m; 141 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 142 143 /* The first pass ensures the magnitude is 1, ... */ 144 t0 += x * 0x3D1UL; t1 += (x << 6); 145 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 146 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 147 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 148 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 149 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 150 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 151 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 152 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 153 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 154 155 /* ... 
except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 156 VERIFY_CHECK(t9 >> 23 == 0); 157 158 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 159 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 160 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 161 162 if (x) { 163 t0 += 0x3D1UL; t1 += (x << 6); 164 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 165 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 166 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 167 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 168 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 169 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 170 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 171 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 172 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 173 174 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 175 VERIFY_CHECK(t9 >> 22 == x); 176 177 /* Mask off the possible multiple of 2^256 from the final reduction */ 178 t9 &= 0x03FFFFFUL; 179 } 180 181 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 182 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 183 184 #ifdef VERIFY 185 r->magnitude = 1; 186 r->normalized = 1; 187 secp256k1_fe_verify(r); 188 #endif 189 } 190 191 static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) { 192 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 193 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 194 195 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ 196 uint32_t z0, z1; 197 198 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 199 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 200 201 /* The first pass ensures the magnitude is 1, ... 
*/ 202 t0 += x * 0x3D1UL; t1 += (x << 6); 203 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; 204 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 205 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 206 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 207 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 208 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 209 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 210 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 211 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 212 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 213 214 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 215 VERIFY_CHECK(t9 >> 23 == 0); 216 217 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 218 } 219 220 static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) { 221 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; 222 uint32_t z0, z1; 223 uint32_t x; 224 225 t0 = r->n[0]; 226 t9 = r->n[9]; 227 228 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 229 x = t9 >> 22; 230 231 /* The first pass ensures the magnitude is 1, ... 
*/ 232 t0 += x * 0x3D1UL; 233 234 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ 235 z0 = t0 & 0x3FFFFFFUL; 236 z1 = z0 ^ 0x3D0UL; 237 238 /* Fast return path should catch the majority of cases */ 239 if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) { 240 return 0; 241 } 242 243 t1 = r->n[1]; 244 t2 = r->n[2]; 245 t3 = r->n[3]; 246 t4 = r->n[4]; 247 t5 = r->n[5]; 248 t6 = r->n[6]; 249 t7 = r->n[7]; 250 t8 = r->n[8]; 251 252 t9 &= 0x03FFFFFUL; 253 t1 += (x << 6); 254 255 t1 += (t0 >> 26); 256 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 257 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 258 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 259 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 260 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 261 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 262 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 263 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 264 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 265 266 /* ... except for a possible carry at bit 22 of t9 (i.e. 
bit 256 of the field element) */ 267 VERIFY_CHECK(t9 >> 23 == 0); 268 269 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 270 } 271 272 SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) { 273 r->n[0] = a; 274 r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; 275 #ifdef VERIFY 276 r->magnitude = 1; 277 r->normalized = 1; 278 secp256k1_fe_verify(r); 279 #endif 280 } 281 282 SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) { 283 const uint32_t *t = a->n; 284 #ifdef VERIFY 285 VERIFY_CHECK(a->normalized); 286 secp256k1_fe_verify(a); 287 #endif 288 return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; 289 } 290 291 SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) { 292 #ifdef VERIFY 293 VERIFY_CHECK(a->normalized); 294 secp256k1_fe_verify(a); 295 #endif 296 return a->n[0] & 1; 297 } 298 299 SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) { 300 int i; 301 #ifdef VERIFY 302 a->magnitude = 0; 303 a->normalized = 1; 304 #endif 305 for (i=0; i<10; i++) { 306 a->n[i] = 0; 307 } 308 } 309 310 static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) { 311 int i; 312 #ifdef VERIFY 313 VERIFY_CHECK(a->normalized); 314 VERIFY_CHECK(b->normalized); 315 secp256k1_fe_verify(a); 316 secp256k1_fe_verify(b); 317 #endif 318 for (i = 9; i >= 0; i--) { 319 if (a->n[i] > b->n[i]) { 320 return 1; 321 } 322 if (a->n[i] < b->n[i]) { 323 return -1; 324 } 325 } 326 return 0; 327 } 328 329 static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) { 330 int i; 331 r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; 332 r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; 333 for (i=0; i<32; i++) { 334 int j; 335 for (j=0; j<4; j++) { 336 int limb = (8*i+2*j)/26; 337 int shift = (8*i+2*j)%26; 338 r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift; 339 } 340 } 341 if (r->n[9] == 0x3FFFFFUL && (r->n[8] & 
r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) { 342 return 0; 343 } 344 #ifdef VERIFY 345 r->magnitude = 1; 346 r->normalized = 1; 347 secp256k1_fe_verify(r); 348 #endif 349 return 1; 350 } 351 352 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ 353 static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) { 354 int i; 355 #ifdef VERIFY 356 VERIFY_CHECK(a->normalized); 357 secp256k1_fe_verify(a); 358 #endif 359 for (i=0; i<32; i++) { 360 int j; 361 int c = 0; 362 for (j=0; j<4; j++) { 363 int limb = (8*i+2*j)/26; 364 int shift = (8*i+2*j)%26; 365 c |= ((a->n[limb] >> shift) & 0x3) << (2 * j); 366 } 367 r[31-i] = c; 368 } 369 } 370 371 SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) { 372 #ifdef VERIFY 373 VERIFY_CHECK(a->magnitude <= m); 374 secp256k1_fe_verify(a); 375 #endif 376 r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; 377 r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; 378 r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2]; 379 r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3]; 380 r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4]; 381 r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5]; 382 r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6]; 383 r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7]; 384 r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8]; 385 r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9]; 386 #ifdef VERIFY 387 r->magnitude = m + 1; 388 r->normalized = 0; 389 secp256k1_fe_verify(r); 390 #endif 391 } 392 393 SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe *r, int a) { 394 r->n[0] *= a; 395 r->n[1] *= a; 396 r->n[2] *= a; 397 r->n[3] *= a; 398 r->n[4] *= a; 399 r->n[5] *= a; 400 r->n[6] *= a; 401 r->n[7] *= a; 402 r->n[8] *= a; 403 r->n[9] *= a; 404 #ifdef VERIFY 405 r->magnitude *= a; 406 r->normalized = 0; 407 secp256k1_fe_verify(r); 408 #endif 409 } 410 411 
SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) { 412 #ifdef VERIFY 413 secp256k1_fe_verify(a); 414 #endif 415 r->n[0] += a->n[0]; 416 r->n[1] += a->n[1]; 417 r->n[2] += a->n[2]; 418 r->n[3] += a->n[3]; 419 r->n[4] += a->n[4]; 420 r->n[5] += a->n[5]; 421 r->n[6] += a->n[6]; 422 r->n[7] += a->n[7]; 423 r->n[8] += a->n[8]; 424 r->n[9] += a->n[9]; 425 #ifdef VERIFY 426 r->magnitude += a->magnitude; 427 r->normalized = 0; 428 secp256k1_fe_verify(r); 429 #endif 430 } 431 432 #ifdef VERIFY 433 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) 434 #else 435 #define VERIFY_BITS(x, n) do { } while(0) 436 #endif 437 438 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { 439 uint64_t c, d; 440 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 441 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; 442 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 443 444 VERIFY_BITS(a[0], 30); 445 VERIFY_BITS(a[1], 30); 446 VERIFY_BITS(a[2], 30); 447 VERIFY_BITS(a[3], 30); 448 VERIFY_BITS(a[4], 30); 449 VERIFY_BITS(a[5], 30); 450 VERIFY_BITS(a[6], 30); 451 VERIFY_BITS(a[7], 30); 452 VERIFY_BITS(a[8], 30); 453 VERIFY_BITS(a[9], 26); 454 VERIFY_BITS(b[0], 30); 455 VERIFY_BITS(b[1], 30); 456 VERIFY_BITS(b[2], 30); 457 VERIFY_BITS(b[3], 30); 458 VERIFY_BITS(b[4], 30); 459 VERIFY_BITS(b[5], 30); 460 VERIFY_BITS(b[6], 30); 461 VERIFY_BITS(b[7], 30); 462 VERIFY_BITS(b[8], 30); 463 VERIFY_BITS(b[9], 26); 464 465 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 466 * px is a shorthand for sum(a[i]*b[x-i], i=0..x). 467 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 
468 */ 469 470 d = (uint64_t)a[0] * b[9] 471 + (uint64_t)a[1] * b[8] 472 + (uint64_t)a[2] * b[7] 473 + (uint64_t)a[3] * b[6] 474 + (uint64_t)a[4] * b[5] 475 + (uint64_t)a[5] * b[4] 476 + (uint64_t)a[6] * b[3] 477 + (uint64_t)a[7] * b[2] 478 + (uint64_t)a[8] * b[1] 479 + (uint64_t)a[9] * b[0]; 480 /* VERIFY_BITS(d, 64); */ 481 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 482 t9 = d & M; d >>= 26; 483 VERIFY_BITS(t9, 26); 484 VERIFY_BITS(d, 38); 485 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 486 487 c = (uint64_t)a[0] * b[0]; 488 VERIFY_BITS(c, 60); 489 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 490 d += (uint64_t)a[1] * b[9] 491 + (uint64_t)a[2] * b[8] 492 + (uint64_t)a[3] * b[7] 493 + (uint64_t)a[4] * b[6] 494 + (uint64_t)a[5] * b[5] 495 + (uint64_t)a[6] * b[4] 496 + (uint64_t)a[7] * b[3] 497 + (uint64_t)a[8] * b[2] 498 + (uint64_t)a[9] * b[1]; 499 VERIFY_BITS(d, 63); 500 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 501 u0 = d & M; d >>= 26; c += u0 * R0; 502 VERIFY_BITS(u0, 26); 503 VERIFY_BITS(d, 37); 504 VERIFY_BITS(c, 61); 505 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 506 t0 = c & M; c >>= 26; c += u0 * R1; 507 VERIFY_BITS(t0, 26); 508 VERIFY_BITS(c, 37); 509 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 510 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 511 512 c += (uint64_t)a[0] * b[1] 513 + (uint64_t)a[1] * b[0]; 514 VERIFY_BITS(c, 62); 515 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 516 d += (uint64_t)a[2] * b[9] 517 + (uint64_t)a[3] * b[8] 518 + (uint64_t)a[4] * b[7] 519 + (uint64_t)a[5] * b[6] 520 + (uint64_t)a[6] * b[5] 521 + (uint64_t)a[7] * b[4] 522 + (uint64_t)a[8] * b[3] 523 + (uint64_t)a[9] * b[2]; 524 VERIFY_BITS(d, 63); 525 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 526 u1 = d & M; d >>= 26; c += u1 * R0; 527 VERIFY_BITS(u1, 26); 528 VERIFY_BITS(d, 37); 529 VERIFY_BITS(c, 
63); 530 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 531 t1 = c & M; c >>= 26; c += u1 * R1; 532 VERIFY_BITS(t1, 26); 533 VERIFY_BITS(c, 38); 534 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 535 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 536 537 c += (uint64_t)a[0] * b[2] 538 + (uint64_t)a[1] * b[1] 539 + (uint64_t)a[2] * b[0]; 540 VERIFY_BITS(c, 62); 541 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 542 d += (uint64_t)a[3] * b[9] 543 + (uint64_t)a[4] * b[8] 544 + (uint64_t)a[5] * b[7] 545 + (uint64_t)a[6] * b[6] 546 + (uint64_t)a[7] * b[5] 547 + (uint64_t)a[8] * b[4] 548 + (uint64_t)a[9] * b[3]; 549 VERIFY_BITS(d, 63); 550 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 551 u2 = d & M; d >>= 26; c += u2 * R0; 552 VERIFY_BITS(u2, 26); 553 VERIFY_BITS(d, 37); 554 VERIFY_BITS(c, 63); 555 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 556 t2 = c & M; c >>= 26; c += u2 * R1; 557 VERIFY_BITS(t2, 26); 558 VERIFY_BITS(c, 38); 559 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 560 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 561 562 c += (uint64_t)a[0] * b[3] 563 + (uint64_t)a[1] * b[2] 564 + (uint64_t)a[2] * b[1] 565 + (uint64_t)a[3] * b[0]; 566 VERIFY_BITS(c, 63); 567 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 568 d += (uint64_t)a[4] * b[9] 569 + (uint64_t)a[5] * b[8] 570 + (uint64_t)a[6] * b[7] 571 + (uint64_t)a[7] * b[6] 572 + (uint64_t)a[8] * b[5] 573 + (uint64_t)a[9] * b[4]; 574 VERIFY_BITS(d, 63); 575 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 576 u3 = d & M; d >>= 26; c += u3 * R0; 577 VERIFY_BITS(u3, 26); 578 VERIFY_BITS(d, 37); 579 /* VERIFY_BITS(c, 64); */ 580 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = 
[p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 581 t3 = c & M; c >>= 26; c += u3 * R1; 582 VERIFY_BITS(t3, 26); 583 VERIFY_BITS(c, 39); 584 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 585 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 586 587 c += (uint64_t)a[0] * b[4] 588 + (uint64_t)a[1] * b[3] 589 + (uint64_t)a[2] * b[2] 590 + (uint64_t)a[3] * b[1] 591 + (uint64_t)a[4] * b[0]; 592 VERIFY_BITS(c, 63); 593 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 594 d += (uint64_t)a[5] * b[9] 595 + (uint64_t)a[6] * b[8] 596 + (uint64_t)a[7] * b[7] 597 + (uint64_t)a[8] * b[6] 598 + (uint64_t)a[9] * b[5]; 599 VERIFY_BITS(d, 62); 600 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 601 u4 = d & M; d >>= 26; c += u4 * R0; 602 VERIFY_BITS(u4, 26); 603 VERIFY_BITS(d, 36); 604 /* VERIFY_BITS(c, 64); */ 605 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 606 t4 = c & M; c >>= 26; c += u4 * R1; 607 VERIFY_BITS(t4, 26); 608 VERIFY_BITS(c, 39); 609 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 610 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 611 612 c += (uint64_t)a[0] * b[5] 613 + (uint64_t)a[1] * b[4] 614 + (uint64_t)a[2] * b[3] 615 + (uint64_t)a[3] * b[2] 616 + (uint64_t)a[4] * b[1] 617 + (uint64_t)a[5] * b[0]; 618 VERIFY_BITS(c, 63); 619 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 620 d += (uint64_t)a[6] * b[9] 621 + (uint64_t)a[7] * b[8] 622 + (uint64_t)a[8] * b[7] 623 + (uint64_t)a[9] * b[6]; 624 VERIFY_BITS(d, 62); 625 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 626 u5 = d & M; d >>= 26; c += u5 * R0; 627 VERIFY_BITS(u5, 26); 
628 VERIFY_BITS(d, 36); 629 /* VERIFY_BITS(c, 64); */ 630 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 631 t5 = c & M; c >>= 26; c += u5 * R1; 632 VERIFY_BITS(t5, 26); 633 VERIFY_BITS(c, 39); 634 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 635 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 636 637 c += (uint64_t)a[0] * b[6] 638 + (uint64_t)a[1] * b[5] 639 + (uint64_t)a[2] * b[4] 640 + (uint64_t)a[3] * b[3] 641 + (uint64_t)a[4] * b[2] 642 + (uint64_t)a[5] * b[1] 643 + (uint64_t)a[6] * b[0]; 644 VERIFY_BITS(c, 63); 645 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 646 d += (uint64_t)a[7] * b[9] 647 + (uint64_t)a[8] * b[8] 648 + (uint64_t)a[9] * b[7]; 649 VERIFY_BITS(d, 61); 650 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 651 u6 = d & M; d >>= 26; c += u6 * R0; 652 VERIFY_BITS(u6, 26); 653 VERIFY_BITS(d, 35); 654 /* VERIFY_BITS(c, 64); */ 655 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 656 t6 = c & M; c >>= 26; c += u6 * R1; 657 VERIFY_BITS(t6, 26); 658 VERIFY_BITS(c, 39); 659 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 660 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 661 662 c += (uint64_t)a[0] * b[7] 663 + (uint64_t)a[1] * b[6] 664 + (uint64_t)a[2] * b[5] 665 + (uint64_t)a[3] * b[4] 666 + (uint64_t)a[4] * b[3] 667 + (uint64_t)a[5] * b[2] 668 + (uint64_t)a[6] * b[1] 669 + (uint64_t)a[7] * b[0]; 670 /* VERIFY_BITS(c, 64); */ 671 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 672 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 
p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 673 d += (uint64_t)a[8] * b[9] 674 + (uint64_t)a[9] * b[8]; 675 VERIFY_BITS(d, 58); 676 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 677 u7 = d & M; d >>= 26; c += u7 * R0; 678 VERIFY_BITS(u7, 26); 679 VERIFY_BITS(d, 32); 680 /* VERIFY_BITS(c, 64); */ 681 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 682 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 683 t7 = c & M; c >>= 26; c += u7 * R1; 684 VERIFY_BITS(t7, 26); 685 VERIFY_BITS(c, 38); 686 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 687 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 688 689 c += (uint64_t)a[0] * b[8] 690 + (uint64_t)a[1] * b[7] 691 + (uint64_t)a[2] * b[6] 692 + (uint64_t)a[3] * b[5] 693 + (uint64_t)a[4] * b[4] 694 + (uint64_t)a[5] * b[3] 695 + (uint64_t)a[6] * b[2] 696 + (uint64_t)a[7] * b[1] 697 + (uint64_t)a[8] * b[0]; 698 /* VERIFY_BITS(c, 64); */ 699 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 700 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 701 d += (uint64_t)a[9] * b[9]; 702 VERIFY_BITS(d, 57); 703 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 704 u8 = d & M; d >>= 26; c += u8 * R0; 705 VERIFY_BITS(u8, 26); 706 VERIFY_BITS(d, 31); 707 /* VERIFY_BITS(c, 64); */ 708 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 709 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 710 711 r[3] = t3; 712 VERIFY_BITS(r[3], 26); 713 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 
p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 714 r[4] = t4; 715 VERIFY_BITS(r[4], 26); 716 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 717 r[5] = t5; 718 VERIFY_BITS(r[5], 26); 719 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 720 r[6] = t6; 721 VERIFY_BITS(r[6], 26); 722 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 723 r[7] = t7; 724 VERIFY_BITS(r[7], 26); 725 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 726 727 r[8] = c & M; c >>= 26; c += u8 * R1; 728 VERIFY_BITS(r[8], 26); 729 VERIFY_BITS(c, 39); 730 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 731 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 732 c += d * R0 + t9; 733 VERIFY_BITS(c, 45); 734 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 735 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 736 VERIFY_BITS(r[9], 22); 737 VERIFY_BITS(c, 46); 738 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 739 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 740 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 741 742 d = c * (R0 >> 4) + t0; 743 VERIFY_BITS(d, 56); 744 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 
p6 p5 p4 p3 p2 p1 p0] */ 745 r[0] = d & M; d >>= 26; 746 VERIFY_BITS(r[0], 26); 747 VERIFY_BITS(d, 30); 748 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 749 d += c * (R1 >> 4) + t1; 750 VERIFY_BITS(d, 53); 751 VERIFY_CHECK(d <= 0x10000003FFFFBFULL); 752 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 753 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 754 r[1] = d & M; d >>= 26; 755 VERIFY_BITS(r[1], 26); 756 VERIFY_BITS(d, 27); 757 VERIFY_CHECK(d <= 0x4000000ULL); 758 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 759 d += t2; 760 VERIFY_BITS(d, 27); 761 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 762 r[2] = d; 763 VERIFY_BITS(r[2], 27); 764 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 765 } 766 767 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { 768 uint64_t c, d; 769 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 770 uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7; 771 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 772 773 VERIFY_BITS(a[0], 30); 774 VERIFY_BITS(a[1], 30); 775 VERIFY_BITS(a[2], 30); 776 VERIFY_BITS(a[3], 30); 777 VERIFY_BITS(a[4], 30); 778 VERIFY_BITS(a[5], 30); 779 VERIFY_BITS(a[6], 30); 780 VERIFY_BITS(a[7], 30); 781 VERIFY_BITS(a[8], 30); 782 VERIFY_BITS(a[9], 26); 783 784 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 785 * px is a shorthand for sum(a[i]*a[x-i], i=0..x). 786 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 
787 */ 788 789 d = (uint64_t)(a[0]*2) * a[9] 790 + (uint64_t)(a[1]*2) * a[8] 791 + (uint64_t)(a[2]*2) * a[7] 792 + (uint64_t)(a[3]*2) * a[6] 793 + (uint64_t)(a[4]*2) * a[5]; 794 /* VERIFY_BITS(d, 64); */ 795 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 796 t9 = d & M; d >>= 26; 797 VERIFY_BITS(t9, 26); 798 VERIFY_BITS(d, 38); 799 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 800 801 c = (uint64_t)a[0] * a[0]; 802 VERIFY_BITS(c, 60); 803 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 804 d += (uint64_t)(a[1]*2) * a[9] 805 + (uint64_t)(a[2]*2) * a[8] 806 + (uint64_t)(a[3]*2) * a[7] 807 + (uint64_t)(a[4]*2) * a[6] 808 + (uint64_t)a[5] * a[5]; 809 VERIFY_BITS(d, 63); 810 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 811 u0 = d & M; d >>= 26; c += u0 * R0; 812 VERIFY_BITS(u0, 26); 813 VERIFY_BITS(d, 37); 814 VERIFY_BITS(c, 61); 815 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 816 t0 = c & M; c >>= 26; c += u0 * R1; 817 VERIFY_BITS(t0, 26); 818 VERIFY_BITS(c, 37); 819 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 820 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 821 822 c += (uint64_t)(a[0]*2) * a[1]; 823 VERIFY_BITS(c, 62); 824 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 825 d += (uint64_t)(a[2]*2) * a[9] 826 + (uint64_t)(a[3]*2) * a[8] 827 + (uint64_t)(a[4]*2) * a[7] 828 + (uint64_t)(a[5]*2) * a[6]; 829 VERIFY_BITS(d, 63); 830 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 831 u1 = d & M; d >>= 26; c += u1 * R0; 832 VERIFY_BITS(u1, 26); 833 VERIFY_BITS(d, 37); 834 VERIFY_BITS(c, 63); 835 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 836 t1 = c & M; c >>= 26; c += u1 * R1; 837 VERIFY_BITS(t1, 26); 838 VERIFY_BITS(c, 38); 839 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 840 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] 
*/ 841 842 c += (uint64_t)(a[0]*2) * a[2] 843 + (uint64_t)a[1] * a[1]; 844 VERIFY_BITS(c, 62); 845 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 846 d += (uint64_t)(a[3]*2) * a[9] 847 + (uint64_t)(a[4]*2) * a[8] 848 + (uint64_t)(a[5]*2) * a[7] 849 + (uint64_t)a[6] * a[6]; 850 VERIFY_BITS(d, 63); 851 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 852 u2 = d & M; d >>= 26; c += u2 * R0; 853 VERIFY_BITS(u2, 26); 854 VERIFY_BITS(d, 37); 855 VERIFY_BITS(c, 63); 856 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 857 t2 = c & M; c >>= 26; c += u2 * R1; 858 VERIFY_BITS(t2, 26); 859 VERIFY_BITS(c, 38); 860 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 861 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 862 863 c += (uint64_t)(a[0]*2) * a[3] 864 + (uint64_t)(a[1]*2) * a[2]; 865 VERIFY_BITS(c, 63); 866 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 867 d += (uint64_t)(a[4]*2) * a[9] 868 + (uint64_t)(a[5]*2) * a[8] 869 + (uint64_t)(a[6]*2) * a[7]; 870 VERIFY_BITS(d, 63); 871 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 872 u3 = d & M; d >>= 26; c += u3 * R0; 873 VERIFY_BITS(u3, 26); 874 VERIFY_BITS(d, 37); 875 /* VERIFY_BITS(c, 64); */ 876 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 877 t3 = c & M; c >>= 26; c += u3 * R1; 878 VERIFY_BITS(t3, 26); 879 VERIFY_BITS(c, 39); 880 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 881 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 882 883 c += (uint64_t)(a[0]*2) * a[4] 884 + (uint64_t)(a[1]*2) * a[3] 885 + (uint64_t)a[2] * a[2]; 886 VERIFY_BITS(c, 63); 887 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 888 d 
+= (uint64_t)(a[5]*2) * a[9] 889 + (uint64_t)(a[6]*2) * a[8] 890 + (uint64_t)a[7] * a[7]; 891 VERIFY_BITS(d, 62); 892 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 893 u4 = d & M; d >>= 26; c += u4 * R0; 894 VERIFY_BITS(u4, 26); 895 VERIFY_BITS(d, 36); 896 /* VERIFY_BITS(c, 64); */ 897 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 898 t4 = c & M; c >>= 26; c += u4 * R1; 899 VERIFY_BITS(t4, 26); 900 VERIFY_BITS(c, 39); 901 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 902 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 903 904 c += (uint64_t)(a[0]*2) * a[5] 905 + (uint64_t)(a[1]*2) * a[4] 906 + (uint64_t)(a[2]*2) * a[3]; 907 VERIFY_BITS(c, 63); 908 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 909 d += (uint64_t)(a[6]*2) * a[9] 910 + (uint64_t)(a[7]*2) * a[8]; 911 VERIFY_BITS(d, 62); 912 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 913 u5 = d & M; d >>= 26; c += u5 * R0; 914 VERIFY_BITS(u5, 26); 915 VERIFY_BITS(d, 36); 916 /* VERIFY_BITS(c, 64); */ 917 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 918 t5 = c & M; c >>= 26; c += u5 * R1; 919 VERIFY_BITS(t5, 26); 920 VERIFY_BITS(c, 39); 921 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 922 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 923 924 c += (uint64_t)(a[0]*2) * a[6] 925 + (uint64_t)(a[1]*2) * a[5] 926 + (uint64_t)(a[2]*2) * a[4] 927 + (uint64_t)a[3] * a[3]; 928 VERIFY_BITS(c, 63); 929 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 930 d += 
(uint64_t)(a[7]*2) * a[9] 931 + (uint64_t)a[8] * a[8]; 932 VERIFY_BITS(d, 61); 933 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 934 u6 = d & M; d >>= 26; c += u6 * R0; 935 VERIFY_BITS(u6, 26); 936 VERIFY_BITS(d, 35); 937 /* VERIFY_BITS(c, 64); */ 938 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 939 t6 = c & M; c >>= 26; c += u6 * R1; 940 VERIFY_BITS(t6, 26); 941 VERIFY_BITS(c, 39); 942 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 943 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 944 945 c += (uint64_t)(a[0]*2) * a[7] 946 + (uint64_t)(a[1]*2) * a[6] 947 + (uint64_t)(a[2]*2) * a[5] 948 + (uint64_t)(a[3]*2) * a[4]; 949 /* VERIFY_BITS(c, 64); */ 950 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 951 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 952 d += (uint64_t)(a[8]*2) * a[9]; 953 VERIFY_BITS(d, 58); 954 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 955 u7 = d & M; d >>= 26; c += u7 * R0; 956 VERIFY_BITS(u7, 26); 957 VERIFY_BITS(d, 32); 958 /* VERIFY_BITS(c, 64); */ 959 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 960 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 961 t7 = c & M; c >>= 26; c += u7 * R1; 962 VERIFY_BITS(t7, 26); 963 VERIFY_BITS(c, 38); 964 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 965 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 966 967 c += (uint64_t)(a[0]*2) * a[8] 968 + (uint64_t)(a[1]*2) * a[7] 969 + 
(uint64_t)(a[2]*2) * a[6] 970 + (uint64_t)(a[3]*2) * a[5] 971 + (uint64_t)a[4] * a[4]; 972 /* VERIFY_BITS(c, 64); */ 973 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 974 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 975 d += (uint64_t)a[9] * a[9]; 976 VERIFY_BITS(d, 57); 977 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 978 u8 = d & M; d >>= 26; c += u8 * R0; 979 VERIFY_BITS(u8, 26); 980 VERIFY_BITS(d, 31); 981 /* VERIFY_BITS(c, 64); */ 982 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 983 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 984 985 r[3] = t3; 986 VERIFY_BITS(r[3], 26); 987 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 988 r[4] = t4; 989 VERIFY_BITS(r[4], 26); 990 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 991 r[5] = t5; 992 VERIFY_BITS(r[5], 26); 993 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 994 r[6] = t6; 995 VERIFY_BITS(r[6], 26); 996 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 997 r[7] = t7; 998 VERIFY_BITS(r[7], 26); 999 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1000 1001 r[8] = c & M; c >>= 26; c += u8 * R1; 1002 VERIFY_BITS(r[8], 26); 1003 VERIFY_BITS(c, 39); 1004 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1005 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 
p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1006 c += d * R0 + t9; 1007 VERIFY_BITS(c, 45); 1008 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1009 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 1010 VERIFY_BITS(r[9], 22); 1011 VERIFY_BITS(c, 46); 1012 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1013 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1014 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1015 1016 d = c * (R0 >> 4) + t0; 1017 VERIFY_BITS(d, 56); 1018 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1019 r[0] = d & M; d >>= 26; 1020 VERIFY_BITS(r[0], 26); 1021 VERIFY_BITS(d, 30); 1022 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1023 d += c * (R1 >> 4) + t1; 1024 VERIFY_BITS(d, 53); 1025 VERIFY_CHECK(d <= 0x10000003FFFFBFULL); 1026 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1027 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1028 r[1] = d & M; d >>= 26; 1029 VERIFY_BITS(r[1], 26); 1030 VERIFY_BITS(d, 27); 1031 VERIFY_CHECK(d <= 0x4000000ULL); 1032 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1033 d += t2; 1034 VERIFY_BITS(d, 27); 1035 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1036 r[2] = d; 1037 VERIFY_BITS(r[2], 27); 1038 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] 
= [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}


/* Multiply two field elements: r = a * b (mod p).
 * Inputs may have magnitude up to 8 (checked under VERIFY); the result has
 * magnitude 1 but is NOT normalized. r must not alias b (b is RESTRICT and
 * is read while r->n is written); r aliasing a appears permitted since only
 * b is restrict-qualified — NOTE(review): confirm against the field.h contract. */
static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    VERIFY_CHECK(b->magnitude <= 8);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
    /* The inner multiply reads b while writing r, so output must not alias b. */
    VERIFY_CHECK(r != b);
#endif
    secp256k1_fe_mul_inner(r->n, a->n, b->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

/* Square a field element: r = a^2 (mod p).
 * Input may have magnitude up to 8; the result has magnitude 1 but is NOT
 * normalized. r may alias a (only one input, read into the inner routine). */
static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    secp256k1_fe_verify(a);
#endif
    secp256k1_fe_sqr_inner(r->n, a->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

/* Conditionally copy a into r: if flag is nonzero r becomes a copy of a,
 * otherwise r is left unchanged. Implemented branch-free with masks so the
 * memory/instruction trace does not depend on the (potentially secret) flag:
 * mask0 = flag + ~0 = flag - 1, i.e. all-ones when flag == 0 and all-zeros
 * when flag == 1; mask1 is its complement. */
static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
#ifdef VERIFY
    /* Track the worst case: the result's magnitude is the larger of the two,
     * and it is only normalized if both inputs were. */
    if (a->magnitude > r->magnitude) {
        r->magnitude = a->magnitude;
    }
    r->normalized &= a->normalized;
#endif
}

/* Conditionally copy a storage-form element into r, using the same
 * branch-free mask technique as secp256k1_fe_cmov (8 x 32-bit limbs here). */
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
}

/* Pack a normalized field element (10 x 26-bit limbs) into compact storage
 * form (8 x 32-bit limbs). The input must be normalized (checked under
 * VERIFY); each storage limb is assembled from the low bits of one 26-bit
 * limb and the spillover of the next. */
static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
#endif
    r->n[0] = a->n[0] | a->n[1] << 26;
    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
}

/* Unpack a storage-form element (8 x 32-bit limbs) back into the 10 x 26-bit
 * limb representation; inverse of secp256k1_fe_to_storage. The result is a
 * normalized element of magnitude 1 (each limb is masked to 26 bits; the top
 * limb gets the remaining 22 bits). */
static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
    r->n[0] = a->n[0] & 0x3FFFFFFUL;
    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
    r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
    r->n[9] = a->n[7] >> 10;
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
#endif
}

#endif