github.com/ethereum/go-ethereum@v1.16.1/crypto/secp256k1/libsecp256k1/src/field_10x26_impl.h (about) 1 /*********************************************************************** 2 * Copyright (c) 2013, 2014 Pieter Wuille * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.* 5 ***********************************************************************/ 6 7 #ifndef SECP256K1_FIELD_REPR_IMPL_H 8 #define SECP256K1_FIELD_REPR_IMPL_H 9 10 #include "checkmem.h" 11 #include "util.h" 12 #include "field.h" 13 #include "modinv32_impl.h" 14 15 #ifdef VERIFY 16 static void secp256k1_fe_impl_verify(const secp256k1_fe *a) { 17 const uint32_t *d = a->n; 18 int m = a->normalized ? 1 : 2 * a->magnitude; 19 VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m); 20 VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m); 21 VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m); 22 VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m); 23 VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m); 24 VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m); 25 VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m); 26 VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m); 27 VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m); 28 VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m); 29 if (a->normalized) { 30 if (d[9] == 0x03FFFFFUL) { 31 uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2]; 32 if (mid == 0x3FFFFFFUL) { 33 VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); 34 } 35 } 36 } 37 } 38 #endif 39 40 static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) { 41 r->n[0] = 0x3FFFFFFUL * 2 * m; 42 r->n[1] = 0x3FFFFFFUL * 2 * m; 43 r->n[2] = 0x3FFFFFFUL * 2 * m; 44 r->n[3] = 0x3FFFFFFUL * 2 * m; 45 r->n[4] = 0x3FFFFFFUL * 2 * m; 46 r->n[5] = 0x3FFFFFFUL * 2 * m; 47 r->n[6] = 0x3FFFFFFUL * 2 * m; 48 r->n[7] = 0x3FFFFFFUL * 2 * m; 49 r->n[8] = 0x3FFFFFFUL * 2 * m; 50 r->n[9] = 0x03FFFFFUL * 2 * m; 51 } 52 53 static void secp256k1_fe_impl_normalize(secp256k1_fe *r) { 54 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 55 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 56 57 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 58 uint32_t m; 59 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 60 61 /* The first pass ensures the magnitude is 1, ... */ 62 t0 += x * 0x3D1UL; t1 += (x << 6); 63 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 64 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 65 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 66 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 67 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 68 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 69 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 70 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 71 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 72 73 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 74 VERIFY_CHECK(t9 >> 23 == 0); 75 76 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 77 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 78 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 79 80 /* Apply the final reduction (for constant-time behaviour, we do it always) */ 81 t0 += x * 0x3D1UL; t1 += (x << 6); 82 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 83 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 84 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 85 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 86 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 87 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 88 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 89 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 90 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 91 92 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 93 VERIFY_CHECK(t9 >> 22 == x); 94 95 /* Mask off the possible multiple of 2^256 from the final reduction */ 96 t9 &= 0x03FFFFFUL; 97 98 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 99 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 100 } 101 102 static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r) { 103 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 104 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 105 106 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 107 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 108 109 /* The first pass ensures the magnitude is 1, ... */ 110 t0 += x * 0x3D1UL; t1 += (x << 6); 111 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 112 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 113 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 114 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 115 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 116 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 117 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 118 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 119 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 120 121 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 122 VERIFY_CHECK(t9 >> 23 == 0); 123 124 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 125 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 126 } 127 128 static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r) { 129 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 130 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 131 132 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 133 uint32_t m; 134 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 135 136 /* The first pass ensures the magnitude is 1, ... */ 137 t0 += x * 0x3D1UL; t1 += (x << 6); 138 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 139 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 140 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 141 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 142 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 143 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 144 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 145 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 146 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 147 148 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 149 VERIFY_CHECK(t9 >> 23 == 0); 150 151 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 152 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 153 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 154 155 if (x) { 156 t0 += 0x3D1UL; t1 += (x << 6); 157 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 158 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 159 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 160 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 161 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 162 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 163 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 164 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 165 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 166 167 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 168 VERIFY_CHECK(t9 >> 22 == x); 169 170 /* Mask off the possible multiple of 2^256 from the final reduction */ 171 t9 &= 0x03FFFFFUL; 172 } 173 174 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 175 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 176 } 177 178 static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r) { 179 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 180 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 181 182 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ 183 uint32_t z0, z1; 184 185 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 186 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 187 188 /* The first pass ensures the magnitude is 1, ... */ 189 t0 += x * 0x3D1UL; t1 += (x << 6); 190 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; 191 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 192 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 193 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 194 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 195 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 196 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 197 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 198 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 199 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 200 201 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 202 VERIFY_CHECK(t9 >> 23 == 0); 203 204 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 205 } 206 207 static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) { 208 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; 209 uint32_t z0, z1; 210 uint32_t x; 211 212 t0 = r->n[0]; 213 t9 = r->n[9]; 214 215 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 216 x = t9 >> 22; 217 218 /* The first pass ensures the magnitude is 1, ... */ 219 t0 += x * 0x3D1UL; 220 221 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ 222 z0 = t0 & 0x3FFFFFFUL; 223 z1 = z0 ^ 0x3D0UL; 224 225 /* Fast return path should catch the majority of cases */ 226 if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) { 227 return 0; 228 } 229 230 t1 = r->n[1]; 231 t2 = r->n[2]; 232 t3 = r->n[3]; 233 t4 = r->n[4]; 234 t5 = r->n[5]; 235 t6 = r->n[6]; 236 t7 = r->n[7]; 237 t8 = r->n[8]; 238 239 t9 &= 0x03FFFFFUL; 240 t1 += (x << 6); 241 242 t1 += (t0 >> 26); 243 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 244 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 245 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 246 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 247 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 248 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 249 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 250 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 251 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 252 253 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 254 VERIFY_CHECK(t9 >> 23 == 0); 255 256 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 257 } 258 259 SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) { 260 r->n[0] = a; 261 r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; 262 } 263 264 SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) { 265 const uint32_t *t = a->n; 266 return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; 267 } 268 269 SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) { 270 return a->n[0] & 1; 271 } 272 273 static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) { 274 int i; 275 for (i = 9; i >= 0; i--) { 276 if (a->n[i] > b->n[i]) { 277 return 1; 278 } 279 if (a->n[i] < b->n[i]) { 280 return -1; 281 } 282 } 283 return 0; 284 } 285 286 static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) { 287 r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24); 288 r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22); 289 r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20); 290 r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18); 291 r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24); 292 r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22); 293 r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20); 294 r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18); 295 r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24); 296 r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14); 297 } 298 299 static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) { 300 secp256k1_fe_impl_set_b32_mod(r, a); 301 return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 302 } 303 304 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ 305 static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) { 306 r[0] = (a->n[9] >> 14) & 0xff; 307 r[1] = (a->n[9] >> 6) & 0xff; 308 r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3); 309 r[3] = (a->n[8] >> 16) & 0xff; 310 r[4] = (a->n[8] >> 8) & 0xff; 311 r[5] = a->n[8] & 0xff; 312 r[6] = (a->n[7] >> 18) & 0xff; 313 r[7] = (a->n[7] >> 10) & 0xff; 314 r[8] = (a->n[7] >> 2) & 0xff; 315 r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f); 316 r[10] = (a->n[6] >> 12) & 0xff; 317 r[11] = (a->n[6] >> 4) & 0xff; 318 r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf); 319 r[13] = (a->n[5] >> 14) & 0xff; 320 r[14] = (a->n[5] >> 6) & 0xff; 321 r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3); 322 r[16] = (a->n[4] >> 16) & 0xff; 323 r[17] = (a->n[4] >> 8) & 0xff; 324 r[18] = a->n[4] & 0xff; 325 r[19] = (a->n[3] >> 18) & 0xff; 326 r[20] = (a->n[3] >> 10) & 0xff; 327 r[21] = (a->n[3] >> 2) & 0xff; 328 r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f); 329 r[23] = (a->n[2] >> 12) & 0xff; 330 r[24] = (a->n[2] >> 4) & 0xff; 331 r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf); 332 r[26] = (a->n[1] >> 14) & 0xff; 333 r[27] = (a->n[1] >> 6) & 0xff; 334 r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3); 335 r[29] = (a->n[0] >> 16) & 0xff; 336 r[30] = (a->n[0] >> 8) & 0xff; 337 r[31] = a->n[0] & 0xff; 338 } 339 340 SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) { 341 /* For all legal values of m (0..31), the following properties hold: */ 342 VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 343 VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 344 VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 345 VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m); 346 347 /* Due to the properties above, the left hand in the subtractions below is never less than 348 * the right hand. */ 349 r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; 350 r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; 351 r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2]; 352 r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3]; 353 r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4]; 354 r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5]; 355 r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6]; 356 r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7]; 357 r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8]; 358 r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9]; 359 } 360 361 SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) { 362 r->n[0] *= a; 363 r->n[1] *= a; 364 r->n[2] *= a; 365 r->n[3] *= a; 366 r->n[4] *= a; 367 r->n[5] *= a; 368 r->n[6] *= a; 369 r->n[7] *= a; 370 r->n[8] *= a; 371 r->n[9] *= a; 372 } 373 374 SECP256K1_INLINE static void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a) { 375 r->n[0] += a->n[0]; 376 r->n[1] += a->n[1]; 377 r->n[2] += a->n[2]; 378 r->n[3] += a->n[3]; 379 r->n[4] += a->n[4]; 380 r->n[5] += a->n[5]; 381 r->n[6] += a->n[6]; 382 r->n[7] += a->n[7]; 383 r->n[8] += a->n[8]; 384 r->n[9] += a->n[9]; 385 } 386 387 SECP256K1_INLINE static void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a) { 388 r->n[0] += a; 389 } 390 391 #if defined(USE_EXTERNAL_ASM) 392 393 /* External assembler implementation */ 394 void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b); 395 void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a); 396 397 #else 398 399 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) 400 401 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { 402 uint64_t c, d; 403 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 404 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; 405 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 406 407 VERIFY_BITS(a[0], 30); 408 VERIFY_BITS(a[1], 30); 409 VERIFY_BITS(a[2], 30); 410 VERIFY_BITS(a[3], 30); 411 VERIFY_BITS(a[4], 30); 412 VERIFY_BITS(a[5], 30); 413 VERIFY_BITS(a[6], 30); 414 VERIFY_BITS(a[7], 30); 415 VERIFY_BITS(a[8], 30); 416 VERIFY_BITS(a[9], 26); 417 VERIFY_BITS(b[0], 30); 418 VERIFY_BITS(b[1], 30); 419 VERIFY_BITS(b[2], 30); 420 VERIFY_BITS(b[3], 30); 421 VERIFY_BITS(b[4], 30); 422 VERIFY_BITS(b[5], 30); 423 VERIFY_BITS(b[6], 30); 424 VERIFY_BITS(b[7], 30); 425 VERIFY_BITS(b[8], 30); 426 VERIFY_BITS(b[9], 26); 427 428 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 429 * for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x). 430 * for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9) 431 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 432 */ 433 434 d = (uint64_t)a[0] * b[9] 435 + (uint64_t)a[1] * b[8] 436 + (uint64_t)a[2] * b[7] 437 + (uint64_t)a[3] * b[6] 438 + (uint64_t)a[4] * b[5] 439 + (uint64_t)a[5] * b[4] 440 + (uint64_t)a[6] * b[3] 441 + (uint64_t)a[7] * b[2] 442 + (uint64_t)a[8] * b[1] 443 + (uint64_t)a[9] * b[0]; 444 /* VERIFY_BITS(d, 64); */ 445 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 446 t9 = d & M; d >>= 26; 447 VERIFY_BITS(t9, 26); 448 VERIFY_BITS(d, 38); 449 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 450 451 c = (uint64_t)a[0] * b[0]; 452 VERIFY_BITS(c, 60); 453 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 454 d += (uint64_t)a[1] * b[9] 455 + (uint64_t)a[2] * b[8] 456 + (uint64_t)a[3] * b[7] 457 + (uint64_t)a[4] * b[6] 458 + (uint64_t)a[5] * b[5] 459 + (uint64_t)a[6] * b[4] 460 + (uint64_t)a[7] * b[3] 461 + (uint64_t)a[8] * b[2] 462 + (uint64_t)a[9] * b[1]; 463 VERIFY_BITS(d, 63); 464 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 465 u0 = d & M; d >>= 26; c += u0 * R0; 466 VERIFY_BITS(u0, 26); 467 VERIFY_BITS(d, 37); 468 VERIFY_BITS(c, 61); 469 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 470 t0 = c & M; c >>= 26; c += u0 * R1; 471 VERIFY_BITS(t0, 26); 472 VERIFY_BITS(c, 37); 473 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 474 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 475 476 c += (uint64_t)a[0] * b[1] 477 + (uint64_t)a[1] * b[0]; 478 VERIFY_BITS(c, 62); 479 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 480 d += (uint64_t)a[2] * b[9] 481 + (uint64_t)a[3] * b[8] 482 + (uint64_t)a[4] * b[7] 483 + (uint64_t)a[5] * b[6] 484 + (uint64_t)a[6] * b[5] 485 + (uint64_t)a[7] * b[4] 486 + (uint64_t)a[8] * b[3] 487 + (uint64_t)a[9] * b[2]; 488 VERIFY_BITS(d, 63); 489 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 490 u1 = d & M; d >>= 26; c += u1 * R0; 491 VERIFY_BITS(u1, 26); 492 VERIFY_BITS(d, 37); 493 VERIFY_BITS(c, 63); 494 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 495 t1 = c & M; c >>= 26; c += u1 * R1; 496 VERIFY_BITS(t1, 26); 497 VERIFY_BITS(c, 38); 498 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 499 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 500 501 c += (uint64_t)a[0] * b[2] 502 + (uint64_t)a[1] * b[1] 503 + (uint64_t)a[2] * b[0]; 504 VERIFY_BITS(c, 62); 505 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 506 d += (uint64_t)a[3] * b[9] 507 + (uint64_t)a[4] * b[8] 508 + (uint64_t)a[5] * b[7] 509 + (uint64_t)a[6] * b[6] 510 + (uint64_t)a[7] * b[5] 511 + (uint64_t)a[8] * b[4] 512 + (uint64_t)a[9] * b[3]; 513 VERIFY_BITS(d, 63); 514 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 515 u2 = d & M; d >>= 26; c += u2 * R0; 516 VERIFY_BITS(u2, 26); 517 VERIFY_BITS(d, 37); 518 VERIFY_BITS(c, 63); 519 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 520 t2 = c & M; c >>= 26; c += u2 * R1; 521 VERIFY_BITS(t2, 26); 522 VERIFY_BITS(c, 38); 523 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 524 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 525 526 c += (uint64_t)a[0] * b[3] 527 + (uint64_t)a[1] * b[2] 528 + (uint64_t)a[2] * b[1] 529 + (uint64_t)a[3] * b[0]; 530 VERIFY_BITS(c, 63); 531 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 532 d += (uint64_t)a[4] * b[9] 533 + (uint64_t)a[5] * b[8] 534 + (uint64_t)a[6] * b[7] 535 + (uint64_t)a[7] * b[6] 536 + (uint64_t)a[8] * b[5] 537 + (uint64_t)a[9] * b[4]; 538 VERIFY_BITS(d, 63); 539 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 540 u3 = d & M; d >>= 26; c += u3 * R0; 541 VERIFY_BITS(u3, 26); 542 VERIFY_BITS(d, 37); 543 /* VERIFY_BITS(c, 64); */ 544 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 545 t3 = c & M; c >>= 26; c += u3 * R1; 546 VERIFY_BITS(t3, 26); 547 VERIFY_BITS(c, 39); 548 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 549 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 550 551 c += (uint64_t)a[0] * b[4] 552 + (uint64_t)a[1] * b[3] 553 + (uint64_t)a[2] * b[2] 554 + (uint64_t)a[3] * b[1] 555 + (uint64_t)a[4] * b[0]; 556 VERIFY_BITS(c, 63); 557 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 558 d += (uint64_t)a[5] * b[9] 559 + (uint64_t)a[6] * b[8] 560 + (uint64_t)a[7] * b[7] 561 + (uint64_t)a[8] * b[6] 562 + (uint64_t)a[9] * b[5]; 563 VERIFY_BITS(d, 62); 564 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 565 u4 = d & M; d >>= 26; c += u4 * R0; 566 VERIFY_BITS(u4, 26); 567 VERIFY_BITS(d, 36); 568 /* VERIFY_BITS(c, 64); */ 569 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 570 t4 = c & M; c >>= 26; c += u4 * R1; 571 VERIFY_BITS(t4, 26); 572 VERIFY_BITS(c, 39); 573 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 574 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 575 576 c += (uint64_t)a[0] * b[5] 577 + (uint64_t)a[1] * b[4] 578 + (uint64_t)a[2] * b[3] 579 + (uint64_t)a[3] * b[2] 580 + (uint64_t)a[4] * b[1] 581 + (uint64_t)a[5] * b[0]; 582 VERIFY_BITS(c, 63); 583 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 584 d += (uint64_t)a[6] * b[9] 585 + (uint64_t)a[7] * b[8] 586 + (uint64_t)a[8] * b[7] 587 + (uint64_t)a[9] * b[6]; 588 VERIFY_BITS(d, 62); 589 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 590 u5 = d & M; d >>= 26; c += u5 * R0; 591 VERIFY_BITS(u5, 26); 592 VERIFY_BITS(d, 36); 593 /* VERIFY_BITS(c, 64); */ 594 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 595 t5 = c & M; c >>= 26; c += u5 * R1; 596 VERIFY_BITS(t5, 26); 597 VERIFY_BITS(c, 39); 598 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 599 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 600 601 c += (uint64_t)a[0] * b[6] 602 + (uint64_t)a[1] * b[5] 603 + (uint64_t)a[2] * b[4] 604 + (uint64_t)a[3] * b[3] 605 + (uint64_t)a[4] * b[2] 606 + (uint64_t)a[5] * b[1] 607 + (uint64_t)a[6] * b[0]; 608 VERIFY_BITS(c, 63); 609 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 610 d += (uint64_t)a[7] * b[9] 611 + (uint64_t)a[8] * b[8] 612 + (uint64_t)a[9] * b[7]; 613 VERIFY_BITS(d, 61); 614 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 615 u6 = d & M; d >>= 26; c += u6 * R0; 616 VERIFY_BITS(u6, 26); 617 VERIFY_BITS(d, 35); 618 /* VERIFY_BITS(c, 64); */ 619 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 620 t6 = c & M; c >>= 26; c += u6 * R1; 621 VERIFY_BITS(t6, 26); 622 VERIFY_BITS(c, 39); 623 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 624 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 625 626 c += (uint64_t)a[0] * b[7] 627 + (uint64_t)a[1] * b[6] 628 + (uint64_t)a[2] * b[5] 629 + (uint64_t)a[3] * b[4] 630 + (uint64_t)a[4] * b[3] 631 + (uint64_t)a[5] * b[2] 632 + (uint64_t)a[6] * b[1] 633 + (uint64_t)a[7] * b[0]; 634 /* VERIFY_BITS(c, 64); */ 635 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 636 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 637 d += (uint64_t)a[8] * b[9] 638 + (uint64_t)a[9] * b[8]; 639 VERIFY_BITS(d, 58); 640 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 641 u7 = d & M; d >>= 26; c += u7 * R0; 642 VERIFY_BITS(u7, 26); 643 VERIFY_BITS(d, 32); 644 /* VERIFY_BITS(c, 64); */ 645 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 646 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 647 t7 = c & M; c >>= 26; c += u7 * R1; 648 VERIFY_BITS(t7, 26); 649 VERIFY_BITS(c, 38); 650 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 651 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 652 653 c += (uint64_t)a[0] * b[8] 654 + (uint64_t)a[1] * b[7] 655 + (uint64_t)a[2] * b[6] 656 + (uint64_t)a[3] * b[5] 657 + (uint64_t)a[4] * b[4] 658 + (uint64_t)a[5] * b[3] 659 + (uint64_t)a[6] * b[2] 660 + (uint64_t)a[7] * b[1] 661 + (uint64_t)a[8] * b[0]; 662 /* VERIFY_BITS(c, 64); */ 663 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 664 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 665 d += (uint64_t)a[9] * b[9]; 666 VERIFY_BITS(d, 57); 667 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 668 u8 = d & M; d >>= 26; c += u8 * R0; 669 VERIFY_BITS(u8, 26); 670 VERIFY_BITS(d, 31); 671 /* VERIFY_BITS(c, 64); */ 672 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 673 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 674 675 r[3] = t3; 676 VERIFY_BITS(r[3], 26); 677 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 678 r[4] = t4; 679 VERIFY_BITS(r[4], 26); 680 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 681 r[5] = t5; 682 VERIFY_BITS(r[5], 26); 683 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 684 r[6] = t6; 685 VERIFY_BITS(r[6], 26); 686 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 687 r[7] = t7; 688 VERIFY_BITS(r[7], 26); 689 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 690 691 r[8] = c & M; c >>= 26; c += u8 * R1; 692 VERIFY_BITS(r[8], 26); 693 VERIFY_BITS(c, 39); 694 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 695 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 696 c += d * R0 + t9; 697 VERIFY_BITS(c, 45); 698 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 699 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 700 VERIFY_BITS(r[9], 22); 701 VERIFY_BITS(c, 46); 702 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 703 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 704 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 705 706 d = c * (R0 >> 4) + t0; 707 VERIFY_BITS(d, 56); 708 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 709 r[0] = d & M; d >>= 26; 710 VERIFY_BITS(r[0], 26); 711 VERIFY_BITS(d, 30); 712 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 713 d += c * (R1 >> 4) + t1; 714 VERIFY_BITS(d, 53); 715 VERIFY_CHECK(d <= 0x10000003FFFFBFULL); 716 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 717 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 718 r[1] = d & M; d >>= 26; 719 VERIFY_BITS(r[1], 26); 720 VERIFY_BITS(d, 27); 721 VERIFY_CHECK(d <= 0x4000000ULL); 722 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 723 d += t2; 724 VERIFY_BITS(d, 27); 725 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 726 r[2] = d; 727 VERIFY_BITS(r[2], 27); 728 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 729 } 730 731 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { 732 uint64_t c, d; 733 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 734 uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7; 735 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 736 737 VERIFY_BITS(a[0], 30); 738 VERIFY_BITS(a[1], 30); 739 VERIFY_BITS(a[2], 30); 740 VERIFY_BITS(a[3], 30); 741 VERIFY_BITS(a[4], 30); 742 VERIFY_BITS(a[5], 30); 743 VERIFY_BITS(a[6], 30); 744 VERIFY_BITS(a[7], 30); 745 VERIFY_BITS(a[8], 30); 746 VERIFY_BITS(a[9], 26); 747 748 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 749 * px is a shorthand for sum(a[i]*a[x-i], i=0..x). 750 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 751 */ 752 753 d = (uint64_t)(a[0]*2) * a[9] 754 + (uint64_t)(a[1]*2) * a[8] 755 + (uint64_t)(a[2]*2) * a[7] 756 + (uint64_t)(a[3]*2) * a[6] 757 + (uint64_t)(a[4]*2) * a[5]; 758 /* VERIFY_BITS(d, 64); */ 759 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 760 t9 = d & M; d >>= 26; 761 VERIFY_BITS(t9, 26); 762 VERIFY_BITS(d, 38); 763 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 764 765 c = (uint64_t)a[0] * a[0]; 766 VERIFY_BITS(c, 60); 767 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 768 d += (uint64_t)(a[1]*2) * a[9] 769 + (uint64_t)(a[2]*2) * a[8] 770 + (uint64_t)(a[3]*2) * a[7] 771 + (uint64_t)(a[4]*2) * a[6] 772 + (uint64_t)a[5] * a[5]; 773 VERIFY_BITS(d, 63); 774 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 775 u0 = d & M; d >>= 26; c += u0 * R0; 776 VERIFY_BITS(u0, 26); 777 VERIFY_BITS(d, 37); 778 VERIFY_BITS(c, 61); 779 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 780 t0 = c & M; c >>= 26; c += u0 * R1; 781 VERIFY_BITS(t0, 26); 782 VERIFY_BITS(c, 37); 783 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 784 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 785 786 c += (uint64_t)(a[0]*2) * a[1]; 787 VERIFY_BITS(c, 62); 788 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 789 d += (uint64_t)(a[2]*2) * a[9] 790 + (uint64_t)(a[3]*2) * a[8] 791 + (uint64_t)(a[4]*2) * a[7] 792 + (uint64_t)(a[5]*2) * a[6]; 793 VERIFY_BITS(d, 63); 794 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 795 u1 = d & M; d >>= 26; c += u1 * R0; 796 VERIFY_BITS(u1, 26); 797 VERIFY_BITS(d, 37); 798 VERIFY_BITS(c, 63); 799 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 800 t1 = c & M; c >>= 26; c += u1 * R1; 801 VERIFY_BITS(t1, 26); 802 VERIFY_BITS(c, 38); 803 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 804 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 805 806 c += (uint64_t)(a[0]*2) * a[2] 807 + (uint64_t)a[1] * a[1]; 808 VERIFY_BITS(c, 62); 809 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 810 d += (uint64_t)(a[3]*2) * a[9] 811 + (uint64_t)(a[4]*2) * a[8] 812 + (uint64_t)(a[5]*2) * a[7] 813 + (uint64_t)a[6] * a[6]; 814 VERIFY_BITS(d, 63); 815 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 816 u2 = d & M; d >>= 26; c += u2 * R0; 817 VERIFY_BITS(u2, 26); 818 VERIFY_BITS(d, 37); 819 VERIFY_BITS(c, 63); 820 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 821 t2 = c & M; c >>= 26; c += u2 * R1; 822 VERIFY_BITS(t2, 26); 823 VERIFY_BITS(c, 38); 824 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 825 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 826 827 c += (uint64_t)(a[0]*2) * a[3] 828 + (uint64_t)(a[1]*2) * a[2]; 829 VERIFY_BITS(c, 63); 830 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 831 d += (uint64_t)(a[4]*2) * a[9] 832 + (uint64_t)(a[5]*2) * a[8] 833 + (uint64_t)(a[6]*2) * a[7]; 834 VERIFY_BITS(d, 63); 835 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 836 u3 = d & M; d >>= 26; c += u3 * R0; 837 VERIFY_BITS(u3, 26); 838 VERIFY_BITS(d, 37); 839 /* VERIFY_BITS(c, 64); */ 840 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 841 t3 = c & M; c >>= 26; c += u3 * R1; 842 VERIFY_BITS(t3, 26); 843 VERIFY_BITS(c, 39); 844 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 845 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 846 847 c += (uint64_t)(a[0]*2) * a[4] 848 + (uint64_t)(a[1]*2) * a[3] 849 + (uint64_t)a[2] * a[2]; 850 VERIFY_BITS(c, 63); 851 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 852 d += (uint64_t)(a[5]*2) * a[9] 853 + (uint64_t)(a[6]*2) * a[8] 854 + (uint64_t)a[7] * a[7]; 855 VERIFY_BITS(d, 62); 856 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 857 u4 = d & M; d >>= 26; c += u4 * R0; 858 VERIFY_BITS(u4, 26); 859 VERIFY_BITS(d, 36); 860 /* VERIFY_BITS(c, 64); */ 861 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 862 t4 = c & M; c >>= 26; c += u4 * R1; 863 VERIFY_BITS(t4, 26); 864 VERIFY_BITS(c, 39); 865 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 866 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 867 868 c += (uint64_t)(a[0]*2) * a[5] 869 + (uint64_t)(a[1]*2) * a[4] 870 + (uint64_t)(a[2]*2) * a[3]; 871 VERIFY_BITS(c, 63); 872 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 873 d += (uint64_t)(a[6]*2) * a[9] 874 + (uint64_t)(a[7]*2) * a[8]; 875 VERIFY_BITS(d, 62); 876 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 877 u5 = d & M; d >>= 26; c += u5 * R0; 878 VERIFY_BITS(u5, 26); 879 VERIFY_BITS(d, 36); 880 /* VERIFY_BITS(c, 64); */ 881 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 882 t5 = c & M; c >>= 26; c += u5 * R1; 883 VERIFY_BITS(t5, 26); 884 VERIFY_BITS(c, 39); 885 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 886 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 887 888 c += (uint64_t)(a[0]*2) * a[6] 889 + (uint64_t)(a[1]*2) * a[5] 890 + (uint64_t)(a[2]*2) * a[4] 891 + (uint64_t)a[3] * a[3]; 892 VERIFY_BITS(c, 63); 893 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 894 d += (uint64_t)(a[7]*2) * a[9] 895 + (uint64_t)a[8] * a[8]; 896 VERIFY_BITS(d, 61); 897 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 898 u6 = d & M; d >>= 26; c += u6 * R0; 899 VERIFY_BITS(u6, 26); 900 VERIFY_BITS(d, 35); 901 /* VERIFY_BITS(c, 64); */ 902 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 903 t6 = c & M; c >>= 26; c += u6 * R1; 904 VERIFY_BITS(t6, 26); 905 VERIFY_BITS(c, 39); 906 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 907 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 908 909 c += (uint64_t)(a[0]*2) * a[7] 910 + (uint64_t)(a[1]*2) * a[6] 911 + (uint64_t)(a[2]*2) * a[5] 912 + (uint64_t)(a[3]*2) * a[4]; 913 /* VERIFY_BITS(c, 64); */ 914 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 915 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 916 d += (uint64_t)(a[8]*2) * a[9]; 917 VERIFY_BITS(d, 58); 918 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 919 u7 = d & M; d >>= 26; c += u7 * R0; 920 VERIFY_BITS(u7, 26); 921 VERIFY_BITS(d, 32); 922 /* VERIFY_BITS(c, 64); */ 923 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 924 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 925 t7 = c & M; c >>= 26; c += u7 * R1; 926 VERIFY_BITS(t7, 26); 927 VERIFY_BITS(c, 38); 928 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 929 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 930 931 c += (uint64_t)(a[0]*2) * a[8] 932 + (uint64_t)(a[1]*2) * a[7] 933 + (uint64_t)(a[2]*2) * a[6] 934 + (uint64_t)(a[3]*2) * a[5] 935 + (uint64_t)a[4] * a[4]; 936 /* VERIFY_BITS(c, 64); */ 937 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 938 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 939 d += (uint64_t)a[9] * a[9]; 940 VERIFY_BITS(d, 57); 941 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 942 u8 = d & M; d >>= 26; c += u8 * R0; 943 VERIFY_BITS(u8, 26); 944 VERIFY_BITS(d, 31); 945 /* VERIFY_BITS(c, 64); */ 946 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 947 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 948 949 r[3] = t3; 950 VERIFY_BITS(r[3], 26); 951 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 952 r[4] = t4; 953 VERIFY_BITS(r[4], 26); 954 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 955 r[5] = t5; 956 VERIFY_BITS(r[5], 26); 957 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 958 r[6] = t6; 959 VERIFY_BITS(r[6], 26); 960 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 961 r[7] = t7; 962 VERIFY_BITS(r[7], 26); 963 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 964 965 r[8] = c & M; c >>= 26; c += u8 * R1; 966 VERIFY_BITS(r[8], 26); 967 VERIFY_BITS(c, 39); 968 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 969 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 970 c += d * R0 + t9; 971 VERIFY_BITS(c, 45); 972 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 973 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 974 VERIFY_BITS(r[9], 22); 975 VERIFY_BITS(c, 46); 976 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 977 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 978 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 979 980 d = c * (R0 >> 4) + t0; 981 VERIFY_BITS(d, 56); 982 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 983 r[0] = d & M; d >>= 26; 984 VERIFY_BITS(r[0], 26); 985 VERIFY_BITS(d, 30); 986 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 987 d += c * (R1 >> 4) + t1; 988 VERIFY_BITS(d, 53); 989 VERIFY_CHECK(d <= 0x10000003FFFFBFULL); 990 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 991 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 992 r[1] = d & M; d >>= 26; 993 VERIFY_BITS(r[1], 26); 994 VERIFY_BITS(d, 27); 995 VERIFY_CHECK(d <= 0x4000000ULL); 996 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 997 d += t2; 998 VERIFY_BITS(d, 27); 999 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1000 r[2] = d; 1001 VERIFY_BITS(r[2], 27); 1002 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1003 } 1004 #endif 1005 1006 SECP256K1_INLINE static void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) { 1007 secp256k1_fe_mul_inner(r->n, a->n, b->n); 1008 } 1009 1010 SECP256K1_INLINE static void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a) { 1011 secp256k1_fe_sqr_inner(r->n, a->n); 1012 } 1013 1014 SECP256K1_INLINE static void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) { 1015 uint32_t mask0, mask1; 1016 volatile int vflag = flag; 1017 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); 1018 mask0 = vflag + ~((uint32_t)0); 1019 mask1 = ~mask0; 1020 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); 1021 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); 1022 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); 1023 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); 1024 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); 1025 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); 1026 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); 1027 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); 1028 r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1); 1029 r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1); 1030 } 1031 1032 static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r) { 1033 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 1034 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 1035 uint32_t one = (uint32_t)1; 1036 uint32_t mask = -(t0 & one) >> 6; 1037 1038 /* Bounds analysis (over the rationals). 1039 * 1040 * Let m = r->magnitude 1041 * C = 0x3FFFFFFUL * 2 1042 * D = 0x03FFFFFUL * 2 1043 * 1044 * Initial bounds: t0..t8 <= C * m 1045 * t9 <= D * m 1046 */ 1047 1048 t0 += 0x3FFFC2FUL & mask; 1049 t1 += 0x3FFFFBFUL & mask; 1050 t2 += mask; 1051 t3 += mask; 1052 t4 += mask; 1053 t5 += mask; 1054 t6 += mask; 1055 t7 += mask; 1056 t8 += mask; 1057 t9 += mask >> 4; 1058 1059 VERIFY_CHECK((t0 & one) == 0); 1060 1061 /* t0..t8: added <= C/2 1062 * t9: added <= D/2 1063 * 1064 * Current bounds: t0..t8 <= C * (m + 1/2) 1065 * t9 <= D * (m + 1/2) 1066 */ 1067 1068 r->n[0] = (t0 >> 1) + ((t1 & one) << 25); 1069 r->n[1] = (t1 >> 1) + ((t2 & one) << 25); 1070 r->n[2] = (t2 >> 1) + ((t3 & one) << 25); 1071 r->n[3] = (t3 >> 1) + ((t4 & one) << 25); 1072 r->n[4] = (t4 >> 1) + ((t5 & one) << 25); 1073 r->n[5] = (t5 >> 1) + ((t6 & one) << 25); 1074 r->n[6] = (t6 >> 1) + ((t7 & one) << 25); 1075 r->n[7] = (t7 >> 1) + ((t8 & one) << 25); 1076 r->n[8] = (t8 >> 1) + ((t9 & one) << 25); 1077 r->n[9] = (t9 >> 1); 1078 1079 /* t0..t8: shifted right and added <= C/4 + 1/2 1080 * t9: shifted right 1081 * 1082 * Current bounds: t0..t8 <= C * (m/2 + 1/2) 1083 * t9 <= D * (m/2 + 1/4) 1084 * 1085 * Therefore the output magnitude (M) has to be set such that: 1086 * t0..t8: C * M >= C * (m/2 + 1/2) 1087 * t9: D * M >= D * (m/2 + 1/4) 1088 * 1089 * It suffices for all limbs that, for any input magnitude m: 1090 * M >= m/2 + 1/2 1091 * 1092 * and since we want the smallest such integer value for M: 1093 * M == floor(m/2) + 1 1094 */ 1095 } 1096 1097 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) { 1098 uint32_t mask0, mask1; 1099 volatile int vflag = flag; 1100 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); 1101 mask0 = vflag + ~((uint32_t)0); 1102 mask1 = ~mask0; 1103 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); 1104 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); 1105 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); 1106 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); 1107 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); 1108 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); 1109 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); 1110 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); 1111 } 1112 1113 static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) { 1114 r->n[0] = a->n[0] | a->n[1] << 26; 1115 r->n[1] = a->n[1] >> 6 | a->n[2] << 20; 1116 r->n[2] = a->n[2] >> 12 | a->n[3] << 14; 1117 r->n[3] = a->n[3] >> 18 | a->n[4] << 8; 1118 r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28; 1119 r->n[5] = a->n[6] >> 4 | a->n[7] << 22; 1120 r->n[6] = a->n[7] >> 10 | a->n[8] << 16; 1121 r->n[7] = a->n[8] >> 16 | a->n[9] << 10; 1122 } 1123 1124 static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) { 1125 r->n[0] = a->n[0] & 0x3FFFFFFUL; 1126 r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL); 1127 r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL); 1128 r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL); 1129 r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL); 1130 r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL; 1131 r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL); 1132 r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL); 1133 r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL); 1134 r->n[9] = a->n[7] >> 10; 1135 } 1136 1137 static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a) { 1138 const uint32_t M26 = UINT32_MAX >> 6; 1139 const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4], 1140 a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8]; 1141 1142 /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and 1143 * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8). 1144 */ 1145 VERIFY_CHECK(a0 >> 30 == 0); 1146 VERIFY_CHECK(a1 >> 30 == 0); 1147 VERIFY_CHECK(a2 >> 30 == 0); 1148 VERIFY_CHECK(a3 >> 30 == 0); 1149 VERIFY_CHECK(a4 >> 30 == 0); 1150 VERIFY_CHECK(a5 >> 30 == 0); 1151 VERIFY_CHECK(a6 >> 30 == 0); 1152 VERIFY_CHECK(a7 >> 30 == 0); 1153 VERIFY_CHECK(a8 >> 16 == 0); 1154 1155 r->n[0] = a0 & M26; 1156 r->n[1] = (a0 >> 26 | a1 << 4) & M26; 1157 r->n[2] = (a1 >> 22 | a2 << 8) & M26; 1158 r->n[3] = (a2 >> 18 | a3 << 12) & M26; 1159 r->n[4] = (a3 >> 14 | a4 << 16) & M26; 1160 r->n[5] = (a4 >> 10 | a5 << 20) & M26; 1161 r->n[6] = (a5 >> 6 | a6 << 24) & M26; 1162 r->n[7] = (a6 >> 2 ) & M26; 1163 r->n[8] = (a6 >> 28 | a7 << 2) & M26; 1164 r->n[9] = (a7 >> 24 | a8 << 6); 1165 } 1166 1167 static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a) { 1168 const uint32_t M30 = UINT32_MAX >> 2; 1169 const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4], 1170 a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9]; 1171 1172 r->v[0] = (a0 | a1 << 26) & M30; 1173 r->v[1] = (a1 >> 4 | a2 << 22) & M30; 1174 r->v[2] = (a2 >> 8 | a3 << 18) & M30; 1175 r->v[3] = (a3 >> 12 | a4 << 14) & M30; 1176 r->v[4] = (a4 >> 16 | a5 << 10) & M30; 1177 r->v[5] = (a5 >> 20 | a6 << 6) & M30; 1178 r->v[6] = (a6 >> 24 | a7 << 2 1179 | a8 << 28) & M30; 1180 r->v[7] = (a8 >> 2 | a9 << 24) & M30; 1181 r->v[8] = a9 >> 6; 1182 } 1183 1184 static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = { 1185 {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}}, 1186 0x2DDACACFL 1187 }; 1188 1189 static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x) { 1190 secp256k1_fe tmp = *x; 1191 secp256k1_modinv32_signed30 s; 1192 1193 secp256k1_fe_normalize(&tmp); 1194 secp256k1_fe_to_signed30(&s, &tmp); 1195 secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe); 1196 secp256k1_fe_from_signed30(r, &s); 1197 } 1198 1199 static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { 1200 secp256k1_fe tmp = *x; 1201 secp256k1_modinv32_signed30 s; 1202 1203 secp256k1_fe_normalize_var(&tmp); 1204 secp256k1_fe_to_signed30(&s, &tmp); 1205 secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe); 1206 secp256k1_fe_from_signed30(r, &s); 1207 } 1208 1209 static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x) { 1210 secp256k1_fe tmp; 1211 secp256k1_modinv32_signed30 s; 1212 int jac, ret; 1213 1214 tmp = *x; 1215 secp256k1_fe_normalize_var(&tmp); 1216 /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */ 1217 if (secp256k1_fe_is_zero(&tmp)) return 1; 1218 secp256k1_fe_to_signed30(&s, &tmp); 1219 jac = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe); 1220 if (jac == 0) { 1221 /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back 1222 * to computing a square root. This should be extremely rare with random 1223 * input (except in VERIFY mode, where a lower iteration count is used). */ 1224 secp256k1_fe dummy; 1225 ret = secp256k1_fe_sqrt(&dummy, &tmp); 1226 } else { 1227 ret = jac >= 0; 1228 } 1229 return ret; 1230 } 1231 1232 #endif /* SECP256K1_FIELD_REPR_IMPL_H */