golang.zx2c4.com/wireguard/windows@v0.5.4-0.20230123132234-dcc0eb72a04b/installer/fetcher/crypto.c (about) 1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2020-2022 Jason A. Donenfeld. All Rights Reserved. 4 * Copyright (c) 2020, Google Inc. 5 */ 6 7 #include "crypto.h" 8 #include <stdint.h> 9 #include <string.h> 10 #include <winternl.h> 11 #include <bcrypt.h> 12 13 #if REG_DWORD == REG_DWORD_LITTLE_ENDIAN 14 #define swap_le64(x) (x) 15 #define swap_le32(x) (x) 16 #elif REG_DWORD == REG_DWORD_BIG_ENDIAN 17 #define swap_le64(x) __builtin_bswap64(x) 18 #define swap_le32(x) __builtin_bswap32(x) 19 #endif 20 21 static void store_le64(uint8_t *dst, uint64_t src) 22 { 23 src = swap_le64(src); 24 __builtin_memcpy(dst, &src, sizeof(src)); 25 } 26 27 static uint64_t load_le64(const uint8_t *src) 28 { 29 uint64_t dst; 30 __builtin_memcpy(&dst, src, sizeof(dst)); 31 return swap_le64(dst); 32 } 33 34 static uint32_t load_le24(const uint8_t *in) 35 { 36 uint32_t dst; 37 dst = (uint32_t)in[0]; 38 dst |= ((uint32_t)in[1]) << 8; 39 dst |= ((uint32_t)in[2]) << 16; 40 return dst; 41 } 42 43 static uint32_t load_le32(const uint8_t *src) 44 { 45 uint32_t dst; 46 __builtin_memcpy(&dst, src, sizeof(dst)); 47 return swap_le32(dst); 48 } 49 50 static uint64_t ror64(uint64_t i, unsigned int s) 51 { 52 return (i >> (s & 63)) | (i << ((-s) & 63)); 53 } 54 55 static inline uint32_t value_barrier_u32(uint32_t a) 56 { 57 __asm__("" : "+r"(a) : /* no inputs */); 58 return a; 59 } 60 61 static int memcmp_ct(const void *first, const void *second, size_t len) 62 { 63 const uint8_t *a = first; 64 const uint8_t *b = second; 65 uint8_t diff = 0; 66 67 for (size_t i = 0; i < len; ++i) { 68 diff |= a[i] ^ b[i]; 69 __asm__("" : "+r"(diff) : /* no inputs */); 70 } 71 72 return diff; 73 } 74 75 /* 76 * The function fiat_25519_addcarryx_u26 is an addition with carry. 
77 * Postconditions: 78 * out1 = (arg1 + arg2 + arg3) mod 2^26 79 * out2 = ⌊(arg1 + arg2 + arg3) / 2^26⌋ 80 * 81 * Input Bounds: 82 * arg1: [0x0 ~> 0x1] 83 * arg2: [0x0 ~> 0x3ffffff] 84 * arg3: [0x0 ~> 0x3ffffff] 85 * Output Bounds: 86 * out1: [0x0 ~> 0x3ffffff] 87 * out2: [0x0 ~> 0x1] 88 */ 89 static void fiat_25519_addcarryx_u26(uint32_t *out1, uint8_t *out2, 90 uint8_t arg1, uint32_t arg2, uint32_t arg3) 91 { 92 uint32_t x1 = ((arg1 + arg2) + arg3); 93 uint32_t x2 = (x1 & UINT32_C(0x3ffffff)); 94 uint8_t x3 = (uint8_t)(x1 >> 26); 95 *out1 = x2; 96 *out2 = x3; 97 } 98 99 /* 100 * The function fiat_25519_subborrowx_u26 is a subtraction with borrow. 101 * Postconditions: 102 * out1 = (-arg1 + arg2 + -arg3) mod 2^26 103 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^26⌋ 104 * 105 * Input Bounds: 106 * arg1: [0x0 ~> 0x1] 107 * arg2: [0x0 ~> 0x3ffffff] 108 * arg3: [0x0 ~> 0x3ffffff] 109 * Output Bounds: 110 * out1: [0x0 ~> 0x3ffffff] 111 * out2: [0x0 ~> 0x1] 112 */ 113 static void fiat_25519_subborrowx_u26(uint32_t *out1, uint8_t *out2, 114 uint8_t arg1, uint32_t arg2, 115 uint32_t arg3) 116 { 117 int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); 118 int8_t x2 = (int8_t)(x1 >> 26); 119 uint32_t x3 = (x1 & UINT32_C(0x3ffffff)); 120 *out1 = x3; 121 *out2 = (uint8_t)(0x0 - x2); 122 } 123 124 /* 125 * The function fiat_25519_addcarryx_u25 is an addition with carry. 
 * Postconditions:
 *   out1 = (arg1 + arg2 + arg3) mod 2^25
 *   out2 = ⌊(arg1 + arg2 + arg3) / 2^25⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0x1ffffff]
 *   arg3: [0x0 ~> 0x1ffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0x1ffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_25519_addcarryx_u25(uint32_t *out1, uint8_t *out2,
				     uint8_t arg1, uint32_t arg2, uint32_t arg3)
{
	/* arg1 is the incoming carry; x2 is the 25-bit limb, x3 the carry out. */
	uint32_t x1 = ((arg1 + arg2) + arg3);
	uint32_t x2 = (x1 & UINT32_C(0x1ffffff));
	uint8_t x3 = (uint8_t)(x1 >> 25);
	*out1 = x2;
	*out2 = x3;
}

/*
 * The function fiat_25519_subborrowx_u25 is a subtraction with borrow.
 * Postconditions:
 *   out1 = (-arg1 + arg2 + -arg3) mod 2^25
 *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^25⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0x1ffffff]
 *   arg3: [0x0 ~> 0x1ffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0x1ffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_25519_subborrowx_u25(uint32_t *out1, uint8_t *out2,
				      uint8_t arg1, uint32_t arg2,
				      uint32_t arg3)
{
	/*
	 * arg1 is the incoming borrow. x1 is the signed difference; x2 holds
	 * the bits above the 25-bit limb and is negated below so that the
	 * outgoing borrow is reported as 0 or 1.
	 */
	int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
	int8_t x2 = (int8_t)(x1 >> 25);
	uint32_t x3 = (x1 & UINT32_C(0x1ffffff));
	*out1 = x3;
	*out2 = (uint8_t)(0x0 - x2);
}

/*
 * The function fiat_25519_cmovznz_u32 is a single-word conditional move.
 * Postconditions:
 *   out1 = (if arg1 = 0 then arg2 else arg3)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 */
static void fiat_25519_cmovznz_u32(uint32_t *out1, uint8_t arg1, uint32_t arg2,
				   uint32_t arg3)
{
	/* x1 normalizes arg1 to exactly 0 or 1. */
	uint8_t x1 = (!(!arg1));
	/* x2 is an all-ones mask when arg1 is nonzero and zero otherwise. */
	uint32_t x2 = ((int8_t)(0x0 - x1) & UINT32_C(0xffffffff));
	// Note this line has been patched from the synthesized code to add value
	// barriers.
	//
	// Clang recognizes this pattern as a select. While it usually transforms it
	// to a cmov, it sometimes further transforms it into a branch, which we do
	// not want.
	uint32_t x3 = ((value_barrier_u32(x2) & arg3) |
		       (value_barrier_u32(~x2) & arg2));
	*out1 = x3;
}

/*
 * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
 * Postconditions:
 *   eval out1 mod m = (eval arg1 * eval arg2) mod m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
 *   arg2: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
 */
static void fiat_25519_carry_mul(uint32_t out1[10], const uint32_t arg1[10],
				 const uint32_t arg2[10])
{
	/*
	 * Schoolbook product of two 10-limb values, one 64-bit partial product
	 * per limb pair.
	 *
	 * x1..x45: pairs whose limb indices sum to 10 or more. These carry a
	 * factor of 0x13, which folds them back into limbs 0..8, and the factor
	 * becomes 0x26 when both indices are odd.
	 * NOTE(review): presumably because 2^255 = 19 (mod m) with
	 * m = 2^255 - 19 -- m is not defined in this function; confirm.
	 */
	uint64_t x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * UINT8_C(0x26)));
	uint64_t x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * UINT8_C(0x26)));
	uint64_t x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * UINT8_C(0x26)));
	uint64_t x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * UINT8_C(0x13)));
	uint64_t x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * UINT8_C(0x26)));
	uint64_t x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * UINT8_C(0x13)));
	uint64_t x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * UINT8_C(0x26)));
	uint64_t x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * UINT8_C(0x13)));
	uint64_t x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * UINT8_C(0x13)));
	uint64_t x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * UINT8_C(0x13)));
	uint64_t x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * UINT8_C(0x13)));
	uint64_t x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * UINT8_C(0x13)));
	uint64_t x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * UINT8_C(0x13)));
	uint64_t x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * UINT8_C(0x26)));
	uint64_t x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * UINT8_C(0x26)));
	uint64_t x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * UINT8_C(0x26)));
	uint64_t x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * UINT8_C(0x13)));
	uint64_t x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * UINT8_C(0x26)));
	uint64_t x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * UINT8_C(0x13)));
	uint64_t x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * UINT8_C(0x13)));
	uint64_t x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * UINT8_C(0x13)));
	uint64_t x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * UINT8_C(0x13)));
	uint64_t x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * UINT8_C(0x26)));
	uint64_t x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * UINT8_C(0x26)));
	uint64_t x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * UINT8_C(0x26)));
	uint64_t x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * UINT8_C(0x13)));
	uint64_t x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * UINT8_C(0x13)));
	uint64_t x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * UINT8_C(0x13)));
	uint64_t x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * UINT8_C(0x26)));
	uint64_t x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * UINT8_C(0x26)));
	uint64_t x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * UINT8_C(0x13)));
	uint64_t x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * UINT8_C(0x13)));
	uint64_t x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * UINT8_C(0x26)));
	/*
	 * x46..x100: pairs whose limb indices sum to at most 9. No 0x13 factor
	 * is applied; the product is doubled when both indices are odd.
	 */
	uint64_t x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
	uint64_t x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
	uint64_t x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
	uint64_t x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
	uint64_t x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * 0x2));
	uint64_t x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
	uint64_t x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
	uint64_t x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
	uint64_t x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
	uint64_t x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
	uint64_t x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
	uint64_t x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * 0x2));
	uint64_t x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
	uint64_t x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * 0x2));
	uint64_t x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
	uint64_t x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
	uint64_t x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
	uint64_t x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
	uint64_t x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
	uint64_t x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
	uint64_t x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
	uint64_t x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
	uint64_t x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * 0x2));
	uint64_t x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
	uint64_t x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * 0x2));
	uint64_t x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
	uint64_t x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * 0x2));
	uint64_t x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
	uint64_t x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
	uint64_t x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
	uint64_t x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
	uint64_t x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
	uint64_t x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
	uint64_t x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
	uint64_t x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
	uint64_t x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
	uint64_t x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
	uint64_t x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * 0x2));
	uint64_t x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
	uint64_t x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * 0x2));
	uint64_t x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
	uint64_t x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * 0x2));
	uint64_t x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
	uint64_t x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * 0x2));
	uint64_t x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
	uint64_t x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
	uint64_t x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
	uint64_t x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
	uint64_t x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
	uint64_t x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
	uint64_t x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
	uint64_t x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
	uint64_t x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
	uint64_t x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
	uint64_t x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
	/* Column sum for limb 0, split into its low 26 bits and a carry. */
	uint64_t x101 =
		(x100 +
		 (x45 +
		  (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
	uint64_t x102 = (x101 >> 26);
	uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
	/* Column sums for limbs 9 (x104) down to 1 (x112). */
	uint64_t x104 =
		(x91 +
		 (x82 +
		  (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
	uint64_t x105 =
		(x92 +
		 (x83 +
		  (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
	uint64_t x106 =
		(x93 +
		 (x84 +
		  (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
	uint64_t x107 =
		(x94 +
		 (x85 +
		  (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
	uint64_t x108 =
		(x95 +
		 (x86 +
		  (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
	uint64_t x109 =
		(x96 +
		 (x87 +
		  (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
	uint64_t x110 =
		(x97 +
		 (x88 +
		  (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
	uint64_t x111 =
		(x98 +
		 (x89 +
		  (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
	uint64_t x112 =
		(x99 +
		 (x90 +
		  (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
	/*
	 * Carry chain from limb 1 up to limb 9: odd limbs keep 25 bits, even
	 * limbs keep 26 bits, and the excess moves into the next column.
	 */
	uint64_t x113 = (x102 + x112);
	uint64_t x114 = (x113 >> 25);
	uint32_t x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
	uint64_t x116 = (x114 + x111);
	uint64_t x117 = (x116 >> 26);
	uint32_t x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
	uint64_t x119 = (x117 + x110);
	uint64_t x120 = (x119 >> 25);
	uint32_t x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
	uint64_t x122 = (x120 + x109);
	uint64_t x123 = (x122 >> 26);
	uint32_t x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
	uint64_t x125 = (x123 + x108);
	uint64_t x126 = (x125 >> 25);
	uint32_t x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
	uint64_t x128 = (x126 + x107);
	uint64_t x129 = (x128 >> 26);
	uint32_t x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
	uint64_t x131 = (x129 + x106);
	uint64_t x132 = (x131 >> 25);
	uint32_t x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
	uint64_t x134 = (x132 + x105);
	uint64_t x135 = (x134 >> 26);
	uint32_t x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
	uint64_t x137 = (x135 + x104);
	uint64_t x138 = (x137 >> 25);
	uint32_t x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
	/*
	 * The carry out of limb 9 wraps around into limb 0 scaled by 0x13, then
	 * two more short carries run into limbs 1 and 2. Limb 2 (x147) is left
	 * without a final mask.
	 */
	uint64_t x140 = (x138 * UINT8_C(0x13));
	uint64_t x141 = (x103 + x140);
	uint32_t x142 = (uint32_t)(x141 >> 26);
	uint32_t x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
	uint32_t x144 = (x142 + x115);
	uint8_t x145 = (uint8_t)(x144 >> 25);
	uint32_t x146 = (x144 & UINT32_C(0x1ffffff));
	uint32_t x147 = (x145 + x118);
	out1[0] = x143;
	out1[1] = x146;
	out1[2] = x147;
	out1[3] = x121;
	out1[4] = x124;
	out1[5] = x127;
	out1[6] = x130;
	out1[7] = x133;
	out1[8] = x136;
	out1[9] = x139;
}

/*
 * The function fiat_25519_carry_square squares a field element and reduces the result.
 * Postconditions:
 *   eval out1 mod m = (eval arg1 * eval arg1) mod m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
 */
static void fiat_25519_carry_square(uint32_t out1[10], const uint32_t arg1[10])
{
	/*
	 * x1..x18: limbs pre-scaled by 0x13, by 0x2, or by both. Some of the
	 * scaled values are widened to 64 bits before doubling (x5, x11).
	 */
	uint32_t x1 = ((arg1[9]) * UINT8_C(0x13));
	uint32_t x2 = (x1 * 0x2);
	uint32_t x3 = ((arg1[9]) * 0x2);
	uint32_t x4 = ((arg1[8]) * UINT8_C(0x13));
	uint64_t x5 = ((uint64_t)x4 * 0x2);
	uint32_t x6 = ((arg1[8]) * 0x2);
	uint32_t x7 = ((arg1[7]) * UINT8_C(0x13));
	uint32_t x8 = (x7 * 0x2);
	uint32_t x9 = ((arg1[7]) * 0x2);
	uint32_t x10 = ((arg1[6]) * UINT8_C(0x13));
	uint64_t x11 = ((uint64_t)x10 * 0x2);
	uint32_t x12 = ((arg1[6]) * 0x2);
	uint32_t x13 = ((arg1[5]) * UINT8_C(0x13));
	uint32_t x14 = ((arg1[5]) * 0x2);
	uint32_t x15 = ((arg1[4]) * 0x2);
	uint32_t x16 = ((arg1[3]) * 0x2);
	uint32_t x17 = ((arg1[2]) * 0x2);
	uint32_t x18 = ((arg1[1]) * 0x2);
	/*
	 * x19..x73: 64-bit partial products of a limb with one of the scaled
	 * values above, or with another limb directly.
	 */
	uint64_t x19 = ((uint64_t)(arg1[9]) * (x1 * 0x2));
	uint64_t x20 = ((uint64_t)(arg1[8]) * x2);
	uint64_t x21 = ((uint64_t)(arg1[8]) * x4);
	uint64_t x22 = ((arg1[7]) * ((uint64_t)x2 * 0x2));
	uint64_t x23 = ((arg1[7]) * x5);
	uint64_t x24 = ((uint64_t)(arg1[7]) * (x7 * 0x2));
	uint64_t x25 = ((uint64_t)(arg1[6]) * x2);
	uint64_t x26 = ((arg1[6]) * x5);
	uint64_t x27 = ((uint64_t)(arg1[6]) * x8);
	uint64_t x28 = ((uint64_t)(arg1[6]) * x10);
	uint64_t x29 = ((arg1[5]) * ((uint64_t)x2 * 0x2));
	uint64_t x30 = ((arg1[5]) * x5);
	uint64_t x31 = ((arg1[5]) * ((uint64_t)x8 * 0x2));
	uint64_t x32 = ((arg1[5]) * x11);
	uint64_t x33 = ((uint64_t)(arg1[5]) * (x13 * 0x2));
	uint64_t x34 = ((uint64_t)(arg1[4]) * x2);
	uint64_t x35 = ((arg1[4]) * x5);
	uint64_t x36 = ((uint64_t)(arg1[4]) * x8);
	uint64_t x37 = ((arg1[4]) * x11);
	uint64_t x38 = ((uint64_t)(arg1[4]) * x14);
	uint64_t x39 = ((uint64_t)(arg1[4]) * (arg1[4]));
	uint64_t x40 = ((arg1[3]) * ((uint64_t)x2 * 0x2));
	uint64_t x41 = ((arg1[3]) * x5);
	uint64_t x42 = ((arg1[3]) * ((uint64_t)x8 * 0x2));
	uint64_t x43 = ((uint64_t)(arg1[3]) * x12);
	uint64_t x44 = ((uint64_t)(arg1[3]) * (x14 * 0x2));
	uint64_t x45 = ((uint64_t)(arg1[3]) * x15);
	uint64_t x46 = ((uint64_t)(arg1[3]) * ((arg1[3]) * 0x2));
	uint64_t x47 = ((uint64_t)(arg1[2]) * x2);
	uint64_t x48 = ((arg1[2]) * x5);
	uint64_t x49 = ((uint64_t)(arg1[2]) * x9);
	uint64_t x50 = ((uint64_t)(arg1[2]) * x12);
	uint64_t x51 = ((uint64_t)(arg1[2]) * x14);
	uint64_t x52 = ((uint64_t)(arg1[2]) * x15);
	uint64_t x53 = ((uint64_t)(arg1[2]) * x16);
	uint64_t x54 = ((uint64_t)(arg1[2]) * (arg1[2]));
	uint64_t x55 = ((arg1[1]) * ((uint64_t)x2 * 0x2));
	uint64_t x56 = ((uint64_t)(arg1[1]) * x6);
	uint64_t x57 = ((uint64_t)(arg1[1]) * (x9 * 0x2));
	uint64_t x58 = ((uint64_t)(arg1[1]) * x12);
	uint64_t x59 = ((uint64_t)(arg1[1]) * (x14 * 0x2));
	uint64_t x60 = ((uint64_t)(arg1[1]) * x15);
	uint64_t x61 = ((uint64_t)(arg1[1]) * (x16 * 0x2));
	uint64_t x62 = ((uint64_t)(arg1[1]) * x17);
	uint64_t x63 = ((uint64_t)(arg1[1]) * ((arg1[1]) * 0x2));
	uint64_t x64 = ((uint64_t)(arg1[0]) * x3);
	uint64_t x65 = ((uint64_t)(arg1[0]) * x6);
	uint64_t x66 = ((uint64_t)(arg1[0]) * x9);
	uint64_t x67 = ((uint64_t)(arg1[0]) * x12);
	uint64_t x68 = ((uint64_t)(arg1[0]) * x14);
	uint64_t x69 = ((uint64_t)(arg1[0]) * x15);
	uint64_t x70 = ((uint64_t)(arg1[0]) * x16);
	uint64_t x71 = ((uint64_t)(arg1[0]) * x17);
	uint64_t x72 = ((uint64_t)(arg1[0]) * x18);
	uint64_t x73 = ((uint64_t)(arg1[0]) * (arg1[0]));
	/* Column sum for limb 0, split into its low 26 bits and a carry. */
	uint64_t x74 = (x73 + (x55 + (x48 + (x42 + (x37 + x33)))));
	uint64_t x75 = (x74 >> 26);
	uint32_t x76 = (uint32_t)(x74 & UINT32_C(0x3ffffff));
	/* Column sums for limbs 9 (x77) down to 1 (x85). */
	uint64_t x77 = (x64 + (x56 + (x49 + (x43 + x38))));
	uint64_t x78 = (x65 + (x57 + (x50 + (x44 + (x39 + x19)))));
	uint64_t x79 = (x66 + (x58 + (x51 + (x45 + x20))));
	uint64_t x80 = (x67 + (x59 + (x52 + (x46 + (x22 + x21)))));
	uint64_t x81 = (x68 + (x60 + (x53 + (x25 + x23))));
	uint64_t x82 = (x69 + (x61 + (x54 + (x29 + (x26 + x24)))));
	uint64_t x83 = (x70 + (x62 + (x34 + (x30 + x27))));
	uint64_t x84 = (x71 + (x63 + (x40 + (x35 + (x31 + x28)))));
	uint64_t x85 = (x72 + (x47 + (x41 + (x36 + x32))));
	/*
	 * Carry chain from limb 1 up to limb 9: odd limbs keep 25 bits, even
	 * limbs keep 26 bits, and the excess moves into the next column.
	 */
	uint64_t x86 = (x75 + x85);
	uint64_t x87 = (x86 >> 25);
	uint32_t x88 = (uint32_t)(x86 & UINT32_C(0x1ffffff));
	uint64_t x89 = (x87 + x84);
	uint64_t x90 = (x89 >> 26);
	uint32_t x91 = (uint32_t)(x89 & UINT32_C(0x3ffffff));
	uint64_t x92 = (x90 + x83);
	uint64_t x93 = (x92 >> 25);
	uint32_t x94 = (uint32_t)(x92 & UINT32_C(0x1ffffff));
	uint64_t x95 = (x93 + x82);
	uint64_t x96 = (x95 >> 26);
	uint32_t x97 = (uint32_t)(x95 & UINT32_C(0x3ffffff));
	uint64_t x98 = (x96 + x81);
	uint64_t x99 = (x98 >> 25);
	uint32_t x100 = (uint32_t)(x98 & UINT32_C(0x1ffffff));
	uint64_t x101 = (x99 + x80);
	uint64_t x102 = (x101 >> 26);
	uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
	uint64_t x104 = (x102 + x79);
	uint64_t x105 = (x104 >> 25);
	uint32_t x106 = (uint32_t)(x104 & UINT32_C(0x1ffffff));
	uint64_t x107 = (x105 + x78);
	uint64_t x108 = (x107 >> 26);
	uint32_t x109 = (uint32_t)(x107 & UINT32_C(0x3ffffff));
	uint64_t x110 = (x108 + x77);
	uint64_t x111 = (x110 >> 25);
	uint32_t x112 = (uint32_t)(x110 & UINT32_C(0x1ffffff));
	/*
	 * The carry out of limb 9 wraps around into limb 0 scaled by 0x13, then
	 * two more short carries run into limbs 1 and 2. Limb 2 (x120) is left
	 * without a final mask.
	 */
	uint64_t x113 = (x111 * UINT8_C(0x13));
	uint64_t x114 = (x76 + x113);
	uint32_t x115 = (uint32_t)(x114 >> 26);
	uint32_t x116 = (uint32_t)(x114 & UINT32_C(0x3ffffff));
	uint32_t x117 = (x115 + x88);
	uint8_t x118 = (uint8_t)(x117 >> 25);
	uint32_t x119 = (x117 & UINT32_C(0x1ffffff));
	uint32_t x120 = (x118 + x91);
	out1[0] = x116;
	out1[1] = x119;
	out1[2] = x120;
	out1[3] = x94;
	out1[4] = x97;
	out1[5] = x100;
	out1[6] = x103;
	out1[7] = x106;
	out1[8] = x109;
	out1[9] = x112;
}

/*
 * The function fiat_25519_carry reduces a field element.
550 * Postconditions: 551 * eval out1 mod m = eval arg1 mod m 552 * 553 * Input Bounds: 554 * arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]] 555 * Output Bounds: 556 * out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 557 */ 558 static void fiat_25519_carry(uint32_t out1[10], const uint32_t arg1[10]) 559 { 560 uint32_t x1 = (arg1[0]); 561 uint32_t x2 = ((x1 >> 26) + (arg1[1])); 562 uint32_t x3 = ((x2 >> 25) + (arg1[2])); 563 uint32_t x4 = ((x3 >> 26) + (arg1[3])); 564 uint32_t x5 = ((x4 >> 25) + (arg1[4])); 565 uint32_t x6 = ((x5 >> 26) + (arg1[5])); 566 uint32_t x7 = ((x6 >> 25) + (arg1[6])); 567 uint32_t x8 = ((x7 >> 26) + (arg1[7])); 568 uint32_t x9 = ((x8 >> 25) + (arg1[8])); 569 uint32_t x10 = ((x9 >> 26) + (arg1[9])); 570 uint32_t x11 = 571 ((x1 & UINT32_C(0x3ffffff)) + ((x10 >> 25) * UINT8_C(0x13))); 572 uint32_t x12 = ((uint8_t)(x11 >> 26) + (x2 & UINT32_C(0x1ffffff))); 573 uint32_t x13 = (x11 & UINT32_C(0x3ffffff)); 574 uint32_t x14 = (x12 & UINT32_C(0x1ffffff)); 575 uint32_t x15 = ((uint8_t)(x12 >> 25) + (x3 & UINT32_C(0x3ffffff))); 576 uint32_t x16 = (x4 & UINT32_C(0x1ffffff)); 577 uint32_t x17 = (x5 & UINT32_C(0x3ffffff)); 578 uint32_t x18 = (x6 & UINT32_C(0x1ffffff)); 579 uint32_t x19 = (x7 & UINT32_C(0x3ffffff)); 580 uint32_t x20 = (x8 & UINT32_C(0x1ffffff)); 581 uint32_t x21 = (x9 & UINT32_C(0x3ffffff)); 582 uint32_t x22 = (x10 & UINT32_C(0x1ffffff)); 583 out1[0] = x13; 584 out1[1] = x14; 585 out1[2] = x15; 586 out1[3] = x16; 587 out1[4] = x17; 588 out1[5] = x18; 589 out1[6] = x19; 590 out1[7] = x20; 591 out1[8] = x21; 592 out1[9] = x22; 593 } 594 595 /* 596 * The function fiat_25519_add adds two field elements. 
597 * Postconditions: 598 * eval out1 mod m = (eval arg1 + eval arg2) mod m 599 * 600 * Input Bounds: 601 * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 602 * arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 603 * Output Bounds: 604 * out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]] 605 */ 606 static void fiat_25519_add(uint32_t out1[10], const uint32_t arg1[10], 607 const uint32_t arg2[10]) 608 { 609 uint32_t x1 = ((arg1[0]) + (arg2[0])); 610 uint32_t x2 = ((arg1[1]) + (arg2[1])); 611 uint32_t x3 = ((arg1[2]) + (arg2[2])); 612 uint32_t x4 = ((arg1[3]) + (arg2[3])); 613 uint32_t x5 = ((arg1[4]) + (arg2[4])); 614 uint32_t x6 = ((arg1[5]) + (arg2[5])); 615 uint32_t x7 = ((arg1[6]) + (arg2[6])); 616 uint32_t x8 = ((arg1[7]) + (arg2[7])); 617 uint32_t x9 = ((arg1[8]) + (arg2[8])); 618 uint32_t x10 = ((arg1[9]) + (arg2[9])); 619 out1[0] = x1; 620 out1[1] = x2; 621 out1[2] = x3; 622 out1[3] = x4; 623 out1[4] = x5; 624 out1[5] = x6; 625 out1[6] = x7; 626 out1[7] = x8; 627 out1[8] = x9; 628 out1[9] = x10; 629 } 630 631 /* 632 * The function fiat_25519_sub subtracts two field elements. 
633 * Postconditions: 634 * eval out1 mod m = (eval arg1 - eval arg2) mod m 635 * 636 * Input Bounds: 637 * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 638 * arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 639 * Output Bounds: 640 * out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]] 641 */ 642 static void fiat_25519_sub(uint32_t out1[10], const uint32_t arg1[10], 643 const uint32_t arg2[10]) 644 { 645 uint32_t x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0])); 646 uint32_t x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1])); 647 uint32_t x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2])); 648 uint32_t x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3])); 649 uint32_t x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4])); 650 uint32_t x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5])); 651 uint32_t x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6])); 652 uint32_t x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7])); 653 uint32_t x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8])); 654 uint32_t x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9])); 655 out1[0] = x1; 656 out1[1] = x2; 657 out1[2] = x3; 658 out1[3] = x4; 659 out1[4] = x5; 660 out1[5] = x6; 661 out1[6] = x7; 662 out1[7] = x8; 663 out1[8] = x9; 664 out1[9] = x10; 665 } 666 667 /* 668 * The function fiat_25519_opp negates a field element. 
669 * Postconditions: 670 * eval out1 mod m = -eval arg1 mod m 671 * 672 * Input Bounds: 673 * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 674 * Output Bounds: 675 * out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]] 676 */ 677 static void fiat_25519_opp(uint32_t out1[10], const uint32_t arg1[10]) 678 { 679 uint32_t x1 = (UINT32_C(0x7ffffda) - (arg1[0])); 680 uint32_t x2 = (UINT32_C(0x3fffffe) - (arg1[1])); 681 uint32_t x3 = (UINT32_C(0x7fffffe) - (arg1[2])); 682 uint32_t x4 = (UINT32_C(0x3fffffe) - (arg1[3])); 683 uint32_t x5 = (UINT32_C(0x7fffffe) - (arg1[4])); 684 uint32_t x6 = (UINT32_C(0x3fffffe) - (arg1[5])); 685 uint32_t x7 = (UINT32_C(0x7fffffe) - (arg1[6])); 686 uint32_t x8 = (UINT32_C(0x3fffffe) - (arg1[7])); 687 uint32_t x9 = (UINT32_C(0x7fffffe) - (arg1[8])); 688 uint32_t x10 = (UINT32_C(0x3fffffe) - (arg1[9])); 689 out1[0] = x1; 690 out1[1] = x2; 691 out1[2] = x3; 692 out1[3] = x4; 693 out1[4] = x5; 694 out1[5] = x6; 695 out1[6] = x7; 696 out1[7] = x8; 697 out1[8] = x9; 698 out1[9] = x10; 699 } 700 701 /* 702 * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order. 
703 * Postconditions: 704 * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31] 705 * 706 * Input Bounds: 707 * arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]] 708 * Output Bounds: 709 * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]] 710 */ 711 static void fiat_25519_to_bytes(uint8_t out1[32], const uint32_t arg1[10]) 712 { 713 uint32_t x1; 714 uint8_t x2; 715 fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), 716 UINT32_C(0x3ffffed)); 717 uint32_t x3; 718 uint8_t x4; 719 fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff)); 720 uint32_t x5; 721 uint8_t x6; 722 fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff)); 723 uint32_t x7; 724 uint8_t x8; 725 fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff)); 726 uint32_t x9; 727 uint8_t x10; 728 fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), 729 UINT32_C(0x3ffffff)); 730 uint32_t x11; 731 uint8_t x12; 732 fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), 733 UINT32_C(0x1ffffff)); 734 uint32_t x13; 735 uint8_t x14; 736 fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), 737 UINT32_C(0x3ffffff)); 738 uint32_t x15; 739 uint8_t x16; 740 fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), 741 UINT32_C(0x1ffffff)); 742 uint32_t x17; 743 uint8_t x18; 744 fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), 745 UINT32_C(0x3ffffff)); 746 
uint32_t x19; 747 uint8_t x20; 748 fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), 749 UINT32_C(0x1ffffff)); 750 uint32_t x21; 751 fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff)); 752 uint32_t x22; 753 uint8_t x23; 754 fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, 755 (x21 & UINT32_C(0x3ffffed))); 756 uint32_t x24; 757 uint8_t x25; 758 fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, 759 (x21 & UINT32_C(0x1ffffff))); 760 uint32_t x26; 761 uint8_t x27; 762 fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, 763 (x21 & UINT32_C(0x3ffffff))); 764 uint32_t x28; 765 uint8_t x29; 766 fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, 767 (x21 & UINT32_C(0x1ffffff))); 768 uint32_t x30; 769 uint8_t x31; 770 fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, 771 (x21 & UINT32_C(0x3ffffff))); 772 uint32_t x32; 773 uint8_t x33; 774 fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, 775 (x21 & UINT32_C(0x1ffffff))); 776 uint32_t x34; 777 uint8_t x35; 778 fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, 779 (x21 & UINT32_C(0x3ffffff))); 780 uint32_t x36; 781 uint8_t x37; 782 fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, 783 (x21 & UINT32_C(0x1ffffff))); 784 uint32_t x38; 785 uint8_t x39; 786 fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, 787 (x21 & UINT32_C(0x3ffffff))); 788 uint32_t x40; 789 uint8_t x41; 790 fiat_25519_addcarryx_u25(&x40, &x41, x39, x19, 791 (x21 & UINT32_C(0x1ffffff))); 792 uint32_t x42 = (x40 << 6); 793 uint32_t x43 = (x38 << 4); 794 uint32_t x44 = (x36 << 3); 795 uint32_t x45 = (x34 * (uint32_t)0x2); 796 uint32_t x46 = (x30 << 6); 797 uint32_t x47 = (x28 << 5); 798 uint32_t x48 = (x26 << 3); 799 uint32_t x49 = (x24 << 2); 800 uint32_t x50 = (x22 >> 8); 801 uint8_t x51 = (uint8_t)(x22 & UINT8_C(0xff)); 802 uint32_t x52 = (x50 >> 8); 803 uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff)); 804 uint8_t x54 = (uint8_t)(x52 >> 8); 805 uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff)); 806 uint32_t x56 = (x54 + x49); 807 uint32_t x57 = (x56 >> 8); 808 uint8_t x58 = (uint8_t)(x56 
& UINT8_C(0xff)); 809 uint32_t x59 = (x57 >> 8); 810 uint8_t x60 = (uint8_t)(x57 & UINT8_C(0xff)); 811 uint8_t x61 = (uint8_t)(x59 >> 8); 812 uint8_t x62 = (uint8_t)(x59 & UINT8_C(0xff)); 813 uint32_t x63 = (x61 + x48); 814 uint32_t x64 = (x63 >> 8); 815 uint8_t x65 = (uint8_t)(x63 & UINT8_C(0xff)); 816 uint32_t x66 = (x64 >> 8); 817 uint8_t x67 = (uint8_t)(x64 & UINT8_C(0xff)); 818 uint8_t x68 = (uint8_t)(x66 >> 8); 819 uint8_t x69 = (uint8_t)(x66 & UINT8_C(0xff)); 820 uint32_t x70 = (x68 + x47); 821 uint32_t x71 = (x70 >> 8); 822 uint8_t x72 = (uint8_t)(x70 & UINT8_C(0xff)); 823 uint32_t x73 = (x71 >> 8); 824 uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff)); 825 uint8_t x75 = (uint8_t)(x73 >> 8); 826 uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff)); 827 uint32_t x77 = (x75 + x46); 828 uint32_t x78 = (x77 >> 8); 829 uint8_t x79 = (uint8_t)(x77 & UINT8_C(0xff)); 830 uint32_t x80 = (x78 >> 8); 831 uint8_t x81 = (uint8_t)(x78 & UINT8_C(0xff)); 832 uint8_t x82 = (uint8_t)(x80 >> 8); 833 uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff)); 834 uint8_t x84 = (uint8_t)(x82 & UINT8_C(0xff)); 835 uint32_t x85 = (x32 >> 8); 836 uint8_t x86 = (uint8_t)(x32 & UINT8_C(0xff)); 837 uint32_t x87 = (x85 >> 8); 838 uint8_t x88 = (uint8_t)(x85 & UINT8_C(0xff)); 839 uint8_t x89 = (uint8_t)(x87 >> 8); 840 uint8_t x90 = (uint8_t)(x87 & UINT8_C(0xff)); 841 uint32_t x91 = (x89 + x45); 842 uint32_t x92 = (x91 >> 8); 843 uint8_t x93 = (uint8_t)(x91 & UINT8_C(0xff)); 844 uint32_t x94 = (x92 >> 8); 845 uint8_t x95 = (uint8_t)(x92 & UINT8_C(0xff)); 846 uint8_t x96 = (uint8_t)(x94 >> 8); 847 uint8_t x97 = (uint8_t)(x94 & UINT8_C(0xff)); 848 uint32_t x98 = (x96 + x44); 849 uint32_t x99 = (x98 >> 8); 850 uint8_t x100 = (uint8_t)(x98 & UINT8_C(0xff)); 851 uint32_t x101 = (x99 >> 8); 852 uint8_t x102 = (uint8_t)(x99 & UINT8_C(0xff)); 853 uint8_t x103 = (uint8_t)(x101 >> 8); 854 uint8_t x104 = (uint8_t)(x101 & UINT8_C(0xff)); 855 uint32_t x105 = (x103 + x43); 856 uint32_t x106 = (x105 >> 8); 857 uint8_t x107 = 
(uint8_t)(x105 & UINT8_C(0xff)); 858 uint32_t x108 = (x106 >> 8); 859 uint8_t x109 = (uint8_t)(x106 & UINT8_C(0xff)); 860 uint8_t x110 = (uint8_t)(x108 >> 8); 861 uint8_t x111 = (uint8_t)(x108 & UINT8_C(0xff)); 862 uint32_t x112 = (x110 + x42); 863 uint32_t x113 = (x112 >> 8); 864 uint8_t x114 = (uint8_t)(x112 & UINT8_C(0xff)); 865 uint32_t x115 = (x113 >> 8); 866 uint8_t x116 = (uint8_t)(x113 & UINT8_C(0xff)); 867 uint8_t x117 = (uint8_t)(x115 >> 8); 868 uint8_t x118 = (uint8_t)(x115 & UINT8_C(0xff)); 869 out1[0] = x51; 870 out1[1] = x53; 871 out1[2] = x55; 872 out1[3] = x58; 873 out1[4] = x60; 874 out1[5] = x62; 875 out1[6] = x65; 876 out1[7] = x67; 877 out1[8] = x69; 878 out1[9] = x72; 879 out1[10] = x74; 880 out1[11] = x76; 881 out1[12] = x79; 882 out1[13] = x81; 883 out1[14] = x83; 884 out1[15] = x84; 885 out1[16] = x86; 886 out1[17] = x88; 887 out1[18] = x90; 888 out1[19] = x93; 889 out1[20] = x95; 890 out1[21] = x97; 891 out1[22] = x100; 892 out1[23] = x102; 893 out1[24] = x104; 894 out1[25] = x107; 895 out1[26] = x109; 896 out1[27] = x111; 897 out1[28] = x114; 898 out1[29] = x116; 899 out1[30] = x118; 900 out1[31] = x117; 901 } 902 903 /* 904 * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order. 
/*
 * fiat_25519_from_bytes unpacks a 32-byte little-endian encoding into ten
 * 32-bit limbs of alternating 26- and 25-bit width.
 * Postconditions:
 *   eval out1 mod m = bytes_eval arg1 mod m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff],
 *          [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666],
 *          [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
 *          [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666],
 *          [0x0 ~> 0x2333333]]
 *
 * The top limb is not masked: bit 7 of arg1[31] would land in bit 25 of
 * out1[9], which is why the input bound for that byte is 0x7f.
 */
static void fiat_25519_from_bytes(uint32_t out1[10], const uint8_t arg1[32])
{
	/* Move every byte to its bit offset inside the limb holding its low bits. */
	uint32_t x1 = ((uint32_t)(arg1[31]) << 18);
	uint32_t x2 = ((uint32_t)(arg1[30]) << 10);
	uint32_t x3 = ((uint32_t)(arg1[29]) << 2);
	uint32_t x4 = ((uint32_t)(arg1[28]) << 20);
	uint32_t x5 = ((uint32_t)(arg1[27]) << 12);
	uint32_t x6 = ((uint32_t)(arg1[26]) << 4);
	uint32_t x7 = ((uint32_t)(arg1[25]) << 21);
	uint32_t x8 = ((uint32_t)(arg1[24]) << 13);
	uint32_t x9 = ((uint32_t)(arg1[23]) << 5);
	uint32_t x10 = ((uint32_t)(arg1[22]) << 23);
	uint32_t x11 = ((uint32_t)(arg1[21]) << 15);
	uint32_t x12 = ((uint32_t)(arg1[20]) << 7);
	uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
	uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
	uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
	uint8_t x16 = (arg1[16]);
	uint32_t x17 = ((uint32_t)(arg1[15]) << 18);
	uint32_t x18 = ((uint32_t)(arg1[14]) << 10);
	uint32_t x19 = ((uint32_t)(arg1[13]) << 2);
	uint32_t x20 = ((uint32_t)(arg1[12]) << 19);
	uint32_t x21 = ((uint32_t)(arg1[11]) << 11);
	uint32_t x22 = ((uint32_t)(arg1[10]) << 3);
	uint32_t x23 = ((uint32_t)(arg1[9]) << 21);
	uint32_t x24 = ((uint32_t)(arg1[8]) << 13);
	uint32_t x25 = ((uint32_t)(arg1[7]) << 5);
	uint32_t x26 = ((uint32_t)(arg1[6]) << 22);
	uint32_t x27 = ((uint32_t)(arg1[5]) << 14);
	uint32_t x28 = ((uint32_t)(arg1[4]) << 6);
	uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
	uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
	uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
	uint8_t x32 = (arg1[0]);

	/* Limb 0: bytes 0..3; the bits above bit 25 spill into limb 1. */
	uint32_t x33 = (x32 + (x31 + (x30 + x29)));
	uint8_t x34 = (uint8_t)(x33 >> 26);
	uint32_t x35 = (x33 & UINT32_C(0x3ffffff));

	/* Per-limb sums of the pre-shifted bytes (before spill-over is added). */
	uint32_t x36 = (x3 + (x2 + x1));
	uint32_t x37 = (x6 + (x5 + x4));
	uint32_t x38 = (x9 + (x8 + x7));
	uint32_t x39 = (x12 + (x11 + x10));
	uint32_t x40 = (x16 + (x15 + (x14 + x13)));
	uint32_t x41 = (x19 + (x18 + x17));
	uint32_t x42 = (x22 + (x21 + x20));
	uint32_t x43 = (x25 + (x24 + x23));
	uint32_t x44 = (x28 + (x27 + x26));

	/* Limbs 1..4: add the spill from the limb below, split again. */
	uint32_t x45 = (x34 + x44);
	uint8_t x46 = (uint8_t)(x45 >> 25);
	uint32_t x47 = (x45 & UINT32_C(0x1ffffff));
	uint32_t x48 = (x46 + x43);
	uint8_t x49 = (uint8_t)(x48 >> 26);
	uint32_t x50 = (x48 & UINT32_C(0x3ffffff));
	uint32_t x51 = (x49 + x42);
	uint8_t x52 = (uint8_t)(x51 >> 25);
	uint32_t x53 = (x51 & UINT32_C(0x1ffffff));
	uint32_t x54 = (x52 + x41);
	/*
	 * Limbs 0..4 cover 26+25+26+25+26 = 128 bits, i.e. exactly bytes
	 * 0..15, so nothing spills from limb 4 into limb 5.
	 */
	uint32_t x55 = (x54 & UINT32_C(0x3ffffff));

	/* Limbs 5..9: same chain, restarted at byte 16. */
	uint8_t x56 = (uint8_t)(x40 >> 25);
	uint32_t x57 = (x40 & UINT32_C(0x1ffffff));
	uint32_t x58 = (x56 + x39);
	uint8_t x59 = (uint8_t)(x58 >> 26);
	uint32_t x60 = (x58 & UINT32_C(0x3ffffff));
	uint32_t x61 = (x59 + x38);
	uint8_t x62 = (uint8_t)(x61 >> 25);
	uint32_t x63 = (x61 & UINT32_C(0x1ffffff));
	uint32_t x64 = (x62 + x37);
	uint8_t x65 = (uint8_t)(x64 >> 26);
	uint32_t x66 = (x64 & UINT32_C(0x3ffffff));
	uint32_t x67 = (x65 + x36); /* top limb: left unmasked */

	out1[0] = x35;
	out1[1] = x47;
	out1[2] = x50;
	out1[3] = x53;
	out1[4] = x55;
	out1[5] = x57;
	out1[6] = x60;
	out1[7] = x63;
	out1[8] = x66;
	out1[9] = x67;
}

// Definitions

// fe means field element. Here the field is \Z/(2^255-19). An element t,
// entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
// t[3]+2^102 t[4]+...+2^230 t[9].
// fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
// Multiplication and carrying produce fe from fe_loose.
typedef struct fe {
	uint32_t v[10];
} fe;

// fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc.
// Addition and subtraction produce fe_loose from (fe, fe).
typedef struct fe_loose {
	uint32_t v[10];
} fe_loose;

// ge means group element.
//
// Here the group is the set of pairs (x,y) of field elements (see fe.h)
// satisfying -x^2 + y^2 = 1 + d x^2y^2
// where d = -121665/121666.
//
// Representations:
//   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
//   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
//   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
//   ge_precomp (Duif): (y+x,y-x,2dxy)

typedef struct {
	fe X;
	fe Y;
	fe Z;
} ge_p2;

typedef struct {
	fe X;
	fe Y;
	fe Z;
	fe T;
} ge_p3;

typedef struct {
	fe_loose X;
	fe_loose Y;
	fe_loose Z;
	fe_loose T;
} ge_p1p1;

typedef struct {
	fe_loose yplusx;
	fe_loose yminusx;
	fe_loose xy2d;
} ge_precomp;

typedef struct {
	fe_loose YplusX;
	fe_loose YminusX;
	fe_loose Z;
	fe_loose T2d;
} ge_cached;

// Constants.
1057 1058 static const fe d = { { 56195235, 13857412, 51736253, 6949390, 114729, 24766616, 1059 60832955, 30306712, 48412415, 21499315 } }; 1060 1061 static const fe sqrtm1 = { { 34513072, 25610706, 9377949, 3500415, 12389472, 1062 33281959, 41962654, 31548777, 326685, 11406482 } }; 1063 1064 static const fe d2 = { { 45281625, 27714825, 36363642, 13898781, 229458, 1065 15978800, 54557047, 27058993, 29715967, 9444199 } }; 1066 1067 // Bi[i] = (2*i+1)*B 1068 static const ge_precomp Bi[8] = { 1069 { 1070 { { 25967493, 19198397, 29566455, 3660896, 54414519, 4014786, 1071 27544626, 21800161, 61029707, 2047604 1072 1073 } }, 1074 { { 54563134, 934261, 64385954, 3049989, 66381436, 9406985, 1075 12720692, 5043384, 19500929, 18085054 1076 1077 } }, 1078 { { 58370664, 4489569, 9688441, 18769238, 10184608, 21191052, 1079 29287918, 11864899, 42594502, 29115885 } }, 1080 }, 1081 { 1082 { { 15636272, 23865875, 24204772, 25642034, 616976, 16869170, 1083 27787599, 18782243, 28944399, 32004408 } }, 1084 { { 16568933, 4717097, 55552716, 32452109, 15682895, 21747389, 1085 16354576, 21778470, 7689661, 11199574 } }, 1086 { { 30464137, 27578307, 55329429, 17883566, 23220364, 15915852, 1087 7512774, 10017326, 49359771, 23634074 } }, 1088 }, 1089 { 1090 { { 10861363, 11473154, 27284546, 1981175, 37044515, 12577860, 1091 32867885, 14515107, 51670560, 10819379 } }, 1092 { { 4708026, 6336745, 20377586, 9066809, 55836755, 6594695, 1093 41455196, 12483687, 54440373, 5581305 } }, 1094 { { 19563141, 16186464, 37722007, 4097518, 10237984, 29206317, 1095 28542349, 13850243, 43430843, 17738489 } }, 1096 }, 1097 { 1098 { { 5153727, 9909285, 1723747, 30776558, 30523604, 5516873, 1099 19480852, 5230134, 43156425, 18378665 } }, 1100 { { 36839857, 30090922, 7665485, 10083793, 28475525, 1649722, 1101 20654025, 16520125, 30598449, 7715701 } }, 1102 { { 28881826, 14381568, 9657904, 3680757, 46927229, 7843315, 1103 35708204, 1370707, 29794553, 32145132 } }, 1104 }, 1105 { 1106 { { 44589871, 26862249, 
14201701, 24808930, 43598457, 8844725, 1107 18474211, 32192982, 54046167, 13821876 } }, 1108 { { 60653668, 25714560, 3374701, 28813570, 40010246, 22982724, 1109 31655027, 26342105, 18853321, 19333481 } }, 1110 { { 4566811, 20590564, 38133974, 21313742, 59506191, 30723862, 1111 58594505, 23123294, 2207752, 30344648 } }, 1112 }, 1113 { 1114 { { 41954014, 29368610, 29681143, 7868801, 60254203, 24130566, 1115 54671499, 32891431, 35997400, 17421995 } }, 1116 { { 25576264, 30851218, 7349803, 21739588, 16472781, 9300885, 1117 3844789, 15725684, 171356, 6466918 } }, 1118 { { 23103977, 13316479, 9739013, 17404951, 817874, 18515490, 1119 8965338, 19466374, 36393951, 16193876 } }, 1120 }, 1121 { 1122 { { 33587053, 3180712, 64714734, 14003686, 50205390, 17283591, 1123 17238397, 4729455, 49034351, 9256799 } }, 1124 { { 41926547, 29380300, 32336397, 5036987, 45872047, 11360616, 1125 22616405, 9761698, 47281666, 630304 } }, 1126 { { 53388152, 2639452, 42871404, 26147950, 9494426, 27780403, 1127 60554312, 17593437, 64659607, 19263131 } }, 1128 }, 1129 { 1130 { { 63957664, 28508356, 9282713, 6866145, 35201802, 32691408, 1131 48168288, 15033783, 25105118, 25659556 } }, 1132 { { 42782475, 15950225, 35307649, 18961608, 55446126, 28463506, 1133 1573891, 30928545, 2198789, 17749813 } }, 1134 { { 64009494, 10324966, 64867251, 7453182, 61661885, 30818928, 1135 53296841, 17317989, 34647629, 21263748 } }, 1136 }, 1137 }; 1138 1139 static void fe_frombytes_strict(fe *h, const uint8_t s[32]) 1140 { 1141 // |fiat_25519_from_bytes| requires the top-most bit be clear. 
1142 fiat_25519_from_bytes(h->v, s); 1143 } 1144 1145 static void fe_frombytes(fe *h, const uint8_t s[32]) 1146 { 1147 uint8_t s_copy[32]; 1148 memcpy(s_copy, s, 32); 1149 s_copy[31] &= 0x7f; 1150 fe_frombytes_strict(h, s_copy); 1151 } 1152 1153 static void fe_tobytes(uint8_t s[32], const fe *f) 1154 { 1155 fiat_25519_to_bytes(s, f->v); 1156 } 1157 1158 // h = 0 1159 static void fe_0(fe *h) 1160 { 1161 memset(h, 0, sizeof(fe)); 1162 } 1163 1164 // h = 1 1165 static void fe_1(fe *h) 1166 { 1167 memset(h, 0, sizeof(fe)); 1168 h->v[0] = 1; 1169 } 1170 1171 // h = f + g 1172 // Can overlap h with f or g. 1173 static void fe_add(fe_loose *h, const fe *f, const fe *g) 1174 { 1175 fiat_25519_add(h->v, f->v, g->v); 1176 } 1177 1178 // h = f - g 1179 // Can overlap h with f or g. 1180 static void fe_sub(fe_loose *h, const fe *f, const fe *g) 1181 { 1182 fiat_25519_sub(h->v, f->v, g->v); 1183 } 1184 1185 static void fe_carry(fe *h, const fe_loose *f) 1186 { 1187 fiat_25519_carry(h->v, f->v); 1188 } 1189 1190 static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], 1191 const uint32_t in2[10]) 1192 { 1193 fiat_25519_carry_mul(out, in1, in2); 1194 } 1195 1196 static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) 1197 { 1198 fe_mul_impl(h->v, f->v, g->v); 1199 } 1200 1201 static void fe_mul_ttt(fe *h, const fe *f, const fe *g) 1202 { 1203 fe_mul_impl(h->v, f->v, g->v); 1204 } 1205 1206 static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) 1207 { 1208 fe_mul_impl(h->v, f->v, g->v); 1209 } 1210 1211 static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) 1212 { 1213 fe_mul_impl(h->v, f->v, g->v); 1214 } 1215 1216 static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) 1217 { 1218 fe_mul_impl(h->v, f->v, g->v); 1219 } 1220 1221 static void fe_sq_tl(fe *h, const fe_loose *f) 1222 { 1223 fiat_25519_carry_square(h->v, f->v); 1224 } 1225 1226 static void fe_sq_tt(fe *h, const fe *f) 1227 { 1228 fiat_25519_carry_square(h->v, f->v); 1229 } 1230 
1231 // h = -f 1232 static void fe_neg(fe_loose *h, const fe *f) 1233 { 1234 fiat_25519_opp(h->v, f->v); 1235 } 1236 1237 // h = f 1238 static void fe_copy(fe *h, const fe *f) 1239 { 1240 memmove(h, f, sizeof(fe)); 1241 } 1242 1243 static void fe_copy_lt(fe_loose *h, const fe *f) 1244 { 1245 memmove(h, f, sizeof(fe)); 1246 } 1247 1248 static void fe_loose_invert(fe *out, const fe_loose *z) 1249 { 1250 fe t0; 1251 fe t1; 1252 fe t2; 1253 fe t3; 1254 int i; 1255 1256 fe_sq_tl(&t0, z); 1257 fe_sq_tt(&t1, &t0); 1258 for (i = 1; i < 2; ++i) { 1259 fe_sq_tt(&t1, &t1); 1260 } 1261 fe_mul_tlt(&t1, z, &t1); 1262 fe_mul_ttt(&t0, &t0, &t1); 1263 fe_sq_tt(&t2, &t0); 1264 fe_mul_ttt(&t1, &t1, &t2); 1265 fe_sq_tt(&t2, &t1); 1266 for (i = 1; i < 5; ++i) { 1267 fe_sq_tt(&t2, &t2); 1268 } 1269 fe_mul_ttt(&t1, &t2, &t1); 1270 fe_sq_tt(&t2, &t1); 1271 for (i = 1; i < 10; ++i) { 1272 fe_sq_tt(&t2, &t2); 1273 } 1274 fe_mul_ttt(&t2, &t2, &t1); 1275 fe_sq_tt(&t3, &t2); 1276 for (i = 1; i < 20; ++i) { 1277 fe_sq_tt(&t3, &t3); 1278 } 1279 fe_mul_ttt(&t2, &t3, &t2); 1280 fe_sq_tt(&t2, &t2); 1281 for (i = 1; i < 10; ++i) { 1282 fe_sq_tt(&t2, &t2); 1283 } 1284 fe_mul_ttt(&t1, &t2, &t1); 1285 fe_sq_tt(&t2, &t1); 1286 for (i = 1; i < 50; ++i) { 1287 fe_sq_tt(&t2, &t2); 1288 } 1289 fe_mul_ttt(&t2, &t2, &t1); 1290 fe_sq_tt(&t3, &t2); 1291 for (i = 1; i < 100; ++i) { 1292 fe_sq_tt(&t3, &t3); 1293 } 1294 fe_mul_ttt(&t2, &t3, &t2); 1295 fe_sq_tt(&t2, &t2); 1296 for (i = 1; i < 50; ++i) { 1297 fe_sq_tt(&t2, &t2); 1298 } 1299 fe_mul_ttt(&t1, &t2, &t1); 1300 fe_sq_tt(&t1, &t1); 1301 for (i = 1; i < 5; ++i) { 1302 fe_sq_tt(&t1, &t1); 1303 } 1304 fe_mul_ttt(out, &t1, &t0); 1305 } 1306 1307 static void fe_invert(fe *out, const fe *z) 1308 { 1309 fe_loose l; 1310 fe_copy_lt(&l, z); 1311 fe_loose_invert(out, &l); 1312 } 1313 1314 // return 0 if f == 0 1315 // return 1 if f != 0 1316 static int fe_isnonzero(const fe_loose *f) 1317 { 1318 fe tight; 1319 fe_carry(&tight, f); 1320 uint8_t s[32]; 1321 
fe_tobytes(s, &tight); 1322 1323 static const uint8_t zero[32] = { 0 }; 1324 return memcmp_ct(s, zero, sizeof(zero)) != 0; 1325 } 1326 1327 // return 1 if f is in {1,3,5,...,q-2} 1328 // return 0 if f is in {0,2,4,...,q-1} 1329 static int fe_isnegative(const fe *f) 1330 { 1331 uint8_t s[32]; 1332 fe_tobytes(s, f); 1333 return s[0] & 1; 1334 } 1335 1336 static void fe_sq2_tt(fe *h, const fe *f) 1337 { 1338 // h = f^2 1339 fe_sq_tt(h, f); 1340 1341 // h = h + h 1342 fe_loose tmp; 1343 fe_add(&tmp, h, h); 1344 fe_carry(h, &tmp); 1345 } 1346 1347 static void fe_pow22523(fe *out, const fe *z) 1348 { 1349 fe t0; 1350 fe t1; 1351 fe t2; 1352 int i; 1353 1354 fe_sq_tt(&t0, z); 1355 fe_sq_tt(&t1, &t0); 1356 for (i = 1; i < 2; ++i) { 1357 fe_sq_tt(&t1, &t1); 1358 } 1359 fe_mul_ttt(&t1, z, &t1); 1360 fe_mul_ttt(&t0, &t0, &t1); 1361 fe_sq_tt(&t0, &t0); 1362 fe_mul_ttt(&t0, &t1, &t0); 1363 fe_sq_tt(&t1, &t0); 1364 for (i = 1; i < 5; ++i) { 1365 fe_sq_tt(&t1, &t1); 1366 } 1367 fe_mul_ttt(&t0, &t1, &t0); 1368 fe_sq_tt(&t1, &t0); 1369 for (i = 1; i < 10; ++i) { 1370 fe_sq_tt(&t1, &t1); 1371 } 1372 fe_mul_ttt(&t1, &t1, &t0); 1373 fe_sq_tt(&t2, &t1); 1374 for (i = 1; i < 20; ++i) { 1375 fe_sq_tt(&t2, &t2); 1376 } 1377 fe_mul_ttt(&t1, &t2, &t1); 1378 fe_sq_tt(&t1, &t1); 1379 for (i = 1; i < 10; ++i) { 1380 fe_sq_tt(&t1, &t1); 1381 } 1382 fe_mul_ttt(&t0, &t1, &t0); 1383 fe_sq_tt(&t1, &t0); 1384 for (i = 1; i < 50; ++i) { 1385 fe_sq_tt(&t1, &t1); 1386 } 1387 fe_mul_ttt(&t1, &t1, &t0); 1388 fe_sq_tt(&t2, &t1); 1389 for (i = 1; i < 100; ++i) { 1390 fe_sq_tt(&t2, &t2); 1391 } 1392 fe_mul_ttt(&t1, &t2, &t1); 1393 fe_sq_tt(&t1, &t1); 1394 for (i = 1; i < 50; ++i) { 1395 fe_sq_tt(&t1, &t1); 1396 } 1397 fe_mul_ttt(&t0, &t1, &t0); 1398 fe_sq_tt(&t0, &t0); 1399 for (i = 1; i < 2; ++i) { 1400 fe_sq_tt(&t0, &t0); 1401 } 1402 fe_mul_ttt(out, &t0, z); 1403 } 1404 1405 static void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) 1406 { 1407 fe recip; 1408 fe x; 1409 fe y; 1410 1411 fe_invert(&recip, 
&h->Z); 1412 fe_mul_ttt(&x, &h->X, &recip); 1413 fe_mul_ttt(&y, &h->Y, &recip); 1414 fe_tobytes(s, &y); 1415 s[31] ^= fe_isnegative(&x) << 7; 1416 } 1417 1418 static int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) 1419 { 1420 fe u; 1421 fe_loose v; 1422 fe v3; 1423 fe vxx; 1424 fe_loose check; 1425 1426 fe_frombytes(&h->Y, s); 1427 fe_1(&h->Z); 1428 fe_sq_tt(&v3, &h->Y); 1429 fe_mul_ttt(&vxx, &v3, &d); 1430 fe_sub(&v, &v3, &h->Z); // u = y^2-1 1431 fe_carry(&u, &v); 1432 fe_add(&v, &vxx, &h->Z); // v = dy^2+1 1433 1434 fe_sq_tl(&v3, &v); 1435 fe_mul_ttl(&v3, &v3, &v); // v3 = v^3 1436 fe_sq_tt(&h->X, &v3); 1437 fe_mul_ttl(&h->X, &h->X, &v); 1438 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7 1439 1440 fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8) 1441 fe_mul_ttt(&h->X, &h->X, &v3); 1442 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8) 1443 1444 fe_sq_tt(&vxx, &h->X); 1445 fe_mul_ttl(&vxx, &vxx, &v); 1446 fe_sub(&check, &vxx, &u); 1447 if (fe_isnonzero(&check)) { 1448 fe_add(&check, &vxx, &u); 1449 if (fe_isnonzero(&check)) { 1450 return 0; 1451 } 1452 fe_mul_ttt(&h->X, &h->X, &sqrtm1); 1453 } 1454 1455 if (fe_isnegative(&h->X) != (s[31] >> 7)) { 1456 fe_loose t; 1457 fe_neg(&t, &h->X); 1458 fe_carry(&h->X, &t); 1459 } 1460 1461 fe_mul_ttt(&h->T, &h->X, &h->Y); 1462 return 1; 1463 } 1464 1465 static void ge_p2_0(ge_p2 *h) 1466 { 1467 fe_0(&h->X); 1468 fe_1(&h->Y); 1469 fe_1(&h->Z); 1470 } 1471 1472 // r = p 1473 static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) 1474 { 1475 fe_copy(&r->X, &p->X); 1476 fe_copy(&r->Y, &p->Y); 1477 fe_copy(&r->Z, &p->Z); 1478 } 1479 1480 // r = p 1481 static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) 1482 { 1483 fe_add(&r->YplusX, &p->Y, &p->X); 1484 fe_sub(&r->YminusX, &p->Y, &p->X); 1485 fe_copy_lt(&r->Z, &p->Z); 1486 fe_mul_ltt(&r->T2d, &p->T, &d2); 1487 } 1488 1489 // r = p 1490 static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) 1491 { 1492 fe_mul_tll(&r->X, &p->X, &p->T); 1493 
fe_mul_tll(&r->Y, &p->Y, &p->Z); 1494 fe_mul_tll(&r->Z, &p->Z, &p->T); 1495 } 1496 1497 // r = p 1498 static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) 1499 { 1500 fe_mul_tll(&r->X, &p->X, &p->T); 1501 fe_mul_tll(&r->Y, &p->Y, &p->Z); 1502 fe_mul_tll(&r->Z, &p->Z, &p->T); 1503 fe_mul_tll(&r->T, &p->X, &p->Y); 1504 } 1505 1506 // r = 2 * p 1507 static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) 1508 { 1509 fe trX, trZ, trT; 1510 fe t0; 1511 1512 fe_sq_tt(&trX, &p->X); 1513 fe_sq_tt(&trZ, &p->Y); 1514 fe_sq2_tt(&trT, &p->Z); 1515 fe_add(&r->Y, &p->X, &p->Y); 1516 fe_sq_tl(&t0, &r->Y); 1517 1518 fe_add(&r->Y, &trZ, &trX); 1519 fe_sub(&r->Z, &trZ, &trX); 1520 fe_carry(&trZ, &r->Y); 1521 fe_sub(&r->X, &t0, &trZ); 1522 fe_carry(&trZ, &r->Z); 1523 fe_sub(&r->T, &trT, &trZ); 1524 } 1525 1526 // r = 2 * p 1527 static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) 1528 { 1529 ge_p2 q; 1530 ge_p3_to_p2(&q, p); 1531 ge_p2_dbl(r, &q); 1532 } 1533 1534 // r = p + q 1535 static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) 1536 { 1537 fe trY, trZ, trT; 1538 1539 fe_add(&r->X, &p->Y, &p->X); 1540 fe_sub(&r->Y, &p->Y, &p->X); 1541 fe_mul_tll(&trZ, &r->X, &q->yplusx); 1542 fe_mul_tll(&trY, &r->Y, &q->yminusx); 1543 fe_mul_tlt(&trT, &q->xy2d, &p->T); 1544 fe_add(&r->T, &p->Z, &p->Z); 1545 fe_sub(&r->X, &trZ, &trY); 1546 fe_add(&r->Y, &trZ, &trY); 1547 fe_carry(&trZ, &r->T); 1548 fe_add(&r->Z, &trZ, &trT); 1549 fe_sub(&r->T, &trZ, &trT); 1550 } 1551 1552 // r = p - q 1553 static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) 1554 { 1555 fe trY, trZ, trT; 1556 1557 fe_add(&r->X, &p->Y, &p->X); 1558 fe_sub(&r->Y, &p->Y, &p->X); 1559 fe_mul_tll(&trZ, &r->X, &q->yminusx); 1560 fe_mul_tll(&trY, &r->Y, &q->yplusx); 1561 fe_mul_tlt(&trT, &q->xy2d, &p->T); 1562 fe_add(&r->T, &p->Z, &p->Z); 1563 fe_sub(&r->X, &trZ, &trY); 1564 fe_add(&r->Y, &trZ, &trY); 1565 fe_carry(&trZ, &r->T); 1566 fe_sub(&r->Z, &trZ, &trT); 1567 fe_add(&r->T, &trZ, &trT); 1568 } 1569 
1570 // r = p + q 1571 static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) 1572 { 1573 fe trX, trY, trZ, trT; 1574 1575 fe_add(&r->X, &p->Y, &p->X); 1576 fe_sub(&r->Y, &p->Y, &p->X); 1577 fe_mul_tll(&trZ, &r->X, &q->YplusX); 1578 fe_mul_tll(&trY, &r->Y, &q->YminusX); 1579 fe_mul_tlt(&trT, &q->T2d, &p->T); 1580 fe_mul_ttl(&trX, &p->Z, &q->Z); 1581 fe_add(&r->T, &trX, &trX); 1582 fe_sub(&r->X, &trZ, &trY); 1583 fe_add(&r->Y, &trZ, &trY); 1584 fe_carry(&trZ, &r->T); 1585 fe_add(&r->Z, &trZ, &trT); 1586 fe_sub(&r->T, &trZ, &trT); 1587 } 1588 1589 // r = p - q 1590 static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) 1591 { 1592 fe trX, trY, trZ, trT; 1593 1594 fe_add(&r->X, &p->Y, &p->X); 1595 fe_sub(&r->Y, &p->Y, &p->X); 1596 fe_mul_tll(&trZ, &r->X, &q->YminusX); 1597 fe_mul_tll(&trY, &r->Y, &q->YplusX); 1598 fe_mul_tlt(&trT, &q->T2d, &p->T); 1599 fe_mul_ttl(&trX, &p->Z, &q->Z); 1600 fe_add(&r->T, &trX, &trX); 1601 fe_sub(&r->X, &trZ, &trY); 1602 fe_add(&r->Y, &trZ, &trY); 1603 fe_carry(&trZ, &r->T); 1604 fe_sub(&r->Z, &trZ, &trT); 1605 fe_add(&r->T, &trZ, &trT); 1606 } 1607 1608 static void slide(signed char *r, const uint8_t *a) 1609 { 1610 int i; 1611 int b; 1612 int k; 1613 1614 for (i = 0; i < 256; ++i) { 1615 r[i] = 1 & (a[i >> 3] >> (i & 7)); 1616 } 1617 1618 for (i = 0; i < 256; ++i) { 1619 if (r[i]) { 1620 for (b = 1; b <= 6 && i + b < 256; ++b) { 1621 if (r[i + b]) { 1622 if (r[i] + (r[i + b] << b) <= 15) { 1623 r[i] += r[i + b] << b; 1624 r[i + b] = 0; 1625 } else if (r[i] - (r[i + b] << b) >= 1626 -15) { 1627 r[i] -= r[i + b] << b; 1628 for (k = i + b; k < 256; ++k) { 1629 if (!r[k]) { 1630 r[k] = 1; 1631 break; 1632 } 1633 r[k] = 0; 1634 } 1635 } else { 1636 break; 1637 } 1638 } 1639 } 1640 } 1641 } 1642 } 1643 1644 // r = a * A + b * B 1645 // where a = a[0]+256*a[1]+...+256^31 a[31]. 1646 // and b = b[0]+256*b[1]+...+256^31 b[31]. 1647 // B is the Ed25519 base point (x,4/5) with x positive. 
1648 static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a, 1649 const ge_p3 *A, const uint8_t *b) 1650 { 1651 signed char aslide[256]; 1652 signed char bslide[256]; 1653 ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A 1654 ge_p1p1 t; 1655 ge_p3 u; 1656 ge_p3 A2; 1657 int i; 1658 1659 slide(aslide, a); 1660 slide(bslide, b); 1661 1662 x25519_ge_p3_to_cached(&Ai[0], A); 1663 ge_p3_dbl(&t, A); 1664 x25519_ge_p1p1_to_p3(&A2, &t); 1665 x25519_ge_add(&t, &A2, &Ai[0]); 1666 x25519_ge_p1p1_to_p3(&u, &t); 1667 x25519_ge_p3_to_cached(&Ai[1], &u); 1668 x25519_ge_add(&t, &A2, &Ai[1]); 1669 x25519_ge_p1p1_to_p3(&u, &t); 1670 x25519_ge_p3_to_cached(&Ai[2], &u); 1671 x25519_ge_add(&t, &A2, &Ai[2]); 1672 x25519_ge_p1p1_to_p3(&u, &t); 1673 x25519_ge_p3_to_cached(&Ai[3], &u); 1674 x25519_ge_add(&t, &A2, &Ai[3]); 1675 x25519_ge_p1p1_to_p3(&u, &t); 1676 x25519_ge_p3_to_cached(&Ai[4], &u); 1677 x25519_ge_add(&t, &A2, &Ai[4]); 1678 x25519_ge_p1p1_to_p3(&u, &t); 1679 x25519_ge_p3_to_cached(&Ai[5], &u); 1680 x25519_ge_add(&t, &A2, &Ai[5]); 1681 x25519_ge_p1p1_to_p3(&u, &t); 1682 x25519_ge_p3_to_cached(&Ai[6], &u); 1683 x25519_ge_add(&t, &A2, &Ai[6]); 1684 x25519_ge_p1p1_to_p3(&u, &t); 1685 x25519_ge_p3_to_cached(&Ai[7], &u); 1686 1687 ge_p2_0(r); 1688 1689 for (i = 255; i >= 0; --i) { 1690 if (aslide[i] || bslide[i]) { 1691 break; 1692 } 1693 } 1694 1695 for (; i >= 0; --i) { 1696 ge_p2_dbl(&t, r); 1697 1698 if (aslide[i] > 0) { 1699 x25519_ge_p1p1_to_p3(&u, &t); 1700 x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]); 1701 } else if (aslide[i] < 0) { 1702 x25519_ge_p1p1_to_p3(&u, &t); 1703 x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); 1704 } 1705 1706 if (bslide[i] > 0) { 1707 x25519_ge_p1p1_to_p3(&u, &t); 1708 ge_madd(&t, &u, &Bi[bslide[i] / 2]); 1709 } else if (bslide[i] < 0) { 1710 x25519_ge_p1p1_to_p3(&u, &t); 1711 ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); 1712 } 1713 1714 x25519_ge_p1p1_to_p2(r, &t); 1715 } 1716 } 1717 1718 // int64_lshift21 returns |a << 21| but is defined when 
shifting bits into the 1719 // sign bit. This works around a language flaw in C. 1720 static inline int64_t int64_lshift21(int64_t a) 1721 { 1722 return (int64_t)((uint64_t)a << 21); 1723 } 1724 1725 // The set of scalars is \Z/l 1726 // where l = 2^252 + 27742317777372353535851937790883648493. 1727 1728 // Input: 1729 // s[0]+256*s[1]+...+256^63*s[63] = s 1730 // 1731 // Output: 1732 // s[0]+256*s[1]+...+256^31*s[31] = s mod l 1733 // where l = 2^252 + 27742317777372353535851937790883648493. 1734 // Overwrites s in place. 1735 static void x25519_sc_reduce(uint8_t s[64]) 1736 { 1737 int64_t s0 = 2097151 & load_le24(s); 1738 int64_t s1 = 2097151 & (load_le32(s + 2) >> 5); 1739 int64_t s2 = 2097151 & (load_le24(s + 5) >> 2); 1740 int64_t s3 = 2097151 & (load_le32(s + 7) >> 7); 1741 int64_t s4 = 2097151 & (load_le32(s + 10) >> 4); 1742 int64_t s5 = 2097151 & (load_le24(s + 13) >> 1); 1743 int64_t s6 = 2097151 & (load_le32(s + 15) >> 6); 1744 int64_t s7 = 2097151 & (load_le24(s + 18) >> 3); 1745 int64_t s8 = 2097151 & load_le24(s + 21); 1746 int64_t s9 = 2097151 & (load_le32(s + 23) >> 5); 1747 int64_t s10 = 2097151 & (load_le24(s + 26) >> 2); 1748 int64_t s11 = 2097151 & (load_le32(s + 28) >> 7); 1749 int64_t s12 = 2097151 & (load_le32(s + 31) >> 4); 1750 int64_t s13 = 2097151 & (load_le24(s + 34) >> 1); 1751 int64_t s14 = 2097151 & (load_le32(s + 36) >> 6); 1752 int64_t s15 = 2097151 & (load_le24(s + 39) >> 3); 1753 int64_t s16 = 2097151 & load_le24(s + 42); 1754 int64_t s17 = 2097151 & (load_le32(s + 44) >> 5); 1755 int64_t s18 = 2097151 & (load_le24(s + 47) >> 2); 1756 int64_t s19 = 2097151 & (load_le32(s + 49) >> 7); 1757 int64_t s20 = 2097151 & (load_le32(s + 52) >> 4); 1758 int64_t s21 = 2097151 & (load_le24(s + 55) >> 1); 1759 int64_t s22 = 2097151 & (load_le32(s + 57) >> 6); 1760 int64_t s23 = (load_le32(s + 60) >> 3); 1761 int64_t carry0; 1762 int64_t carry1; 1763 int64_t carry2; 1764 int64_t carry3; 1765 int64_t carry4; 1766 int64_t carry5; 1767 int64_t 
carry6; 1768 int64_t carry7; 1769 int64_t carry8; 1770 int64_t carry9; 1771 int64_t carry10; 1772 int64_t carry11; 1773 int64_t carry12; 1774 int64_t carry13; 1775 int64_t carry14; 1776 int64_t carry15; 1777 int64_t carry16; 1778 1779 s11 += s23 * 666643; 1780 s12 += s23 * 470296; 1781 s13 += s23 * 654183; 1782 s14 -= s23 * 997805; 1783 s15 += s23 * 136657; 1784 s16 -= s23 * 683901; 1785 s23 = 0; 1786 1787 s10 += s22 * 666643; 1788 s11 += s22 * 470296; 1789 s12 += s22 * 654183; 1790 s13 -= s22 * 997805; 1791 s14 += s22 * 136657; 1792 s15 -= s22 * 683901; 1793 s22 = 0; 1794 1795 s9 += s21 * 666643; 1796 s10 += s21 * 470296; 1797 s11 += s21 * 654183; 1798 s12 -= s21 * 997805; 1799 s13 += s21 * 136657; 1800 s14 -= s21 * 683901; 1801 s21 = 0; 1802 1803 s8 += s20 * 666643; 1804 s9 += s20 * 470296; 1805 s10 += s20 * 654183; 1806 s11 -= s20 * 997805; 1807 s12 += s20 * 136657; 1808 s13 -= s20 * 683901; 1809 s20 = 0; 1810 1811 s7 += s19 * 666643; 1812 s8 += s19 * 470296; 1813 s9 += s19 * 654183; 1814 s10 -= s19 * 997805; 1815 s11 += s19 * 136657; 1816 s12 -= s19 * 683901; 1817 s19 = 0; 1818 1819 s6 += s18 * 666643; 1820 s7 += s18 * 470296; 1821 s8 += s18 * 654183; 1822 s9 -= s18 * 997805; 1823 s10 += s18 * 136657; 1824 s11 -= s18 * 683901; 1825 s18 = 0; 1826 1827 carry6 = (s6 + (1 << 20)) >> 21; 1828 s7 += carry6; 1829 s6 -= int64_lshift21(carry6); 1830 carry8 = (s8 + (1 << 20)) >> 21; 1831 s9 += carry8; 1832 s8 -= int64_lshift21(carry8); 1833 carry10 = (s10 + (1 << 20)) >> 21; 1834 s11 += carry10; 1835 s10 -= int64_lshift21(carry10); 1836 carry12 = (s12 + (1 << 20)) >> 21; 1837 s13 += carry12; 1838 s12 -= int64_lshift21(carry12); 1839 carry14 = (s14 + (1 << 20)) >> 21; 1840 s15 += carry14; 1841 s14 -= int64_lshift21(carry14); 1842 carry16 = (s16 + (1 << 20)) >> 21; 1843 s17 += carry16; 1844 s16 -= int64_lshift21(carry16); 1845 1846 carry7 = (s7 + (1 << 20)) >> 21; 1847 s8 += carry7; 1848 s7 -= int64_lshift21(carry7); 1849 carry9 = (s9 + (1 << 20)) >> 21; 1850 s10 += 
carry9; 1851 s9 -= int64_lshift21(carry9); 1852 carry11 = (s11 + (1 << 20)) >> 21; 1853 s12 += carry11; 1854 s11 -= int64_lshift21(carry11); 1855 carry13 = (s13 + (1 << 20)) >> 21; 1856 s14 += carry13; 1857 s13 -= int64_lshift21(carry13); 1858 carry15 = (s15 + (1 << 20)) >> 21; 1859 s16 += carry15; 1860 s15 -= int64_lshift21(carry15); 1861 1862 s5 += s17 * 666643; 1863 s6 += s17 * 470296; 1864 s7 += s17 * 654183; 1865 s8 -= s17 * 997805; 1866 s9 += s17 * 136657; 1867 s10 -= s17 * 683901; 1868 s17 = 0; 1869 1870 s4 += s16 * 666643; 1871 s5 += s16 * 470296; 1872 s6 += s16 * 654183; 1873 s7 -= s16 * 997805; 1874 s8 += s16 * 136657; 1875 s9 -= s16 * 683901; 1876 s16 = 0; 1877 1878 s3 += s15 * 666643; 1879 s4 += s15 * 470296; 1880 s5 += s15 * 654183; 1881 s6 -= s15 * 997805; 1882 s7 += s15 * 136657; 1883 s8 -= s15 * 683901; 1884 s15 = 0; 1885 1886 s2 += s14 * 666643; 1887 s3 += s14 * 470296; 1888 s4 += s14 * 654183; 1889 s5 -= s14 * 997805; 1890 s6 += s14 * 136657; 1891 s7 -= s14 * 683901; 1892 s14 = 0; 1893 1894 s1 += s13 * 666643; 1895 s2 += s13 * 470296; 1896 s3 += s13 * 654183; 1897 s4 -= s13 * 997805; 1898 s5 += s13 * 136657; 1899 s6 -= s13 * 683901; 1900 s13 = 0; 1901 1902 s0 += s12 * 666643; 1903 s1 += s12 * 470296; 1904 s2 += s12 * 654183; 1905 s3 -= s12 * 997805; 1906 s4 += s12 * 136657; 1907 s5 -= s12 * 683901; 1908 s12 = 0; 1909 1910 carry0 = (s0 + (1 << 20)) >> 21; 1911 s1 += carry0; 1912 s0 -= int64_lshift21(carry0); 1913 carry2 = (s2 + (1 << 20)) >> 21; 1914 s3 += carry2; 1915 s2 -= int64_lshift21(carry2); 1916 carry4 = (s4 + (1 << 20)) >> 21; 1917 s5 += carry4; 1918 s4 -= int64_lshift21(carry4); 1919 carry6 = (s6 + (1 << 20)) >> 21; 1920 s7 += carry6; 1921 s6 -= int64_lshift21(carry6); 1922 carry8 = (s8 + (1 << 20)) >> 21; 1923 s9 += carry8; 1924 s8 -= int64_lshift21(carry8); 1925 carry10 = (s10 + (1 << 20)) >> 21; 1926 s11 += carry10; 1927 s10 -= int64_lshift21(carry10); 1928 1929 carry1 = (s1 + (1 << 20)) >> 21; 1930 s2 += carry1; 1931 s1 -= 
int64_lshift21(carry1); 1932 carry3 = (s3 + (1 << 20)) >> 21; 1933 s4 += carry3; 1934 s3 -= int64_lshift21(carry3); 1935 carry5 = (s5 + (1 << 20)) >> 21; 1936 s6 += carry5; 1937 s5 -= int64_lshift21(carry5); 1938 carry7 = (s7 + (1 << 20)) >> 21; 1939 s8 += carry7; 1940 s7 -= int64_lshift21(carry7); 1941 carry9 = (s9 + (1 << 20)) >> 21; 1942 s10 += carry9; 1943 s9 -= int64_lshift21(carry9); 1944 carry11 = (s11 + (1 << 20)) >> 21; 1945 s12 += carry11; 1946 s11 -= int64_lshift21(carry11); 1947 1948 s0 += s12 * 666643; 1949 s1 += s12 * 470296; 1950 s2 += s12 * 654183; 1951 s3 -= s12 * 997805; 1952 s4 += s12 * 136657; 1953 s5 -= s12 * 683901; 1954 s12 = 0; 1955 1956 carry0 = s0 >> 21; 1957 s1 += carry0; 1958 s0 -= int64_lshift21(carry0); 1959 carry1 = s1 >> 21; 1960 s2 += carry1; 1961 s1 -= int64_lshift21(carry1); 1962 carry2 = s2 >> 21; 1963 s3 += carry2; 1964 s2 -= int64_lshift21(carry2); 1965 carry3 = s3 >> 21; 1966 s4 += carry3; 1967 s3 -= int64_lshift21(carry3); 1968 carry4 = s4 >> 21; 1969 s5 += carry4; 1970 s4 -= int64_lshift21(carry4); 1971 carry5 = s5 >> 21; 1972 s6 += carry5; 1973 s5 -= int64_lshift21(carry5); 1974 carry6 = s6 >> 21; 1975 s7 += carry6; 1976 s6 -= int64_lshift21(carry6); 1977 carry7 = s7 >> 21; 1978 s8 += carry7; 1979 s7 -= int64_lshift21(carry7); 1980 carry8 = s8 >> 21; 1981 s9 += carry8; 1982 s8 -= int64_lshift21(carry8); 1983 carry9 = s9 >> 21; 1984 s10 += carry9; 1985 s9 -= int64_lshift21(carry9); 1986 carry10 = s10 >> 21; 1987 s11 += carry10; 1988 s10 -= int64_lshift21(carry10); 1989 carry11 = s11 >> 21; 1990 s12 += carry11; 1991 s11 -= int64_lshift21(carry11); 1992 1993 s0 += s12 * 666643; 1994 s1 += s12 * 470296; 1995 s2 += s12 * 654183; 1996 s3 -= s12 * 997805; 1997 s4 += s12 * 136657; 1998 s5 -= s12 * 683901; 1999 s12 = 0; 2000 2001 carry0 = s0 >> 21; 2002 s1 += carry0; 2003 s0 -= int64_lshift21(carry0); 2004 carry1 = s1 >> 21; 2005 s2 += carry1; 2006 s1 -= int64_lshift21(carry1); 2007 carry2 = s2 >> 21; 2008 s3 += carry2; 2009 s2 -= 
int64_lshift21(carry2); 2010 carry3 = s3 >> 21; 2011 s4 += carry3; 2012 s3 -= int64_lshift21(carry3); 2013 carry4 = s4 >> 21; 2014 s5 += carry4; 2015 s4 -= int64_lshift21(carry4); 2016 carry5 = s5 >> 21; 2017 s6 += carry5; 2018 s5 -= int64_lshift21(carry5); 2019 carry6 = s6 >> 21; 2020 s7 += carry6; 2021 s6 -= int64_lshift21(carry6); 2022 carry7 = s7 >> 21; 2023 s8 += carry7; 2024 s7 -= int64_lshift21(carry7); 2025 carry8 = s8 >> 21; 2026 s9 += carry8; 2027 s8 -= int64_lshift21(carry8); 2028 carry9 = s9 >> 21; 2029 s10 += carry9; 2030 s9 -= int64_lshift21(carry9); 2031 carry10 = s10 >> 21; 2032 s11 += carry10; 2033 s10 -= int64_lshift21(carry10); 2034 2035 s[0] = s0 >> 0; 2036 s[1] = s0 >> 8; 2037 s[2] = (s0 >> 16) | (s1 << 5); 2038 s[3] = s1 >> 3; 2039 s[4] = s1 >> 11; 2040 s[5] = (s1 >> 19) | (s2 << 2); 2041 s[6] = s2 >> 6; 2042 s[7] = (s2 >> 14) | (s3 << 7); 2043 s[8] = s3 >> 1; 2044 s[9] = s3 >> 9; 2045 s[10] = (s3 >> 17) | (s4 << 4); 2046 s[11] = s4 >> 4; 2047 s[12] = s4 >> 12; 2048 s[13] = (s4 >> 20) | (s5 << 1); 2049 s[14] = s5 >> 7; 2050 s[15] = (s5 >> 15) | (s6 << 6); 2051 s[16] = s6 >> 2; 2052 s[17] = s6 >> 10; 2053 s[18] = (s6 >> 18) | (s7 << 3); 2054 s[19] = s7 >> 5; 2055 s[20] = s7 >> 13; 2056 s[21] = s8 >> 0; 2057 s[22] = s8 >> 8; 2058 s[23] = (s8 >> 16) | (s9 << 5); 2059 s[24] = s9 >> 3; 2060 s[25] = s9 >> 11; 2061 s[26] = (s9 >> 19) | (s10 << 2); 2062 s[27] = s10 >> 6; 2063 s[28] = (s10 >> 14) | (s11 << 7); 2064 s[29] = s11 >> 1; 2065 s[30] = s11 >> 9; 2066 s[31] = s11 >> 17; 2067 } 2068 2069 bool ed25519_verify(const uint8_t signature[64], const uint8_t public_key[32], 2070 const void *message, size_t message_size) 2071 { 2072 ge_p3 A; 2073 if ((signature[63] & 224) != 0 || 2074 !x25519_ge_frombytes_vartime(&A, public_key)) 2075 return false; 2076 2077 fe_loose t; 2078 fe_neg(&t, &A.X); 2079 fe_carry(&A.X, &t); 2080 fe_neg(&t, &A.T); 2081 fe_carry(&A.T, &t); 2082 2083 uint8_t pkcopy[32]; 2084 memcpy(pkcopy, public_key, 32); 2085 uint8_t rcopy[32]; 
2086 memcpy(rcopy, signature, 32); 2087 union { 2088 uint64_t u64[4]; 2089 uint8_t u8[32]; 2090 } scopy; 2091 memcpy(&scopy.u8[0], signature + 32, 32); 2092 2093 // https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in 2094 // the range [0, order) in order to prevent signature malleability. 2095 2096 // kOrder is the order of Curve25519 in little-endian form. 2097 static const uint64_t kOrder[4] = { 2098 UINT64_C(0x5812631a5cf5d3ed), 2099 UINT64_C(0x14def9dea2f79cd6), 2100 0, 2101 UINT64_C(0x1000000000000000), 2102 }; 2103 for (size_t i = 3;; --i) { 2104 uint64_t le = swap_le64(scopy.u64[i]); 2105 if (le > kOrder[i]) { 2106 return false; 2107 } else if (le < kOrder[i]) { 2108 break; 2109 } else if (i == 0) { 2110 return false; 2111 } 2112 } 2113 2114 uint8_t h[64]; 2115 BCRYPT_ALG_HANDLE alg, hash; 2116 if (!NT_SUCCESS(BCryptOpenAlgorithmProvider(&alg, BCRYPT_SHA512_ALGORITHM, NULL, 0)) || 2117 !NT_SUCCESS(BCryptCreateHash(alg, &hash, NULL, 0, NULL, 0, 0)) || 2118 !NT_SUCCESS(BCryptHashData(hash, (PUCHAR)signature, 32, 0)) || 2119 !NT_SUCCESS(BCryptHashData(hash, (PUCHAR)public_key, 32, 0)) || 2120 !NT_SUCCESS(BCryptHashData(hash, (PUCHAR)message, message_size, 0)) || 2121 !NT_SUCCESS(BCryptFinishHash(hash, h, 64, 0)) || 2122 !NT_SUCCESS(BCryptDestroyHash(hash)) || 2123 !NT_SUCCESS(BCryptCloseAlgorithmProvider(alg, 0))) 2124 return false; 2125 2126 x25519_sc_reduce(h); 2127 2128 ge_p2 R; 2129 ge_double_scalarmult_vartime(&R, h, &A, scopy.u8); 2130 2131 uint8_t rcheck[32]; 2132 x25519_ge_tobytes(rcheck, &R); 2133 2134 return memcmp_ct(rcheck, rcopy, sizeof(rcheck)) == 0; 2135 } 2136 2137 static const uint64_t blake2b_iv[8] = { 2138 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL, 2139 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 2140 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 2141 }; 2142 2143 static const uint8_t blake2b_sigma[12][16] = { 2144 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, 
2145 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, 2146 { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, 2147 { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, 2148 { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, 2149 { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, 2150 { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, 2151 { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, 2152 { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, 2153 { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, 2154 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, 2155 { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 2156 }; 2157 2158 #define G(r, i, a, b, c, d) \ 2159 do { \ 2160 a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ 2161 d = ror64(d ^ a, 32); \ 2162 c = c + d; \ 2163 b = ror64(b ^ c, 24); \ 2164 a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ 2165 d = ror64(d ^ a, 16); \ 2166 c = c + d; \ 2167 b = ror64(b ^ c, 63); \ 2168 } while (0) 2169 2170 #define ROUND(r) \ 2171 do { \ 2172 G(r, 0, v[0], v[4], v[8], v[12]); \ 2173 G(r, 1, v[1], v[5], v[9], v[13]); \ 2174 G(r, 2, v[2], v[6], v[10], v[14]); \ 2175 G(r, 3, v[3], v[7], v[11], v[15]); \ 2176 G(r, 4, v[0], v[5], v[10], v[15]); \ 2177 G(r, 5, v[1], v[6], v[11], v[12]); \ 2178 G(r, 6, v[2], v[7], v[8], v[13]); \ 2179 G(r, 7, v[3], v[4], v[9], v[14]); \ 2180 } while (0) 2181 2182 static void blake2b256_compress(struct blake2b256_state *state, 2183 const uint8_t block[128]) 2184 { 2185 uint64_t m[16]; 2186 uint64_t v[16]; 2187 2188 for (int i = 0; i < 16; ++i) 2189 m[i] = load_le64(block + i * sizeof(m[i])); 2190 2191 for (int i = 0; i < 8; ++i) 2192 v[i] = state->h[i]; 2193 2194 memcpy(v + 8, blake2b_iv, sizeof(blake2b_iv)); 2195 v[12] ^= state->t[0]; 2196 v[13] ^= state->t[1]; 2197 v[14] ^= state->f[0]; 2198 v[15] ^= state->f[1]; 2199 2200 for (int i = 0; i < 12; ++i) 2201 ROUND(i); 2202 for (int i = 0; i < 8; ++i) 2203 state->h[i] = 
state->h[i] ^ v[i] ^ v[i + 8]; 2204 } 2205 2206 void blake2b256_init(struct blake2b256_state *state) 2207 { 2208 memset(state, 0, sizeof(*state)); 2209 memcpy(state->h, blake2b_iv, sizeof(state->h)); 2210 state->h[0] ^= 0x01010000 | 32; 2211 } 2212 2213 void blake2b256_update(struct blake2b256_state *state, const uint8_t *in, 2214 unsigned int inlen) 2215 { 2216 const size_t left = state->buflen; 2217 const size_t fill = 128 - left; 2218 2219 if (!inlen) 2220 return; 2221 2222 if (inlen > fill) { 2223 state->buflen = 0; 2224 memcpy(state->buf + left, in, fill); 2225 state->t[0] += 128; 2226 state->t[1] += (state->t[0] < 128); 2227 blake2b256_compress(state, state->buf); 2228 in += fill; 2229 inlen -= fill; 2230 while (inlen > 128) { 2231 state->t[0] += 128; 2232 state->t[1] += (state->t[0] < 128); 2233 blake2b256_compress(state, in); 2234 in += 128; 2235 inlen -= 128; 2236 } 2237 } 2238 memcpy(state->buf + state->buflen, in, inlen); 2239 state->buflen += inlen; 2240 } 2241 2242 void blake2b256_final(struct blake2b256_state *state, uint8_t out[32]) 2243 { 2244 state->t[0] += state->buflen; 2245 state->t[1] += (state->t[0] < state->buflen); 2246 state->f[0] = (uint64_t)-1; 2247 memset(state->buf + state->buflen, 0, 128 - state->buflen); 2248 blake2b256_compress(state, state->buf); 2249 2250 for (int i = 0; i < 4; ++i) 2251 store_le64(out + i * sizeof(state->h[i]), state->h[i]); 2252 }