// Source: github.com/cloudflare/circl@v1.5.0/dh/sidh/internal/p751/arith_amd64.s

// +build amd64,!purego

#include "textflag.h"

// The constants below are 64-bit limbs written as immediates. The numeric
// suffix is the limb index: modP751 applies P751_k at byte offset 8*k of the
// element, so limb 0 is the least significant one.

// p751 + 1
// Only limbs 5..11 are defined here.
// NOTE(review): limbs 0..4 are presumably zero (which is why they are
// omitted) - confirm against the Go-side P751p1 table.
#define P751P1_5   $0xEEB0000000000000
#define P751P1_6   $0xE3EC968549F878A8
#define P751P1_7   $0xDA959B1A13F7CC76
#define P751P1_8   $0x084E9867D6EBE876
#define P751P1_9   $0x8562B5045CB25748
#define P751P1_10  $0x0E12909F97BADC66
#define P751P1_11  $0x00006FE5D541F71C

// p751
// Limbs 1..4 are equal to limb 0, so only P751_0 is defined for them
// (modP751 reuses the register holding P751_0 for offsets 8..32).
#define P751_0     $0xFFFFFFFFFFFFFFFF
#define P751_5     $0xEEAFFFFFFFFFFFFF
#define P751_6     $0xE3EC968549F878A8
#define P751_7     $0xDA959B1A13F7CC76
#define P751_8     $0x084E9867D6EBE876
#define P751_9     $0x8562B5045CB25748
#define P751_10    $0x0E12909F97BADC66
#define P751_11    $0x00006FE5D541F71C

// 2 * p751 (each limb is the doubled P751 limb plus the carry from the limb
// below). Limbs 2..4 are equal to limb 1, so addP751/subP751 reuse P751X2_1
// for offsets 8..32.
#define P751X2_0   $0xFFFFFFFFFFFFFFFE
#define P751X2_1   $0xFFFFFFFFFFFFFFFF
#define P751X2_5   $0xDD5FFFFFFFFFFFFF
#define P751X2_6   $0xC7D92D0A93F0F151
#define P751X2_7   $0xB52B363427EF98ED
#define P751X2_8   $0x109D30CFADD7D0ED
#define P751X2_9   $0x0AC56A08B964AE90
#define P751X2_10  $0x1C25213F2F75B8CD
#define P751X2_11  $0x0000DFCBAA83EE38

// The MSR code uses these registers for parameter passing. Keep using
// them to avoid significant code changes. This means that when the Go
// assembler does something strange, we can diff the machine code
// against a different assembler to find out what Go did.
// Register aliases for the pointer arguments of the routines below.
#define REG_P1 DI
#define REG_P2 SI
#define REG_P3 DX

// modP751(x)
//
// In-place conditional subtraction of p751 from the 12-limb value at x:
// computes x - p751 and, if that subtraction borrowed, adds p751 back.
// The sequence is branch-free; the decision is carried in an all-ones /
// all-zeros mask.
//
// Arguments (Go stack ABI, 8 bytes): x+0(FP) = pointer to 12 x 64-bit limbs.
// Clobbers: AX, DI, R8-R15, flags.
TEXT ·modP751(SB), NOSPLIT, $0-8
	MOVQ	x+0(FP), REG_P1

	// Zero AX for later use:
	XORQ	AX, AX

	// Load p into registers:
	MOVQ	P751_0, R8
	// P751_{1,2,3,4} = P751_0, so reuse R8
	MOVQ	P751_5, R9
	MOVQ	P751_6, R10
	MOVQ	P751_7, R11
	MOVQ	P751_8, R12
	MOVQ	P751_9, R13
	MOVQ	P751_10, R14
	MOVQ	P751_11, R15

	// Set x <- x - p
	SUBQ	R8, (REG_P1)
	SBBQ	R8, (8)(REG_P1)
	SBBQ	R8, (16)(REG_P1)
	SBBQ	R8, (24)(REG_P1)
	SBBQ	R8, (32)(REG_P1)
	SBBQ	R9, (40)(REG_P1)
	SBBQ	R10, (48)(REG_P1)
	SBBQ	R11, (56)(REG_P1)
	SBBQ	R12, (64)(REG_P1)
	SBBQ	R13, (72)(REG_P1)
	SBBQ	R14, (80)(REG_P1)
	SBBQ	R15, (88)(REG_P1)

	// Save carry flag indicating x-p < 0 as a mask in AX
	// (AX was zero, so AX = 0 - CF = 0x00..00 or 0xff..ff).
	SBBQ	$0, AX

	// Conditionally add p to x if x-p < 0
	ANDQ	AX, R8
	ANDQ	AX, R9
	ANDQ	AX, R10
	ANDQ	AX, R11
	ANDQ	AX, R12
	ANDQ	AX, R13
	ANDQ	AX, R14
	ANDQ	AX, R15

	ADDQ	R8, (REG_P1)
	ADCQ	R8, (8)(REG_P1)
	ADCQ	R8, (16)(REG_P1)
	ADCQ	R8, (24)(REG_P1)
	ADCQ	R8, (32)(REG_P1)
	ADCQ	R9, (40)(REG_P1)
	ADCQ	R10, (48)(REG_P1)
	ADCQ	R11, (56)(REG_P1)
	ADCQ	R12, (64)(REG_P1)
	ADCQ	R13, (72)(REG_P1)
	ADCQ	R14, (80)(REG_P1)
	ADCQ	R15, (88)(REG_P1)

	RET

// cswapP751(x, y, choice)
//
// Branch-free conditional swap of two 12-limb values: when choice is 1 the
// contents of x and y are exchanged, when it is 0 both are rewritten with
// their own values. choice is expected to be 0 or 1; it is turned into an
// all-zeros / all-ones mask with NEGQ.
//
// Arguments (17 bytes): x+0(FP), y+8(FP) = pointers, choice+16(FP) = byte.
// Clobbers: AX, BX, CX, DX, DI, SI, flags.
TEXT ·cswapP751(SB), NOSPLIT, $0-17

	MOVQ	x+0(FP), REG_P1
	MOVQ	y+8(FP), REG_P2
	MOVB	choice+16(FP), AL	// AL = 0 or 1
	MOVBLZX	AL, AX			// AX = 0 or 1
	NEGQ	AX			// AX = 0x00..00 or 0xff..ff

// One limb of the masked XOR swap; same shape as CMOV_BLOCK in cmovP751.
// DI/SI are REG_P1/REG_P2, AX is the mask.
#define CSWAP_BLOCK(idx) \
	MOVQ	(idx*8)(DI), BX	\ // BX = x[idx]
	MOVQ	(idx*8)(SI), CX	\ // CX = y[idx]
	MOVQ	CX, DX		\ // DX = y[idx]
	XORQ	BX, DX		\ // DX = y[idx] ^ x[idx]
	ANDQ	AX, DX		\ // DX = (y[idx] ^ x[idx]) & mask
	XORQ	DX, BX		\ // BX = ((y[idx] ^ x[idx]) & mask) ^ x[idx] = x[idx] or y[idx]
	XORQ	DX, CX		\ // CX = ((y[idx] ^ x[idx]) & mask) ^ y[idx] = y[idx] or x[idx]
	MOVQ	BX, (idx*8)(DI)	\
	MOVQ	CX, (idx*8)(SI)

	CSWAP_BLOCK(0)
	CSWAP_BLOCK(1)
	CSWAP_BLOCK(2)
	CSWAP_BLOCK(3)
	CSWAP_BLOCK(4)
	CSWAP_BLOCK(5)
	CSWAP_BLOCK(6)
	CSWAP_BLOCK(7)
	CSWAP_BLOCK(8)
	CSWAP_BLOCK(9)
	CSWAP_BLOCK(10)
	CSWAP_BLOCK(11)
#undef CSWAP_BLOCK

	RET

// cmovP751(x, y, choice)
//
// Branch-free conditional move of a 12-limb value: when choice is 1, x is
// overwritten with y; when it is 0, x is rewritten with its own value.
// y is only read. choice is expected to be 0 or 1.
//
// Arguments (17 bytes): x+0(FP), y+8(FP) = pointers, choice+16(FP) = byte.
// Clobbers: AX, BX, DX, DI, SI, flags.
TEXT ·cmovP751(SB),NOSPLIT,$0-17

	MOVQ	x+0(FP), DI
	MOVQ	y+8(FP), SI
	MOVB	choice+16(FP), AL	// AL = 0 or 1
	MOVBLZX	AL, AX			// AX = 0 or 1
	NEGQ	AX			// AX = 0x00..00 or 0xff..ff
#ifndef CMOV_BLOCK
#define CMOV_BLOCK(idx) \
	MOVQ	(idx*8)(DI), BX	\ // BX = x[idx]
	MOVQ	(idx*8)(SI), DX	\ // DX = y[idx]
	XORQ	BX, DX		\ // DX = y[idx] ^ x[idx]
	ANDQ	AX, DX		\ // DX = (y[idx] ^ x[idx]) & mask
	XORQ	DX, BX		\ // BX = (y[idx] ^ x[idx]) & mask) ^ x[idx] = x[idx] or y[idx]
	MOVQ	BX, (idx*8)(DI)
#endif
	CMOV_BLOCK(0)
	CMOV_BLOCK(1)
	CMOV_BLOCK(2)
	CMOV_BLOCK(3)
	CMOV_BLOCK(4)
	CMOV_BLOCK(5)
	CMOV_BLOCK(6)
	CMOV_BLOCK(7)
	CMOV_BLOCK(8)
	CMOV_BLOCK(9)
	CMOV_BLOCK(10)
	CMOV_BLOCK(11)
#ifdef CMOV_BLOCK
#undef CMOV_BLOCK
#endif
	RET

// addP751(z, x, y)
//
// z = x + y, followed by a subtraction of 2*p751 that is undone (by adding
// 2*p751 back under a mask) when it borrows. All steps are branch-free.
// NOTE(review): presumably x and y are expected to be below 2*p751 so that
// the result also is - confirm against the callers.
//
// Arguments (24 bytes): z+0(FP), x+8(FP), y+16(FP) = pointers to 12 limbs.
// Clobbers: AX, CX, DX, DI, SI, R8-R15, flags.
TEXT ·addP751(SB), NOSPLIT, $0-24

	MOVQ	z+0(FP), REG_P3
	MOVQ	x+8(FP), REG_P1
	MOVQ	y+16(FP), REG_P2

	// Limbs 0..8 of x + y are kept in R8-R15, CX; limbs 9..11 do not fit in
	// the remaining registers and go straight to z.
	MOVQ	(REG_P1), R8
	MOVQ	(8)(REG_P1), R9
	MOVQ	(16)(REG_P1), R10
	MOVQ	(24)(REG_P1), R11
	MOVQ	(32)(REG_P1), R12
	MOVQ	(40)(REG_P1), R13
	MOVQ	(48)(REG_P1), R14
	MOVQ	(56)(REG_P1), R15
	MOVQ	(64)(REG_P1), CX
	ADDQ	(REG_P2), R8
	ADCQ	(8)(REG_P2), R9
	ADCQ	(16)(REG_P2), R10
	ADCQ	(24)(REG_P2), R11
	ADCQ	(32)(REG_P2), R12
	ADCQ	(40)(REG_P2), R13
	ADCQ	(48)(REG_P2), R14
	ADCQ	(56)(REG_P2), R15
	ADCQ	(64)(REG_P2), CX
	MOVQ	(72)(REG_P1), AX
	ADCQ	(72)(REG_P2), AX
	MOVQ	AX, (72)(REG_P3)
	MOVQ	(80)(REG_P1), AX
	ADCQ	(80)(REG_P2), AX
	MOVQ	AX, (80)(REG_P3)
	MOVQ	(88)(REG_P1), AX
	ADCQ	(88)(REG_P2), AX
	MOVQ	AX, (88)(REG_P3)

	// Subtract 2*p751. The MOVQ loads in between do not touch the flags, so
	// the borrow chain stays intact.
	MOVQ	P751X2_0, AX
	SUBQ	AX, R8
	MOVQ	P751X2_1, AX
	SBBQ	AX, R9
	SBBQ	AX, R10
	SBBQ	AX, R11
	SBBQ	AX, R12
	MOVQ	P751X2_5, AX
	SBBQ	AX, R13
	MOVQ	P751X2_6, AX
	SBBQ	AX, R14
	MOVQ	P751X2_7, AX
	SBBQ	AX, R15
	MOVQ	P751X2_8, AX
	SBBQ	AX, CX
	MOVQ	R8, (REG_P3)
	MOVQ	R9, (8)(REG_P3)
	MOVQ	R10, (16)(REG_P3)
	MOVQ	R11, (24)(REG_P3)
	MOVQ	R12, (32)(REG_P3)
	MOVQ	R13, (40)(REG_P3)
	MOVQ	R14, (48)(REG_P3)
	MOVQ	R15, (56)(REG_P3)
	MOVQ	CX, (64)(REG_P3)
	MOVQ	(72)(REG_P3), R8
	MOVQ	(80)(REG_P3), R9
	MOVQ	(88)(REG_P3), R10
	MOVQ	P751X2_9, AX
	SBBQ	AX, R8
	MOVQ	P751X2_10, AX
	SBBQ	AX, R9
	MOVQ	P751X2_11, AX
	SBBQ	AX, R10
	MOVQ	R8, (72)(REG_P3)
	MOVQ	R9, (80)(REG_P3)
	MOVQ	R10, (88)(REG_P3)
	// AX = 0 - borrow. MOVQ (not XORQ) is required here: XORQ would clear
	// the carry flag that SBBQ is about to consume.
	MOVQ	$0, AX
	SBBQ	$0, AX

	// Mask 2*p751 with the borrow mask (limbs 1..4 share R8).
	MOVQ	P751X2_0, SI
	ANDQ	AX, SI
	MOVQ	P751X2_1, R8
	ANDQ	AX, R8
	MOVQ	P751X2_5, R9
	ANDQ	AX, R9
	MOVQ	P751X2_6, R10
	ANDQ	AX, R10
	MOVQ	P751X2_7, R11
	ANDQ	AX, R11
	MOVQ	P751X2_8, R12
	ANDQ	AX, R12
	MOVQ	P751X2_9, R13
	ANDQ	AX, R13
	MOVQ	P751X2_10, R14
	ANDQ	AX, R14
	MOVQ	P751X2_11, R15
	ANDQ	AX, R15

	// z += (2*p751 & mask)
	MOVQ	(REG_P3), AX
	ADDQ	SI, AX
	MOVQ	AX, (REG_P3)
	MOVQ	(8)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (8)(REG_P3)
	MOVQ	(16)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (16)(REG_P3)
	MOVQ	(24)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (24)(REG_P3)
	MOVQ	(32)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (32)(REG_P3)
	MOVQ	(40)(REG_P3), AX
	ADCQ	R9, AX
	MOVQ	AX, (40)(REG_P3)
	MOVQ	(48)(REG_P3), AX
	ADCQ	R10, AX
	MOVQ	AX, (48)(REG_P3)
	MOVQ	(56)(REG_P3), AX
	ADCQ	R11, AX
	MOVQ	AX, (56)(REG_P3)
	MOVQ	(64)(REG_P3), AX
	ADCQ	R12, AX
	MOVQ	AX, (64)(REG_P3)
	MOVQ	(72)(REG_P3), AX
	ADCQ	R13, AX
	MOVQ	AX, (72)(REG_P3)
	MOVQ	(80)(REG_P3), AX
	ADCQ	R14, AX
	MOVQ	AX, (80)(REG_P3)
	MOVQ	(88)(REG_P3), AX
	ADCQ	R15, AX
	MOVQ	AX, (88)(REG_P3)

	RET

// subP751(z, x, y)
//
// z = x - y; when the subtraction borrows, 2*p751 is added back under a
// mask. All steps are branch-free.
// NOTE(review): presumably x and y are expected to be below 2*p751 -
// confirm against the callers.
//
// Arguments (24 bytes): z+0(FP), x+8(FP), y+16(FP) = pointers to 12 limbs.
// Clobbers: AX, CX, DX, DI, SI, R8-R15, flags.
TEXT ·subP751(SB), NOSPLIT, $0-24

	MOVQ	z+0(FP), REG_P3
	MOVQ	x+8(FP), REG_P1
	MOVQ	y+16(FP), REG_P2

	MOVQ	(REG_P1), R8
	MOVQ	(8)(REG_P1), R9
	MOVQ	(16)(REG_P1), R10
	MOVQ	(24)(REG_P1), R11
	MOVQ	(32)(REG_P1), R12
	MOVQ	(40)(REG_P1), R13
	MOVQ	(48)(REG_P1), R14
	MOVQ	(56)(REG_P1), R15
	MOVQ	(64)(REG_P1), CX
	SUBQ	(REG_P2), R8
	SBBQ	(8)(REG_P2), R9
	SBBQ	(16)(REG_P2), R10
	SBBQ	(24)(REG_P2), R11
	SBBQ	(32)(REG_P2), R12
	SBBQ	(40)(REG_P2), R13
	SBBQ	(48)(REG_P2), R14
	SBBQ	(56)(REG_P2), R15
	SBBQ	(64)(REG_P2), CX
	MOVQ	R8, (REG_P3)
	MOVQ	R9, (8)(REG_P3)
	MOVQ	R10, (16)(REG_P3)
	MOVQ	R11, (24)(REG_P3)
	MOVQ	R12, (32)(REG_P3)
	MOVQ	R13, (40)(REG_P3)
	MOVQ	R14, (48)(REG_P3)
	MOVQ	R15, (56)(REG_P3)
	MOVQ	CX, (64)(REG_P3)
	MOVQ	(72)(REG_P1), AX
	SBBQ	(72)(REG_P2), AX
	MOVQ	AX, (72)(REG_P3)
	MOVQ	(80)(REG_P1), AX
	SBBQ	(80)(REG_P2), AX
	MOVQ	AX, (80)(REG_P3)
	MOVQ	(88)(REG_P1), AX
	SBBQ	(88)(REG_P2), AX
	MOVQ	AX, (88)(REG_P3)
	// AX = 0 - borrow. MOVQ (not XORQ) is required here: XORQ would clear
	// the carry flag that SBBQ is about to consume.
	MOVQ	$0, AX
	SBBQ	$0, AX

	// Mask 2*p751 with the borrow mask (limbs 1..4 share R8).
	MOVQ	P751X2_0, SI
	ANDQ	AX, SI
	MOVQ	P751X2_1, R8
	ANDQ	AX, R8
	MOVQ	P751X2_5, R9
	ANDQ	AX, R9
	MOVQ	P751X2_6, R10
	ANDQ	AX, R10
	MOVQ	P751X2_7, R11
	ANDQ	AX, R11
	MOVQ	P751X2_8, R12
	ANDQ	AX, R12
	MOVQ	P751X2_9, R13
	ANDQ	AX, R13
	MOVQ	P751X2_10, R14
	ANDQ	AX, R14
	MOVQ	P751X2_11, R15
	ANDQ	AX, R15

	// z += (2*p751 & mask)
	MOVQ	(REG_P3), AX
	ADDQ	SI, AX
	MOVQ	AX, (REG_P3)
	MOVQ	(8)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (8)(REG_P3)
	MOVQ	(16)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (16)(REG_P3)
	MOVQ	(24)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (24)(REG_P3)
	MOVQ	(32)(REG_P3), AX
	ADCQ	R8, AX
	MOVQ	AX, (32)(REG_P3)
	MOVQ	(40)(REG_P3), AX
	ADCQ	R9, AX
	MOVQ	AX, (40)(REG_P3)
	MOVQ	(48)(REG_P3), AX
	ADCQ	R10, AX
	MOVQ	AX, (48)(REG_P3)
	MOVQ	(56)(REG_P3), AX
	ADCQ	R11, AX
	MOVQ	AX, (56)(REG_P3)
	MOVQ	(64)(REG_P3), AX
	ADCQ	R12, AX
	MOVQ	AX, (64)(REG_P3)
	MOVQ	(72)(REG_P3), AX
	ADCQ	R13, AX
	MOVQ	AX, (72)(REG_P3)
	MOVQ	(80)(REG_P3), AX
	ADCQ	R14, AX
	MOVQ	AX, (80)(REG_P3)
	MOVQ	(88)(REG_P3), AX
	ADCQ	R15, AX
	MOVQ	AX, (88)(REG_P3)

	RET

// mulP751(z, x, y)
//
// Full integer product of two 12-limb operands: z[0..23] = x[0..11] * y[0..11].
// No modular reduction is performed in this routine.
//
// The product is computed with one level of Karatsuba. Writing x = AH:AL and
// y = BH:BL as pairs of 6-limb halves, the routine computes
//     (AH+AL)*(BH+BL), AL*BL and AH*BH
// with three 6x6-limb schoolbook multiplications and combines them as
//     AL*BL + ((AH+AL)*(BH+BL) - AL*BL - AH*BH) * 2^384 + AH*BH * 2^768.
//
// Arguments (24 bytes): z+0(FP) = pointer to 24 limbs, x+8(FP) and y+16(FP)
// = pointers to 12 limbs each.
// Stack frame (96 bytes): 0(SP)..88(SP) hold the 12 limbs of the middle
// product; 80(SP)/88(SP) temporarily hold the carry masks of the two 6-limb
// additions before being reused for product limbs.
// z is also used as scratch: z[0..5] and z[6..11] temporarily hold AH+AL and
// BH+BL before being overwritten by AL*BL.
// Clobbers: AX, CX, DX, DI, SI, R8-R15, flags.
TEXT ·mulP751(SB), $96-24

	// Here we store the destination in CX instead of in REG_P3 because the
	// multiplication instructions use DX as an implicit destination
	// operand: MULQ $REG sets DX:AX <-- AX * $REG.

	MOVQ	z+0(FP), CX
	MOVQ	x+8(FP), REG_P1
	MOVQ	y+16(FP), REG_P2

	// z[0..5] <- AH + AL (mod 2^384); AX <- all-ones mask if the sum carried.
	XORQ	AX, AX
	MOVQ	(48)(REG_P1), R8
	MOVQ	(56)(REG_P1), R9
	MOVQ	(64)(REG_P1), R10
	MOVQ	(72)(REG_P1), R11
	MOVQ	(80)(REG_P1), R12
	MOVQ	(88)(REG_P1), R13
	ADDQ	(REG_P1), R8
	ADCQ	(8)(REG_P1), R9
	ADCQ	(16)(REG_P1), R10
	ADCQ	(24)(REG_P1), R11
	ADCQ	(32)(REG_P1), R12
	ADCQ	(40)(REG_P1), R13
	MOVQ	R8, (CX)
	MOVQ	R9, (8)(CX)
	MOVQ	R10, (16)(CX)
	MOVQ	R11, (24)(CX)
	MOVQ	R12, (32)(CX)
	MOVQ	R13, (40)(CX)
	SBBQ	$0, AX

	// z[6..11] <- BH + BL (mod 2^384); DX <- all-ones mask if the sum carried.
	// R8, R9, R10 keep the three low limbs of BH+BL for the code below.
	XORQ	DX, DX
	MOVQ	(48)(REG_P2), R8
	MOVQ	(56)(REG_P2), R9
	MOVQ	(64)(REG_P2), R10
	MOVQ	(72)(REG_P2), R11
	MOVQ	(80)(REG_P2), R12
	MOVQ	(88)(REG_P2), R13
	ADDQ	(REG_P2), R8
	ADCQ	(8)(REG_P2), R9
	ADCQ	(16)(REG_P2), R10
	ADCQ	(24)(REG_P2), R11
	ADCQ	(32)(REG_P2), R12
	ADCQ	(40)(REG_P2), R13
	MOVQ	R8, (48)(CX)
	MOVQ	R9, (56)(CX)
	MOVQ	R10, (64)(CX)
	MOVQ	R11, (72)(CX)
	MOVQ	R12, (80)(CX)
	MOVQ	R13, (88)(CX)
	SBBQ	$0, DX
	// Park both carry masks on the stack: 80(SP) = mask of AH+AL,
	// 88(SP) = mask of BH+BL.
	MOVQ	AX, (80)(SP)
	MOVQ	DX, (88)(SP)

	// (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL)
	// Column-wise schoolbook product; three registers rotate as the
	// accumulator of the current column and its two carry words.

	MOVQ	(CX), R11
	MOVQ	R8, AX
	MULQ	R11
	MOVQ	AX, (SP)	// c0
	MOVQ	DX, R14

	XORQ	R15, R15
	MOVQ	R9, AX
	MULQ	R11
	XORQ	R9, R9
	ADDQ	AX, R14
	ADCQ	DX, R9

	MOVQ	(8)(CX), R12
	MOVQ	R8, AX
	MULQ	R12
	ADDQ	AX, R14
	MOVQ	R14, (8)(SP)	// c1
	ADCQ	DX, R9
	ADCQ	$0, R15

	XORQ	R8, R8
	MOVQ	R10, AX
	MULQ	R11
	ADDQ	AX, R9
	MOVQ	(48)(CX), R13
	ADCQ	DX, R15
	ADCQ	$0, R8

	MOVQ	(16)(CX), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R15
	MOVQ	(56)(CX), AX
	ADCQ	$0, R8

	MULQ	R12
	ADDQ	AX, R9
	MOVQ	R9, (16)(SP)	// c2
	ADCQ	DX, R15
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(72)(CX), AX
	MULQ	R11
	ADDQ	AX, R15
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(24)(CX), AX
	MULQ	R13
	ADDQ	AX, R15
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	R10, AX
	MULQ	R12
	ADDQ	AX, R15
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(16)(CX), R14
	MOVQ	(56)(CX), AX
	MULQ	R14
	ADDQ	AX, R15
	MOVQ	R15, (24)(SP)	// c3
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(80)(CX), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(64)(CX), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(48)(CX), R15
	MOVQ	(32)(CX), AX
	MULQ	R15
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(72)(CX), AX
	MULQ	R12
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(24)(CX), R13
	MOVQ	(56)(CX), AX
	MULQ	R13
	ADDQ	AX, R8
	MOVQ	R8, (32)(SP)	// c4
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(88)(CX), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(64)(CX), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(72)(CX), AX
	MULQ	R14
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(40)(CX), AX
	MULQ	R15
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(80)(CX), AX
	MULQ	R12
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(32)(CX), R15
	MOVQ	(56)(CX), AX
	MULQ	R15
	ADDQ	AX, R9
	MOVQ	R9, (40)(SP)	// c5
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(64)(CX), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(88)(CX), AX
	MULQ	R12
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(80)(CX), AX
	MULQ	R14
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(40)(CX), R11
	MOVQ	(56)(CX), AX
	MULQ	R11
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(72)(CX), AX
	MULQ	R13
	ADDQ	AX, R10
	MOVQ	R10, (48)(SP)	// c6
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(88)(CX), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(64)(CX), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(80)(CX), AX
	MULQ	R13
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(72)(CX), AX
	MULQ	R15
	ADDQ	AX, R8
	MOVQ	R8, (56)(SP)	// c7
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(72)(CX), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(80)(CX), AX
	MULQ	R15
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(88)(CX), AX
	MULQ	R13
	ADDQ	AX, R9
	MOVQ	R9, (64)(SP)	// c8
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(88)(CX), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(80)(CX), AX
	MULQ	R11
	ADDQ	AX, R10	// c9
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(88)(CX), AX
	MULQ	R11
	ADDQ	AX, R8	// c10
	ADCQ	DX, R9	// c11

	// Compensate for the carries dropped from the two 6-limb sums.
	// First add ((AH+AL) & mask of BH+BL) to limbs c6..c11. At this point
	// R12, R14, R13, R15, R11 hold limbs 1, 2, 3, 4, 5 of AH+AL and limb 0
	// is reloaded from z into DX.
	MOVQ	(88)(SP), AX
	MOVQ	(CX), DX
	ANDQ	AX, R12
	ANDQ	AX, R14
	ANDQ	AX, DX
	ANDQ	AX, R13
	ANDQ	AX, R15
	ANDQ	AX, R11
	MOVQ	(48)(SP), AX
	ADDQ	AX, DX
	MOVQ	(56)(SP), AX
	ADCQ	AX, R12
	MOVQ	(64)(SP), AX
	ADCQ	AX, R14
	ADCQ	R10, R13
	ADCQ	R8, R15
	ADCQ	R9, R11
	// Fetch the carry mask of AH+AL before 80(SP) is overwritten below.
	MOVQ	(80)(SP), AX
	MOVQ	DX, (48)(SP)
	MOVQ	R12, (56)(SP)
	MOVQ	R14, (64)(SP)
	MOVQ	R13, (72)(SP)
	MOVQ	R15, (80)(SP)
	MOVQ	R11, (88)(SP)

	// Then add ((BH+BL) & mask of AH+AL) to the same limbs.
	MOVQ	(48)(CX), R8
	MOVQ	(56)(CX), R9
	MOVQ	(64)(CX), R10
	MOVQ	(72)(CX), R11
	MOVQ	(80)(CX), R12
	MOVQ	(88)(CX), R13
	ANDQ	AX, R8
	ANDQ	AX, R9
	ANDQ	AX, R10
	ANDQ	AX, R11
	ANDQ	AX, R12
	ANDQ	AX, R13
	MOVQ	(48)(SP), AX
	ADDQ	AX, R8
	MOVQ	(56)(SP), AX
	ADCQ	AX, R9
	MOVQ	(64)(SP), AX
	ADCQ	AX, R10
	MOVQ	(72)(SP), AX
	ADCQ	AX, R11
	MOVQ	(80)(SP), AX
	ADCQ	AX, R12
	MOVQ	(88)(SP), AX
	ADCQ	AX, R13
	MOVQ	R8, (48)(SP)
	MOVQ	R9, (56)(SP)
	MOVQ	R11, (72)(SP)
	// The remaining limbs (R10, R12, R13) are written back to 64(SP),
	// 80(SP) and 88(SP) by stores interleaved with the start of AL*BL.

	// CX[0-11] <- AL*BL
	MOVQ	(REG_P1), R11
	MOVQ	(REG_P2), AX
	MULQ	R11
	XORQ	R9, R9
	MOVQ	AX, (CX)	// c0
	MOVQ	R10, (64)(SP)
	MOVQ	DX, R8

	MOVQ	(8)(REG_P2), AX
	MULQ	R11
	XORQ	R10, R10
	ADDQ	AX, R8
	MOVQ	R12, (80)(SP)
	ADCQ	DX, R9

	MOVQ	(8)(REG_P1), R12
	MOVQ	(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R8
	MOVQ	R8, (8)(CX)	// c1
	ADCQ	DX, R9
	MOVQ	R13, (88)(SP)
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(16)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(REG_P2), R13
	MOVQ	(16)(REG_P1), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(8)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R9
	MOVQ	R9, (16)(CX)	// c2
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(24)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(24)(REG_P1), AX
	MULQ	R13
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(16)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(16)(REG_P1), R14
	MOVQ	(8)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R10
	MOVQ	R10, (24)(CX)	// c3
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(32)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(16)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(32)(REG_P1), AX
	MULQ	R13
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(24)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(24)(REG_P1), R13
	MOVQ	(8)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R8
	MOVQ	R8, (32)(CX)	// c4
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(40)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(16)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(24)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(40)(REG_P1), R11
	MOVQ	(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(32)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(32)(REG_P1), R15
	MOVQ	(8)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R9
	MOVQ	R9, (40)(CX)	// c5
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(16)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(40)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(32)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(8)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(24)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R10
	MOVQ	R10, (48)(CX)	// c6
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(40)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(16)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(32)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(24)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R8
	MOVQ	R8, (56)(CX)	// c7
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(24)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(32)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(40)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R9
	MOVQ	R9, (64)(CX)	// c8
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(40)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(32)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	MOVQ	R10, (72)(CX)	// c9
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(40)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	MOVQ	R8, (80)(CX)	// c10
	ADCQ	DX, R9
	MOVQ	R9, (88)(CX)	// c11

	// CX[12-23] <- AH*BH
	MOVQ	(48)(REG_P1), R11
	MOVQ	(48)(REG_P2), AX
	MULQ	R11
	XORQ	R9, R9
	MOVQ	AX, (96)(CX)	// c0
	MOVQ	DX, R8

	MOVQ	(56)(REG_P2), AX
	MULQ	R11
	XORQ	R10, R10
	ADDQ	AX, R8
	ADCQ	DX, R9

	MOVQ	(56)(REG_P1), R12
	MOVQ	(48)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R8
	MOVQ	R8, (104)(CX)	// c1
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(64)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(48)(REG_P2), R13
	MOVQ	(64)(REG_P1), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(56)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R9
	MOVQ	R9, (112)(CX)	// c2
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(72)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(72)(REG_P1), AX
	MULQ	R13
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(64)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(64)(REG_P1), R14
	MOVQ	(56)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R10
	MOVQ	R10, (120)(CX)	// c3
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(80)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(64)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(80)(REG_P1), R15
	MOVQ	R13, AX
	MULQ	R15
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(72)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(72)(REG_P1), R13
	MOVQ	(56)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R8
	MOVQ	R8, (128)(CX)	// c4
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(88)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(64)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(72)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(88)(REG_P1), R11
	MOVQ	(48)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(80)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(56)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R9
	MOVQ	R9, (136)(CX)	// c5
	ADCQ	DX, R10
	ADCQ	$0, R8

	XORQ	R9, R9
	MOVQ	(64)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(88)(REG_P2), AX
	MULQ	R12
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(80)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(56)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	ADCQ	DX, R8
	ADCQ	$0, R9

	MOVQ	(72)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R10
	MOVQ	R10, (144)(CX)	// c6
	ADCQ	DX, R8
	ADCQ	$0, R9

	XORQ	R10, R10
	MOVQ	(88)(REG_P2), AX
	MULQ	R14
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(64)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(80)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R8
	ADCQ	DX, R9
	ADCQ	$0, R10

	MOVQ	(72)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R8
	MOVQ	R8, (152)(CX)	// c7
	ADCQ	DX, R9
	ADCQ	$0, R10

	XORQ	R8, R8
	MOVQ	(72)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(80)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R9
	ADCQ	DX, R10
	ADCQ	$0, R8

	MOVQ	(88)(REG_P2), AX
	MULQ	R13
	ADDQ	AX, R9
	MOVQ	R9, (160)(CX)	// c8
	ADCQ	DX, R10
	ADCQ	$0, R8

	// Unlike the two products above, the last columns of AH*BH do not
	// track a third carry word.
	// NOTE(review): presumably safe because the top limbs of the operands
	// are short - confirm.
	MOVQ	(88)(REG_P2), AX
	MULQ	R15
	ADDQ	AX, R10
	ADCQ	DX, R8

	MOVQ	(80)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R10
	MOVQ	R10, (168)(CX)	// c9
	ADCQ	DX, R8

	MOVQ	(88)(REG_P2), AX
	MULQ	R11
	ADDQ	AX, R8
	MOVQ	R8, (176)(CX)	// c10
	ADCQ	$0, DX
	MOVQ	DX, (184)(CX)	// c11

	// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL
	// The input pointers in DI and SI are no longer needed from here on and
	// are reused as data registers.
	MOVQ	(SP), R8
	SUBQ	(CX), R8
	MOVQ	(8)(SP), R9
	SBBQ	(8)(CX), R9
	MOVQ	(16)(SP), R10
	SBBQ	(16)(CX), R10
	MOVQ	(24)(SP), R11
	SBBQ	(24)(CX), R11
	MOVQ	(32)(SP), R12
	SBBQ	(32)(CX), R12
	MOVQ	(40)(SP), R13
	SBBQ	(40)(CX), R13
	MOVQ	(48)(SP), R14
	SBBQ	(48)(CX), R14
	MOVQ	(56)(SP), R15
	SBBQ	(56)(CX), R15
	MOVQ	(64)(SP), AX
	SBBQ	(64)(CX), AX
	MOVQ	(72)(SP), DX
	SBBQ	(72)(CX), DX
	MOVQ	(80)(SP), DI
	SBBQ	(80)(CX), DI
	MOVQ	(88)(SP), SI
	SBBQ	(88)(CX), SI
	MOVQ	SI, (SP)

	// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
	// SI serves as the load temporary; the top limb is finished in SI
	// rather than written back to (SP).
	MOVQ	(96)(CX), SI
	SUBQ	SI, R8
	MOVQ	(104)(CX), SI
	SBBQ	SI, R9
	MOVQ	(112)(CX), SI
	SBBQ	SI, R10
	MOVQ	(120)(CX), SI
	SBBQ	SI, R11
	MOVQ	(128)(CX), SI
	SBBQ	SI, R12
	MOVQ	(136)(CX), SI
	SBBQ	SI, R13
	MOVQ	(144)(CX), SI
	SBBQ	SI, R14
	MOVQ	(152)(CX), SI
	SBBQ	SI, R15
	MOVQ	(160)(CX), SI
	SBBQ	SI, AX
	MOVQ	(168)(CX), SI
	SBBQ	SI, DX
	MOVQ	(176)(CX), SI
	SBBQ	SI, DI
	MOVQ	(SP), SI
	SBBQ	(184)(CX), SI

	// FINAL RESULT
	// Add the middle term into z[6..17] and propagate the carry through
	// z[18..23].
	ADDQ	(48)(CX), R8
	MOVQ	R8, (48)(CX)
	ADCQ	(56)(CX), R9
	MOVQ	R9, (56)(CX)
	ADCQ	(64)(CX), R10
	MOVQ	R10, (64)(CX)
	ADCQ	(72)(CX), R11
	MOVQ	R11, (72)(CX)
	ADCQ	(80)(CX), R12
	MOVQ	R12, (80)(CX)
	ADCQ	(88)(CX), R13
	MOVQ	R13, (88)(CX)
	ADCQ	(96)(CX), R14
	MOVQ	R14, (96)(CX)
	ADCQ	(104)(CX), R15
	MOVQ	R15, (104)(CX)
	ADCQ	(112)(CX), AX
	MOVQ	AX, (112)(CX)
	ADCQ	(120)(CX), DX
	MOVQ	DX, (120)(CX)
	ADCQ	(128)(CX), DI
	MOVQ	DI, (128)(CX)
	ADCQ	(136)(CX), SI
	MOVQ	SI, (136)(CX)
	MOVQ	(144)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (144)(CX)
	MOVQ	(152)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (152)(CX)
	MOVQ	(160)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (160)(CX)
	MOVQ	(168)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (168)(CX)
	MOVQ	(176)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (176)(CX)
	MOVQ	(184)(CX), AX
	ADCQ	$0, AX
	MOVQ	AX, (184)(CX)

	RET

// This multiplies a 256-bit number pointed to by M0 with p751+1.
// It is assumed that M1 points to p751+1 stored as a 768-bit Fp751Element.
// Only the seven limbs at M1+40 .. M1+88 are read (448 bits).
// C points to the place to store the result and should be at least 192 bits:
// the three lowest limbs of the product are stored at 0+C, 8+C and 16+C.
// The remaining eight limbs are left in registers, from least to most
// significant: T7, T0, T1, T3, T6, T5, T2, T4. T8 and T9 are scratch; AX and
// DX are clobbered.
// The macro never clears CF or OF itself: the first ADOXQ consumes the OF
// value, and the first ADCXQ (in the second limb) the CF value, that were
// live on entry.
// NOTE(review): callers presumably guarantee that both flags are clear at
// the point of expansion - confirm.
// This should only be used when the BMI2 and ADX instruction set extensions
// are available.
#define mul256x448bmi2adx(M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) \
	MOVQ	0+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T1, T0	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T3, T2	\
	MOVQ	T1, 0+C	\ // C0_final
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T5, T4	\
	ADOXQ	T3, T0	\
	ADOXQ	T5, T2	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T3, T1	\
	ADOXQ	T3, T4	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T6, T5	\
	ADOXQ	T6, T1	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T7, T3	\
	ADOXQ	T7, T5	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, T8, T6	\
	ADOXQ	T8, T3	\
	MOVL	$0, AX	\
	ADOXQ	AX, T6	\
	\
	MOVQ	8+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADCXQ	T7, T0	\
	MOVQ	T0, 8+C	\ // C1_final
	ADCXQ	T8, T2	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T8, T7	\
	ADOXQ	T8, T2	\
	ADCXQ	T7, T4	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T0	\
	ADOXQ	T8, T4	\
	ADCXQ	T1, T0	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T7, T1	\
	ADCXQ	T5, T1	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T5	\
	ADCXQ	T5, T3	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T5	\
	ADCXQ	T5, T6	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T5	\
	MOVL	$0, AX	\
	ADCXQ	AX, T5	\
	\
	ADOXQ	T7, T0	\
	ADOXQ	T8, T1	\
	ADOXQ	T9, T3	\
	ADOXQ	DX, T6	\
	ADOXQ	AX, T5	\
	\
	MOVQ	16+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADCXQ	T7, T2	\
	MOVQ	T2, 16+C	\ // C2_final
	ADCXQ	T8, T4	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADOXQ	T7, T4	\
	ADCXQ	T8, T0	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T2	\
	ADOXQ	T8, T0	\
	ADCXQ	T2, T1	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T7, T2	\
	ADCXQ	T2, T3	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T2	\
	ADCXQ	T2, T6	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T2	\
	ADCXQ	T2, T5	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T2	\
	MOVL	$0, AX	\
	ADCXQ	AX, T2	\
	\
	ADOXQ	T7, T1	\
	ADOXQ	T8, T3	\
	ADOXQ	T9, T6	\
	ADOXQ	DX, T5	\
	ADOXQ	AX, T2	\
	\
	MOVQ	24+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADCXQ	T4, T7	\
	ADCXQ	T8, T0	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T9, T8	\
	ADOXQ	T9, T0	\
	ADCXQ	T8, T1	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T4	\
	ADOXQ	T8, T1	\
	ADCXQ	T4, T3	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, AX, T4	\
	ADCXQ	T4, T6	\
	ADOXQ	AX, T3	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T4	\
	ADCXQ	T4, T5	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T4	\
	ADCXQ	T4, T2	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T4	\
	MOVL	$0, AX	\
	ADCXQ	AX, T4	\
	\
	ADOXQ	T8, T6	\
	ADOXQ	T9, T5	\
	ADOXQ	DX, T2	\
	ADOXQ	AX, T4

// This multiplies a 256-bit number pointed to by M0 with p751+1.
// It is assumed that M1 points to p751+1 stored as a 768-bit Fp751Element.
// Only the seven limbs at M1+40 .. M1+88 are read (448 bits).
// C points to the place to store the result and should be at least 192 bits:
// the three lowest limbs of the product are stored at 0+C, 8+C and 16+C.
// The remaining eight limbs are left in registers, from least to most
// significant: T7, T0, T1, T3, T6, T5, T2, T4. T8 and T9 are scratch; AX and
// DX are clobbered.
// In addition to the result limbs, this variant uses 32+C and 40+C as
// temporary storage for partial products and spills T7 to 8(SP), so those
// three locations are overwritten as well.
// This should only be used when the BMI2 instruction set extension is
// available.
#define mul256x448bmi2(M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) \
	MOVQ	0+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T1, T0	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T3, T2	\
	MOVQ	T1, 0+C	\ // C0_final
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T5, T4	\
	ADDQ	T3, T0	\
	ADCQ	T5, T2	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T3, T1	\
	ADCQ	T3, T4	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T6, T5	\
	ADCQ	T6, T1	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T7, T3	\
	ADCQ	T7, T5	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, T8, T6	\
	ADCQ	T8, T3	\
	ADCQ	$0, T6	\
	\
	MOVQ	8+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADDQ	T7, T0	\
	MOVQ	T0, 8+C	\ // C1_final
	ADCQ	T8, T2	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T8, T7	\
	MOVQ	T8, 32+C	\
	ADCQ	T7, T4	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T0	\
	MOVQ	T8, 40+C	\
	ADCQ	T1, T0	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T7, T1	\
	ADCQ	T5, T1	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T5	\
	ADCQ	T5, T3	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T5	\
	ADCQ	T5, T6	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T5	\
	ADCQ	$0, T5	\
	\
	XORQ	AX, AX	\
	ADDQ	32+C, T2	\
	ADCQ	40+C, T4	\
	ADCQ	T7, T0	\
	ADCQ	T8, T1	\
	ADCQ	T9, T3	\
	ADCQ	DX, T6	\
	ADCQ	AX, T5	\
	\
	MOVQ	16+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADDQ	T7, T2	\
	MOVQ	T2, 16+C	\ // C2_final
	ADCQ	T8, T4	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T7, T8	\
	MOVQ	T7, 32+C	\
	ADCQ	T8, T0	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T2	\
	MOVQ	T8, 40+C	\
	ADCQ	T2, T1	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T7, T2	\
	ADCQ	T2, T3	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T2	\
	ADCQ	T2, T6	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T2	\
	ADCQ	T2, T5	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T2	\
	ADCQ	$0, T2	\
	\
	XORQ	AX, AX	\
	ADDQ	32+C, T4	\
	ADCQ	40+C, T0	\
	ADCQ	T7, T1	\
	ADCQ	T8, T3	\
	ADCQ	T9, T6	\
	ADCQ	DX, T5	\
	ADCQ	AX, T2	\
	\
	MOVQ	24+M0, DX	\
	MOVQ	M1+40(SB), AX	\
	MULXQ	AX, T7, T8	\
	ADDQ	T4, T7	\
	MOVQ	T7, 8(SP)	/* push T7 */	\
	ADCQ	T8, T0	\
	MOVQ	M1+48(SB), AX	\
	MULXQ	AX, T9, T8	\
	MOVQ	T9, 32+C	\
	ADCQ	T8, T1	\
	MOVQ	M1+56(SB), AX	\
	MULXQ	AX, T8, T4	\
	MOVQ	T8, 40+C	\
	ADCQ	T4, T3	\
	MOVQ	M1+64(SB), AX	\
	MULXQ	AX, T7, T4	\
	ADCQ	T4, T6	\
	MOVQ	M1+72(SB), AX	\
	MULXQ	AX, T8, T4	\
	ADCQ	T4, T5	\
	MOVQ	M1+80(SB), AX	\
	MULXQ	AX, T9, T4	\
	ADCQ	T4, T2	\
	MOVQ	M1+88(SB), AX	\
	MULXQ	AX, DX, T4	\
	ADCQ	$0, T4	\
	\
	XORQ	AX, AX	\
	ADDQ	32+C, T0	\
	ADCQ	40+C, T1	\
	ADCQ	T7, T3	\
	ADCQ	T8, T6	\
	ADCQ	T9, T5	\
	ADCQ	DX, T2	\
	ADCQ	AX, T4	\
	MOVQ	8(SP), T7	/* pop T7 */

// Template for calculating the Montgomery reduction algorithm described in
// section 5.2.3 of https://eprint.iacr.org/2017/1015.pdf. Template must be
// customized with schoolbook multiplication for 256 x 448-bit number.
// This macro reuses memory of IN value and *changes* it. Smashes registers
// R[8-15], AX, BX, CX, DX, BP.
1705 // Input: 1706 // * M0: 1536-bit number to be reduced 1707 // * C : either mul256x448bmi2 or mul256x448bmi2adx 1708 // Output: OUT 768-bit 1709 #define REDC(C, M0, MULS) \ 1710 \ // a[0-3] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 1711 MULS(M0, ·P751p1, 48+C, R8, R9, R13, R10, R14, R12, R11, BP, BX, CX) \ 1712 XORQ R15, R15 \ 1713 MOVQ 48+C, AX \ 1714 MOVQ 56+C, DX \ 1715 MOVQ 64+C, BX \ 1716 ADDQ 40+M0, AX \ 1717 ADCQ 48+M0, DX \ 1718 ADCQ 56+M0, BX \ 1719 MOVQ AX, 40+M0 \ 1720 MOVQ DX, 48+M0 \ 1721 MOVQ BX, 56+M0 \ 1722 ADCQ 64+M0, BP \ 1723 ADCQ 72+M0, R8 \ 1724 ADCQ 80+M0, R9 \ 1725 ADCQ 88+M0, R10 \ 1726 ADCQ 96+M0, R11 \ 1727 ADCQ 104+M0, R12 \ 1728 ADCQ 112+M0, R13 \ 1729 ADCQ 120+M0, R14 \ 1730 ADCQ 128+M0, R15 \ 1731 MOVQ BP, 64+M0 \ 1732 MOVQ R8, 72+M0 \ 1733 MOVQ R9, 80+M0 \ 1734 MOVQ R10, 88+M0 \ 1735 MOVQ R11, 96+M0 \ 1736 MOVQ R12, 104+M0 \ 1737 MOVQ R13, 112+M0 \ 1738 MOVQ R14, 120+M0 \ 1739 MOVQ R15, 128+M0 \ 1740 MOVQ 136+M0, R8 \ 1741 MOVQ 144+M0, R9 \ 1742 MOVQ 152+M0, R10 \ 1743 MOVQ 160+M0, R11 \ 1744 MOVQ 168+M0, R12 \ 1745 MOVQ 176+M0, R13 \ 1746 MOVQ 184+M0, R14 \ 1747 ADCQ $0, R8 \ 1748 ADCQ $0, R9 \ 1749 ADCQ $0, R10 \ 1750 ADCQ $0, R11 \ 1751 ADCQ $0, R12 \ 1752 ADCQ $0, R13 \ 1753 ADCQ $0, R14 \ 1754 MOVQ R8, 136+M0 \ 1755 MOVQ R9, 144+M0 \ 1756 MOVQ R10, 152+M0 \ 1757 MOVQ R11, 160+M0 \ 1758 MOVQ R12, 168+M0 \ 1759 MOVQ R13, 176+M0 \ 1760 MOVQ R14, 184+M0 \ 1761 \ // a[4-7] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 1762 MULS(32+M0, ·P751p1, 48+C, R8, R9, R13, R10, R14, R12, R11, BP, BX, CX) \ 1763 XORQ R15, R15 \ 1764 MOVQ 48+C, AX \ 1765 MOVQ 56+C, DX \ 1766 MOVQ 64+C, BX \ 1767 ADDQ 72+M0, AX \ 1768 ADCQ 80+M0, DX \ 1769 ADCQ 88+M0, BX \ 1770 MOVQ AX, 72+M0 \ 1771 MOVQ DX, 80+M0 \ 1772 MOVQ BX, 88+M0 \ 1773 ADCQ 96+M0, BP \ 1774 ADCQ 104+M0, R8 \ 1775 ADCQ 112+M0, R9 \ 1776 ADCQ 120+M0, R10 \ 1777 ADCQ 128+M0, R11 \ 1778 ADCQ 136+M0, R12 \ 1779 ADCQ 144+M0, 
R13 \ 1780 ADCQ 152+M0, R14 \ 1781 ADCQ 160+M0, R15 \ 1782 MOVQ BP, 0+C \ // Final result c0 1783 MOVQ R8, 104+M0 \ 1784 MOVQ R9, 112+M0 \ 1785 MOVQ R10, 120+M0 \ 1786 MOVQ R11, 128+M0 \ 1787 MOVQ R12, 136+M0 \ 1788 MOVQ R13, 144+M0 \ 1789 MOVQ R14, 152+M0 \ 1790 MOVQ R15, 160+M0 \ 1791 MOVQ 168+M0, R12 \ 1792 MOVQ 176+M0, R13 \ 1793 MOVQ 184+M0, R14 \ 1794 ADCQ $0, R12 \ 1795 ADCQ $0, R13 \ 1796 ADCQ $0, R14 \ 1797 MOVQ R12, 168+M0 \ 1798 MOVQ R13, 176+M0 \ 1799 MOVQ R14, 184+M0 \ 1800 \ // a[8-11] x p751p1_nz --> result: [reg_p2+48], [reg_p2+56], [reg_p2+64], and rbp, r8:r14 1801 MULS(64+M0, ·P751p1, 48+C, R8, R9, R13, R10, R14, R12, R11, BP, BX, CX) \ 1802 MOVQ 48+C, AX \ // Final result c1:c11 1803 MOVQ 56+C, DX \ 1804 MOVQ 64+C, BX \ 1805 ADDQ 104+M0, AX \ 1806 ADCQ 112+M0, DX \ 1807 ADCQ 120+M0, BX \ 1808 MOVQ AX, 8+C \ 1809 MOVQ DX, 16+C \ 1810 MOVQ BX, 24+C \ 1811 ADCQ 128+M0, BP \ 1812 ADCQ 136+M0, R8 \ 1813 ADCQ 144+M0, R9 \ 1814 ADCQ 152+M0, R10 \ 1815 ADCQ 160+M0, R11 \ 1816 ADCQ 168+M0, R12 \ 1817 ADCQ 176+M0, R13 \ 1818 ADCQ 184+M0, R14 \ 1819 MOVQ BP, 32+C \ 1820 MOVQ R8, 40+C \ 1821 MOVQ R9, 48+C \ 1822 MOVQ R10, 56+C \ 1823 MOVQ R11, 64+C \ 1824 MOVQ R12, 72+C \ 1825 MOVQ R13, 80+C \ 1826 MOVQ R14, 88+C 1827 1828 TEXT ·rdcP751(SB), $16-16 1829 MOVQ z+0(FP), REG_P2 1830 MOVQ x+8(FP), REG_P1 1831 1832 // Check whether to use optimized implementation 1833 CMPB ·HasADXandBMI2(SB), $1 1834 JE redc_with_mulx_adcx_adox 1835 CMPB ·HasBMI2(SB), $1 1836 JE redc_with_mulx 1837 1838 MOVQ (REG_P1), R11 1839 MOVQ P751P1_5, AX 1840 MULQ R11 1841 XORQ R8, R8 1842 ADDQ (40)(REG_P1), AX 1843 MOVQ AX, (40)(REG_P2) // Z5 1844 ADCQ DX, R8 1845 1846 XORQ R9, R9 1847 MOVQ P751P1_6, AX 1848 MULQ R11 1849 XORQ R10, R10 1850 ADDQ AX, R8 1851 ADCQ DX, R9 1852 1853 MOVQ (8)(REG_P1), R12 1854 MOVQ P751P1_5, AX 1855 MULQ R12 1856 ADDQ AX, R8 1857 ADCQ DX, R9 1858 ADCQ $0, R10 1859 ADDQ (48)(REG_P1), R8 1860 MOVQ R8, (48)(REG_P2) // Z6 1861 ADCQ $0, R9 1862 ADCQ $0, R10 1863 
1864 XORQ R8, R8 1865 MOVQ P751P1_7, AX 1866 MULQ R11 1867 ADDQ AX, R9 1868 ADCQ DX, R10 1869 ADCQ $0, R8 1870 1871 MOVQ P751P1_6, AX 1872 MULQ R12 1873 ADDQ AX, R9 1874 ADCQ DX, R10 1875 ADCQ $0, R8 1876 1877 MOVQ (16)(REG_P1), R13 1878 MOVQ P751P1_5, AX 1879 MULQ R13 1880 ADDQ AX, R9 1881 ADCQ DX, R10 1882 ADCQ $0, R8 1883 ADDQ (56)(REG_P1), R9 1884 MOVQ R9, (56)(REG_P2) // Z7 1885 ADCQ $0, R10 1886 ADCQ $0, R8 1887 1888 XORQ R9, R9 1889 MOVQ P751P1_8, AX 1890 MULQ R11 1891 ADDQ AX, R10 1892 ADCQ DX, R8 1893 ADCQ $0, R9 1894 1895 MOVQ P751P1_7, AX 1896 MULQ R12 1897 ADDQ AX, R10 1898 ADCQ DX, R8 1899 ADCQ $0, R9 1900 1901 MOVQ P751P1_6, AX 1902 MULQ R13 1903 ADDQ AX, R10 1904 ADCQ DX, R8 1905 ADCQ $0, R9 1906 1907 MOVQ (24)(REG_P1), R14 1908 MOVQ P751P1_5, AX 1909 MULQ R14 1910 ADDQ AX, R10 1911 ADCQ DX, R8 1912 ADCQ $0, R9 1913 ADDQ (64)(REG_P1), R10 1914 MOVQ R10, (64)(REG_P2) // Z8 1915 ADCQ $0, R8 1916 ADCQ $0, R9 1917 1918 XORQ R10, R10 1919 MOVQ P751P1_9, AX 1920 MULQ R11 1921 ADDQ AX, R8 1922 ADCQ DX, R9 1923 ADCQ $0, R10 1924 1925 MOVQ P751P1_8, AX 1926 MULQ R12 1927 ADDQ AX, R8 1928 ADCQ DX, R9 1929 ADCQ $0, R10 1930 1931 MOVQ P751P1_7, AX 1932 MULQ R13 1933 ADDQ AX, R8 1934 ADCQ DX, R9 1935 ADCQ $0, R10 1936 1937 MOVQ P751P1_6, AX 1938 MULQ R14 1939 ADDQ AX, R8 1940 ADCQ DX, R9 1941 ADCQ $0, R10 1942 1943 MOVQ (32)(REG_P1), R15 1944 MOVQ P751P1_5, AX 1945 MULQ R15 1946 ADDQ AX, R8 1947 ADCQ DX, R9 1948 ADCQ $0, R10 1949 ADDQ (72)(REG_P1), R8 1950 MOVQ R8, (72)(REG_P2) // Z9 1951 ADCQ $0, R9 1952 ADCQ $0, R10 1953 1954 XORQ R8, R8 1955 MOVQ P751P1_10, AX 1956 MULQ R11 1957 ADDQ AX, R9 1958 ADCQ DX, R10 1959 ADCQ $0, R8 1960 1961 MOVQ P751P1_9, AX 1962 MULQ R12 1963 ADDQ AX, R9 1964 ADCQ DX, R10 1965 ADCQ $0, R8 1966 1967 MOVQ P751P1_8, AX 1968 MULQ R13 1969 ADDQ AX, R9 1970 ADCQ DX, R10 1971 ADCQ $0, R8 1972 1973 MOVQ P751P1_7, AX 1974 MULQ R14 1975 ADDQ AX, R9 1976 ADCQ DX, R10 1977 ADCQ $0, R8 1978 1979 MOVQ P751P1_6, AX 1980 MULQ R15 1981 ADDQ AX, R9 
1982 ADCQ DX, R10 1983 ADCQ $0, R8 1984 1985 MOVQ (40)(REG_P2), CX 1986 MOVQ P751P1_5, AX 1987 MULQ CX 1988 ADDQ AX, R9 1989 ADCQ DX, R10 1990 ADCQ $0, R8 1991 ADDQ (80)(REG_P1), R9 1992 MOVQ R9, (80)(REG_P2) // Z10 1993 ADCQ $0, R10 1994 ADCQ $0, R8 1995 1996 XORQ R9, R9 1997 MOVQ P751P1_11, AX 1998 MULQ R11 1999 ADDQ AX, R10 2000 ADCQ DX, R8 2001 ADCQ $0, R9 2002 2003 MOVQ P751P1_10, AX 2004 MULQ R12 2005 ADDQ AX, R10 2006 ADCQ DX, R8 2007 ADCQ $0, R9 2008 2009 MOVQ P751P1_9, AX 2010 MULQ R13 2011 ADDQ AX, R10 2012 ADCQ DX, R8 2013 ADCQ $0, R9 2014 2015 MOVQ P751P1_8, AX 2016 MULQ R14 2017 ADDQ AX, R10 2018 ADCQ DX, R8 2019 ADCQ $0, R9 2020 2021 MOVQ P751P1_7, AX 2022 MULQ R15 2023 ADDQ AX, R10 2024 ADCQ DX, R8 2025 ADCQ $0, R9 2026 2027 MOVQ P751P1_6, AX 2028 MULQ CX 2029 ADDQ AX, R10 2030 ADCQ DX, R8 2031 ADCQ $0, R9 2032 2033 MOVQ (48)(REG_P2), R11 2034 MOVQ P751P1_5, AX 2035 MULQ R11 2036 ADDQ AX, R10 2037 ADCQ DX, R8 2038 ADCQ $0, R9 2039 ADDQ (88)(REG_P1), R10 2040 MOVQ R10, (88)(REG_P2) // Z11 2041 ADCQ $0, R8 2042 ADCQ $0, R9 2043 2044 XORQ R10, R10 2045 MOVQ P751P1_11, AX 2046 MULQ R12 2047 ADDQ AX, R8 2048 ADCQ DX, R9 2049 ADCQ $0, R10 2050 2051 MOVQ P751P1_10, AX 2052 MULQ R13 2053 ADDQ AX, R8 2054 ADCQ DX, R9 2055 ADCQ $0, R10 2056 2057 MOVQ P751P1_9, AX 2058 MULQ R14 2059 ADDQ AX, R8 2060 ADCQ DX, R9 2061 ADCQ $0, R10 2062 2063 MOVQ P751P1_8, AX 2064 MULQ R15 2065 ADDQ AX, R8 2066 ADCQ DX, R9 2067 ADCQ $0, R10 2068 2069 MOVQ P751P1_7, AX 2070 MULQ CX 2071 ADDQ AX, R8 2072 ADCQ DX, R9 2073 ADCQ $0, R10 2074 2075 MOVQ P751P1_6, AX 2076 MULQ R11 2077 ADDQ AX, R8 2078 ADCQ DX, R9 2079 ADCQ $0, R10 2080 2081 MOVQ (56)(REG_P2), R12 2082 MOVQ P751P1_5, AX 2083 MULQ R12 2084 ADDQ AX, R8 2085 ADCQ DX, R9 2086 ADCQ $0, R10 2087 ADDQ (96)(REG_P1), R8 2088 MOVQ R8, (REG_P2) // Z0 2089 ADCQ $0, R9 2090 ADCQ $0, R10 2091 2092 XORQ R8, R8 2093 MOVQ P751P1_11, AX 2094 MULQ R13 2095 ADDQ AX, R9 2096 ADCQ DX, R10 2097 ADCQ $0, R8 2098 2099 MOVQ P751P1_10, AX 2100 MULQ 
R14 2101 ADDQ AX, R9 2102 ADCQ DX, R10 2103 ADCQ $0, R8 2104 2105 MOVQ P751P1_9, AX 2106 MULQ R15 2107 ADDQ AX, R9 2108 ADCQ DX, R10 2109 ADCQ $0, R8 2110 2111 MOVQ P751P1_8, AX 2112 MULQ CX 2113 ADDQ AX, R9 2114 ADCQ DX, R10 2115 ADCQ $0, R8 2116 2117 MOVQ P751P1_7, AX 2118 MULQ R11 2119 ADDQ AX, R9 2120 ADCQ DX, R10 2121 ADCQ $0, R8 2122 2123 MOVQ P751P1_6, AX 2124 MULQ R12 2125 ADDQ AX, R9 2126 ADCQ DX, R10 2127 ADCQ $0, R8 2128 2129 MOVQ (64)(REG_P2), R13 2130 MOVQ P751P1_5, AX 2131 MULQ R13 2132 ADDQ AX, R9 2133 ADCQ DX, R10 2134 ADCQ $0, R8 2135 ADDQ (104)(REG_P1), R9 2136 MOVQ R9, (8)(REG_P2) // Z1 2137 ADCQ $0, R10 2138 ADCQ $0, R8 2139 2140 XORQ R9, R9 2141 MOVQ P751P1_11, AX 2142 MULQ R14 2143 ADDQ AX, R10 2144 ADCQ DX, R8 2145 ADCQ $0, R9 2146 2147 MOVQ P751P1_10, AX 2148 MULQ R15 2149 ADDQ AX, R10 2150 ADCQ DX, R8 2151 ADCQ $0, R9 2152 2153 MOVQ P751P1_9, AX 2154 MULQ CX 2155 ADDQ AX, R10 2156 ADCQ DX, R8 2157 ADCQ $0, R9 2158 2159 MOVQ P751P1_8, AX 2160 MULQ R11 2161 ADDQ AX, R10 2162 ADCQ DX, R8 2163 ADCQ $0, R9 2164 2165 MOVQ P751P1_7, AX 2166 MULQ R12 2167 ADDQ AX, R10 2168 ADCQ DX, R8 2169 ADCQ $0, R9 2170 2171 MOVQ P751P1_6, AX 2172 MULQ R13 2173 ADDQ AX, R10 2174 ADCQ DX, R8 2175 ADCQ $0, R9 2176 2177 MOVQ (72)(REG_P2), R14 2178 MOVQ P751P1_5, AX 2179 MULQ R14 2180 ADDQ AX, R10 2181 ADCQ DX, R8 2182 ADCQ $0, R9 2183 ADDQ (112)(REG_P1), R10 2184 MOVQ R10, (16)(REG_P2) // Z2 2185 ADCQ $0, R8 2186 ADCQ $0, R9 2187 2188 XORQ R10, R10 2189 MOVQ P751P1_11, AX 2190 MULQ R15 2191 ADDQ AX, R8 2192 ADCQ DX, R9 2193 ADCQ $0, R10 2194 2195 MOVQ P751P1_10, AX 2196 MULQ CX 2197 ADDQ AX, R8 2198 ADCQ DX, R9 2199 ADCQ $0, R10 2200 2201 MOVQ P751P1_9, AX 2202 MULQ R11 2203 ADDQ AX, R8 2204 ADCQ DX, R9 2205 ADCQ $0, R10 2206 2207 MOVQ P751P1_8, AX 2208 MULQ R12 2209 ADDQ AX, R8 2210 ADCQ DX, R9 2211 ADCQ $0, R10 2212 2213 MOVQ P751P1_7, AX 2214 MULQ R13 2215 ADDQ AX, R8 2216 ADCQ DX, R9 2217 ADCQ $0, R10 2218 2219 MOVQ P751P1_6, AX 2220 MULQ R14 2221 ADDQ AX, R8 
2222 ADCQ DX, R9 2223 ADCQ $0, R10 2224 2225 MOVQ (80)(REG_P2), R15 2226 MOVQ P751P1_5, AX 2227 MULQ R15 2228 ADDQ AX, R8 2229 ADCQ DX, R9 2230 ADCQ $0, R10 2231 ADDQ (120)(REG_P1), R8 2232 MOVQ R8, (24)(REG_P2) // Z3 2233 ADCQ $0, R9 2234 ADCQ $0, R10 2235 2236 XORQ R8, R8 2237 MOVQ P751P1_11, AX 2238 MULQ CX 2239 ADDQ AX, R9 2240 ADCQ DX, R10 2241 ADCQ $0, R8 2242 2243 MOVQ P751P1_10, AX 2244 MULQ R11 2245 ADDQ AX, R9 2246 ADCQ DX, R10 2247 ADCQ $0, R8 2248 2249 MOVQ P751P1_9, AX 2250 MULQ R12 2251 ADDQ AX, R9 2252 ADCQ DX, R10 2253 ADCQ $0, R8 2254 2255 MOVQ P751P1_8, AX 2256 MULQ R13 2257 ADDQ AX, R9 2258 ADCQ DX, R10 2259 ADCQ $0, R8 2260 2261 MOVQ P751P1_7, AX 2262 MULQ R14 2263 ADDQ AX, R9 2264 ADCQ DX, R10 2265 ADCQ $0, R8 2266 2267 MOVQ P751P1_6, AX 2268 MULQ R15 2269 ADDQ AX, R9 2270 ADCQ DX, R10 2271 ADCQ $0, R8 2272 2273 MOVQ (88)(REG_P2), CX 2274 MOVQ P751P1_5, AX 2275 MULQ CX 2276 ADDQ AX, R9 2277 ADCQ DX, R10 2278 ADCQ $0, R8 2279 ADDQ (128)(REG_P1), R9 2280 MOVQ R9, (32)(REG_P2) // Z4 2281 ADCQ $0, R10 2282 ADCQ $0, R8 2283 2284 XORQ R9, R9 2285 MOVQ P751P1_11, AX 2286 MULQ R11 2287 ADDQ AX, R10 2288 ADCQ DX, R8 2289 ADCQ $0, R9 2290 2291 MOVQ P751P1_10, AX 2292 MULQ R12 2293 ADDQ AX, R10 2294 ADCQ DX, R8 2295 ADCQ $0, R9 2296 2297 MOVQ P751P1_9, AX 2298 MULQ R13 2299 ADDQ AX, R10 2300 ADCQ DX, R8 2301 ADCQ $0, R9 2302 2303 MOVQ P751P1_8, AX 2304 MULQ R14 2305 ADDQ AX, R10 2306 ADCQ DX, R8 2307 ADCQ $0, R9 2308 2309 MOVQ P751P1_7, AX 2310 MULQ R15 2311 ADDQ AX, R10 2312 ADCQ DX, R8 2313 ADCQ $0, R9 2314 2315 MOVQ P751P1_6, AX 2316 MULQ CX 2317 ADDQ AX, R10 2318 ADCQ DX, R8 2319 ADCQ $0, R9 2320 ADDQ (136)(REG_P1), R10 2321 MOVQ R10, (40)(REG_P2) // Z5 2322 ADCQ $0, R8 2323 ADCQ $0, R9 2324 2325 XORQ R10, R10 2326 MOVQ P751P1_11, AX 2327 MULQ R12 2328 ADDQ AX, R8 2329 ADCQ DX, R9 2330 ADCQ $0, R10 2331 2332 MOVQ P751P1_10, AX 2333 MULQ R13 2334 ADDQ AX, R8 2335 ADCQ DX, R9 2336 ADCQ $0, R10 2337 2338 MOVQ P751P1_9, AX 2339 MULQ R14 2340 ADDQ AX, R8 
2341 ADCQ DX, R9 2342 ADCQ $0, R10 2343 2344 MOVQ P751P1_8, AX 2345 MULQ R15 2346 ADDQ AX, R8 2347 ADCQ DX, R9 2348 ADCQ $0, R10 2349 2350 MOVQ P751P1_7, AX 2351 MULQ CX 2352 ADDQ AX, R8 2353 ADCQ DX, R9 2354 ADCQ $0, R10 2355 ADDQ (144)(REG_P1), R8 2356 MOVQ R8, (48)(REG_P2) // Z6 2357 ADCQ $0, R9 2358 ADCQ $0, R10 2359 2360 XORQ R8, R8 2361 MOVQ P751P1_11, AX 2362 MULQ R13 2363 ADDQ AX, R9 2364 ADCQ DX, R10 2365 ADCQ $0, R8 2366 2367 MOVQ P751P1_10, AX 2368 MULQ R14 2369 ADDQ AX, R9 2370 ADCQ DX, R10 2371 ADCQ $0, R8 2372 2373 MOVQ P751P1_9, AX 2374 MULQ R15 2375 ADDQ AX, R9 2376 ADCQ DX, R10 2377 ADCQ $0, R8 2378 2379 MOVQ P751P1_8, AX 2380 MULQ CX 2381 ADDQ AX, R9 2382 ADCQ DX, R10 2383 ADCQ $0, R8 2384 ADDQ (152)(REG_P1), R9 2385 MOVQ R9, (56)(REG_P2) // Z7 2386 ADCQ $0, R10 2387 ADCQ $0, R8 2388 2389 XORQ R9, R9 2390 MOVQ P751P1_11, AX 2391 MULQ R14 2392 ADDQ AX, R10 2393 ADCQ DX, R8 2394 ADCQ $0, R9 2395 2396 MOVQ P751P1_10, AX 2397 MULQ R15 2398 ADDQ AX, R10 2399 ADCQ DX, R8 2400 ADCQ $0, R9 2401 2402 MOVQ P751P1_9, AX 2403 MULQ CX 2404 ADDQ AX, R10 2405 ADCQ DX, R8 2406 ADCQ $0, R9 2407 ADDQ (160)(REG_P1), R10 2408 MOVQ R10, (64)(REG_P2) // Z8 2409 ADCQ $0, R8 2410 ADCQ $0, R9 2411 2412 XORQ R10, R10 2413 MOVQ P751P1_11, AX 2414 MULQ R15 2415 ADDQ AX, R8 2416 ADCQ DX, R9 2417 ADCQ $0, R10 2418 2419 MOVQ P751P1_10, AX 2420 MULQ CX 2421 ADDQ AX, R8 2422 ADCQ DX, R9 2423 ADCQ $0, R10 2424 ADDQ (168)(REG_P1), R8 // Z9 2425 MOVQ R8, (72)(REG_P2) // Z9 2426 ADCQ $0, R9 2427 ADCQ $0, R10 2428 2429 MOVQ P751P1_11, AX 2430 MULQ CX 2431 ADDQ AX, R9 2432 ADCQ DX, R10 2433 ADDQ (176)(REG_P1), R9 // Z10 2434 MOVQ R9, (80)(REG_P2) // Z10 2435 ADCQ $0, R10 2436 ADDQ (184)(REG_P1), R10 // Z11 2437 MOVQ R10, (88)(REG_P2) // Z11 2438 RET 2439 2440 redc_with_mulx_adcx_adox: 2441 // This implements the Montgomery reduction algorithm described in 2442 // section 5.2.3 of https://eprint.iacr.org/2017/1015.pdf. 
2443 // This assumes that the BMI2 and ADX instruction set extensions are available. 2444 MOVQ BP, 0(SP) // push: BP is Callee-save. 2445 REDC(0(REG_P2), 0(REG_P1), mul256x448bmi2adx) 2446 MOVQ 0(SP), BP // pop: BP is Callee-save. 2447 RET 2448 2449 redc_with_mulx: 2450 // This implements the Montgomery reduction algorithm described in 2451 // section 5.2.3 of https://eprint.iacr.org/2017/1015.pdf. 2452 // This assumes that the BMI2 instruction set extension is available. 2453 MOVQ BP, 0(SP) // push: BP is Callee-save. 2454 REDC(0(REG_P2), 0(REG_P1), mul256x448bmi2) 2455 MOVQ 0(SP), BP // pop: BP is Callee-save. 2456 RET 2457 2458 TEXT ·adlP751(SB), NOSPLIT, $0-24 2459 2460 MOVQ z+0(FP), REG_P3 2461 MOVQ x+8(FP), REG_P1 2462 MOVQ y+16(FP), REG_P2 2463 2464 MOVQ (REG_P1), R8 2465 MOVQ (8)(REG_P1), R9 2466 MOVQ (16)(REG_P1), R10 2467 MOVQ (24)(REG_P1), R11 2468 MOVQ (32)(REG_P1), R12 2469 MOVQ (40)(REG_P1), R13 2470 MOVQ (48)(REG_P1), R14 2471 MOVQ (56)(REG_P1), R15 2472 MOVQ (64)(REG_P1), AX 2473 MOVQ (72)(REG_P1), BX 2474 MOVQ (80)(REG_P1), CX 2475 2476 ADDQ (REG_P2), R8 2477 ADCQ (8)(REG_P2), R9 2478 ADCQ (16)(REG_P2), R10 2479 ADCQ (24)(REG_P2), R11 2480 ADCQ (32)(REG_P2), R12 2481 ADCQ (40)(REG_P2), R13 2482 ADCQ (48)(REG_P2), R14 2483 ADCQ (56)(REG_P2), R15 2484 ADCQ (64)(REG_P2), AX 2485 ADCQ (72)(REG_P2), BX 2486 ADCQ (80)(REG_P2), CX 2487 2488 MOVQ R8, (REG_P3) 2489 MOVQ R9, (8)(REG_P3) 2490 MOVQ R10, (16)(REG_P3) 2491 MOVQ R11, (24)(REG_P3) 2492 MOVQ R12, (32)(REG_P3) 2493 MOVQ R13, (40)(REG_P3) 2494 MOVQ R14, (48)(REG_P3) 2495 MOVQ R15, (56)(REG_P3) 2496 MOVQ AX, (64)(REG_P3) 2497 MOVQ BX, (72)(REG_P3) 2498 MOVQ CX, (80)(REG_P3) 2499 MOVQ (88)(REG_P1), AX 2500 ADCQ (88)(REG_P2), AX 2501 MOVQ AX, (88)(REG_P3) 2502 2503 MOVQ (96)(REG_P1), R8 2504 MOVQ (104)(REG_P1), R9 2505 MOVQ (112)(REG_P1), R10 2506 MOVQ (120)(REG_P1), R11 2507 MOVQ (128)(REG_P1), R12 2508 MOVQ (136)(REG_P1), R13 2509 MOVQ (144)(REG_P1), R14 2510 MOVQ (152)(REG_P1), R15 2511 MOVQ 
(160)(REG_P1), AX 2512 MOVQ (168)(REG_P1), BX 2513 MOVQ (176)(REG_P1), CX 2514 MOVQ (184)(REG_P1), DI 2515 2516 ADCQ (96)(REG_P2), R8 2517 ADCQ (104)(REG_P2), R9 2518 ADCQ (112)(REG_P2), R10 2519 ADCQ (120)(REG_P2), R11 2520 ADCQ (128)(REG_P2), R12 2521 ADCQ (136)(REG_P2), R13 2522 ADCQ (144)(REG_P2), R14 2523 ADCQ (152)(REG_P2), R15 2524 ADCQ (160)(REG_P2), AX 2525 ADCQ (168)(REG_P2), BX 2526 ADCQ (176)(REG_P2), CX 2527 ADCQ (184)(REG_P2), DI 2528 2529 MOVQ R8, (96)(REG_P3) 2530 MOVQ R9, (104)(REG_P3) 2531 MOVQ R10, (112)(REG_P3) 2532 MOVQ R11, (120)(REG_P3) 2533 MOVQ R12, (128)(REG_P3) 2534 MOVQ R13, (136)(REG_P3) 2535 MOVQ R14, (144)(REG_P3) 2536 MOVQ R15, (152)(REG_P3) 2537 MOVQ AX, (160)(REG_P3) 2538 MOVQ BX, (168)(REG_P3) 2539 MOVQ CX, (176)(REG_P3) 2540 MOVQ DI, (184)(REG_P3) 2541 2542 RET 2543 2544 2545 TEXT ·sulP751(SB), NOSPLIT, $0-24 2546 2547 MOVQ z+0(FP), REG_P3 2548 MOVQ x+8(FP), REG_P1 2549 MOVQ y+16(FP), REG_P2 2550 2551 MOVQ (REG_P1), R8 2552 MOVQ (8)(REG_P1), R9 2553 MOVQ (16)(REG_P1), R10 2554 MOVQ (24)(REG_P1), R11 2555 MOVQ (32)(REG_P1), R12 2556 MOVQ (40)(REG_P1), R13 2557 MOVQ (48)(REG_P1), R14 2558 MOVQ (56)(REG_P1), R15 2559 MOVQ (64)(REG_P1), AX 2560 MOVQ (72)(REG_P1), BX 2561 MOVQ (80)(REG_P1), CX 2562 2563 SUBQ (REG_P2), R8 2564 SBBQ (8)(REG_P2), R9 2565 SBBQ (16)(REG_P2), R10 2566 SBBQ (24)(REG_P2), R11 2567 SBBQ (32)(REG_P2), R12 2568 SBBQ (40)(REG_P2), R13 2569 SBBQ (48)(REG_P2), R14 2570 SBBQ (56)(REG_P2), R15 2571 SBBQ (64)(REG_P2), AX 2572 SBBQ (72)(REG_P2), BX 2573 SBBQ (80)(REG_P2), CX 2574 2575 MOVQ R8, (REG_P3) 2576 MOVQ R9, (8)(REG_P3) 2577 MOVQ R10, (16)(REG_P3) 2578 MOVQ R11, (24)(REG_P3) 2579 MOVQ R12, (32)(REG_P3) 2580 MOVQ R13, (40)(REG_P3) 2581 MOVQ R14, (48)(REG_P3) 2582 MOVQ R15, (56)(REG_P3) 2583 MOVQ AX, (64)(REG_P3) 2584 MOVQ BX, (72)(REG_P3) 2585 MOVQ CX, (80)(REG_P3) 2586 MOVQ (88)(REG_P1), AX 2587 SBBQ (88)(REG_P2), AX 2588 MOVQ AX, (88)(REG_P3) 2589 2590 MOVQ (96)(REG_P1), R8 2591 MOVQ (104)(REG_P1), R9 2592 
MOVQ (112)(REG_P1), R10 2593 MOVQ (120)(REG_P1), R11 2594 MOVQ (128)(REG_P1), R12 2595 MOVQ (136)(REG_P1), R13 2596 MOVQ (144)(REG_P1), R14 2597 MOVQ (152)(REG_P1), R15 2598 MOVQ (160)(REG_P1), AX 2599 MOVQ (168)(REG_P1), BX 2600 MOVQ (176)(REG_P1), CX 2601 MOVQ (184)(REG_P1), DI 2602 2603 SBBQ (96)(REG_P2), R8 2604 SBBQ (104)(REG_P2), R9 2605 SBBQ (112)(REG_P2), R10 2606 SBBQ (120)(REG_P2), R11 2607 SBBQ (128)(REG_P2), R12 2608 SBBQ (136)(REG_P2), R13 2609 SBBQ (144)(REG_P2), R14 2610 SBBQ (152)(REG_P2), R15 2611 SBBQ (160)(REG_P2), AX 2612 SBBQ (168)(REG_P2), BX 2613 SBBQ (176)(REG_P2), CX 2614 SBBQ (184)(REG_P2), DI 2615 2616 MOVQ R8, (96)(REG_P3) 2617 MOVQ R9, (104)(REG_P3) 2618 MOVQ R10, (112)(REG_P3) 2619 MOVQ R11, (120)(REG_P3) 2620 MOVQ R12, (128)(REG_P3) 2621 MOVQ R13, (136)(REG_P3) 2622 MOVQ R14, (144)(REG_P3) 2623 MOVQ R15, (152)(REG_P3) 2624 MOVQ AX, (160)(REG_P3) 2625 MOVQ BX, (168)(REG_P3) 2626 MOVQ CX, (176)(REG_P3) 2627 MOVQ DI, (184)(REG_P3) 2628 2629 // Now the carry flag is 1 if x-y < 0. If so, add p*2^768. 2630 MOVQ $0, AX 2631 SBBQ $0, AX 2632 2633 // Load p into registers: 2634 MOVQ P751_0, R8 2635 // P751_{1,2,3,4} = P751_0, so reuse R8 2636 MOVQ P751_5, R9 2637 MOVQ P751_6, R10 2638 MOVQ P751_7, R11 2639 MOVQ P751_8, R12 2640 MOVQ P751_9, R13 2641 MOVQ P751_10, R14 2642 MOVQ P751_11, R15 2643 2644 ANDQ AX, R8 2645 ANDQ AX, R9 2646 ANDQ AX, R10 2647 ANDQ AX, R11 2648 ANDQ AX, R12 2649 ANDQ AX, R13 2650 ANDQ AX, R14 2651 ANDQ AX, R15 2652 2653 ADDQ R8, (96 )(REG_P3) 2654 ADCQ R8, (96+ 8)(REG_P3) 2655 ADCQ R8, (96+16)(REG_P3) 2656 ADCQ R8, (96+24)(REG_P3) 2657 ADCQ R8, (96+32)(REG_P3) 2658 ADCQ R9, (96+40)(REG_P3) 2659 ADCQ R10, (96+48)(REG_P3) 2660 ADCQ R11, (96+56)(REG_P3) 2661 ADCQ R12, (96+64)(REG_P3) 2662 ADCQ R13, (96+72)(REG_P3) 2663 ADCQ R14, (96+80)(REG_P3) 2664 ADCQ R15, (96+88)(REG_P3) 2665 2666 RET