// Source: github.com/cloudflare/circl@v1.5.0/ecc/fourq/fp_amd64.h
//
// Assembler macros for arithmetic on 128-bit values held as two 64-bit
// words: offset 0 (or register c0/AX) is the low word and offset 8 (or
// register c1/DX) is the high word.
//
// NOTE(review): the folding steps below treat bit 127 as having weight 1,
// which is valid modulo 2^127-1; ·modulusP is defined elsewhere and is
// presumably that value — confirm.

// CHECK_BMI2 triggers bmi2 if supported,
// otherwise it falls back to legacy code.
//
// It tests the byte ·hasBMI2(SB): when it is zero, control jumps to label
// and runs legacy; otherwise bmi2 runs. Both paths end in RET, so the macro
// is meant to form the tail of a function body.
//   label:  name of the local label placed before the legacy path.
//   legacy: instruction sequence used when ·hasBMI2 is zero.
//   bmi2:   instruction sequence used when ·hasBMI2 is non-zero.
#define CHECK_BMI2(label, legacy, bmi2) \
    CMPB ·hasBMI2(SB), $0 \
    JE label \
    bmi2 \
    RET \
    label: \
    legacy \
    RET

// _fpReduce: one folding step on the 128-bit value c1:c0.
// BTRQ copies bit 63 of c1 (bit 127 of the value) into the carry flag and
// clears it; the two ADCQ then add that carry to c0 and propagate it into
// c1. The net effect is replacing 2^127 by 1.
// Uses: FLAGS (c0 and c1 are updated in place).
#define _fpReduce(c0, c1) \
    BTRQ $63, c1 \
    ADCQ $0, c0 \
    ADCQ $0, c1

// _fpMod: c = c mod p
// Uses: AX, DX, FLAGS
// Instr: x86_64
//
// Loads c into DX:AX and subtracts ·modulusP. BTRQ then clears bit 127 of
// the difference and the SBBQ pair subtracts 1 if that bit was set. Two
// _fpReduce folds follow before the result is stored back into c.
// The borrow produced by the SUBQ/SBBQ pair is not consumed directly: BTRQ
// overwrites the carry flag with bit 127 of the difference.
#define _fpMod(c) \
    MOVQ 0+c, AX \
    MOVQ 8+c, DX \
    SUBQ ·modulusP+0(SB), AX \
    SBBQ ·modulusP+8(SB), DX \
    BTRQ $63, DX \
    SBBQ $0, AX \
    SBBQ $0, DX \
    _fpReduce(AX, DX) \
    _fpReduce(AX, DX) \
    MOVQ AX, 0+c \
    MOVQ DX, 8+c

// _fpAdd: c = a + b
// Uses: AX, DX, FLAGS
// Instr: x86_64
//
// Adds the two 128-bit operands in DX:AX, applies a single _fpReduce fold
// and stores the result in c.
// NOTE(review): the carry out of the 128-bit addition is overwritten by the
// BTRQ inside _fpReduce, so this presumably expects a and b to be below
// 2^127 — confirm against callers.
#define _fpAdd(c,a,b) \
    MOVQ 0+a, AX \
    MOVQ 8+a, DX \
    ADDQ 0+b, AX \
    ADCQ 8+b, DX \
    _fpReduce(AX, DX) \
    MOVQ AX, 0+c \
    MOVQ DX, 8+c

// _fpSub: c = a - b
// Uses: AX, DX, FLAGS
// Instr: x86_64
//
// Subtracts b from a in DX:AX, then clears bit 127 of the difference and
// subtracts 1 if that bit was set (the subtraction counterpart of the
// _fpReduce fold), and stores the result in c.
// NOTE(review): the borrow out of the 128-bit subtraction is overwritten by
// BTRQ, so this presumably expects a and b to be below 2^127 — confirm
// against callers.
#define _fpSub(c,a,b) \
    MOVQ 0+a, AX \
    MOVQ 8+a, DX \
    SUBQ 0+b, AX \
    SBBQ 8+b, DX \
    BTRQ $63, DX \
    SBBQ $0, AX \
    SBBQ $0, DX \
    MOVQ AX, 0+c \
    MOVQ DX, 8+c

// _fpMulLeg: accumulates the product a * b into C2:C1:C0 using MULQ only.
// Uses: AX, CX, DX, FLAGS (plus the three output registers)
//
// C0 is the low word, C1 the middle word and C2 collects carries; the
// macro itself performs no final reduction of C2:C1:C0.
// The four 64x64 partial products are combined as follows (a0/b0 are the
// words at offset 0, a1/b1 the words at offset 8):
//   a0*b0: low half -> C0, high half -> C1.
//   a1*b0 and a0*b1: low half is added to C1; the high half, which has
//          weight 2^128, is doubled (SHLQ $1) and added to C0.
//   a1*b1: the whole 128-bit product has weight 2^128, so DX:AX is doubled
//          (the three-operand SHLQ shifts DX left by one, taking the
//          incoming bit from the top of AX) and added to C1:C0.
// Doubling is consistent with 2^128 being congruent to 2 modulo 2^127-1.
// NOTE(review): SHLQ $1 discards the top bit of DX, so the cross products
// presumably rely on operands below 2^127 — confirm against callers.
#define _fpMulLeg(C2, C1, C0, a, b) \
    MOVQ $0, C2 \
    MOVQ 0+b, CX \
    MOVQ 0+a, AX \
    MULQ CX \
    MOVQ AX, C0 \
    MOVQ DX, C1 \
    MOVQ 8+a, AX \
    MULQ CX \
    SHLQ $1,DX \
    ADDQ DX,C0 \
    ADCQ AX, C1 \
    ADCQ $0, C2 \
    MOVQ 8+b, CX \
    MOVQ 0+a, AX \
    MULQ CX \
    SHLQ $1,DX \
    ADDQ DX,C0 \
    ADCQ AX, C1 \
    ADCQ $0, C2 \
    MOVQ 8+a, AX \
    MULQ CX \
    SHLQ $1,AX,DX \
    SHLQ $1,AX \
    ADDQ AX,C0 \
    ADCQ DX, C1 \
    ADCQ $0, C2