github.com/cloudflare/circl@v1.5.0/ecc/fourq/fp_amd64.h (about)

     1  // CHECK_BMI2 runs the bmi2 code when the byte ·hasBMI2(SB) is nonzero;
     2  // otherwise it jumps to label and runs the legacy code. Both paths end in RET.
        // label is emitted by the macro itself as the jump target in front of legacy.
        // Uses: FLAGS (set by CMPB), plus whatever bmi2 and legacy use.
     3  #define CHECK_BMI2(label, legacy, bmi2) \
     4      CMPB ·hasBMI2(SB), $0 \
     5      JE label              \
     6      bmi2                  \
     7      RET                   \
     8      label:                \
     9      legacy                \
    10      RET
    11  
    // _fpReduce: clears bit 63 of c1 and adds that bit's previous value to the
    // two-word value c1:c0 (c0 is the low word, c1 the high word).
    // Read as a 128-bit number, bit 127 is removed and added back as +1, which is
    // one folding step modulo 2^127-1 (presumably the modulus p — confirm against ·modulusP).
    // Uses: FLAGS (BTRQ puts the old bit in the carry flag, ADCQ consumes it)
    12  #define _fpReduce(c0, c1) \
    13      BTRQ $63, c1          \
    14      ADCQ  $0, c0          \
    15      ADCQ  $0, c1
    16  
    17  // _fpMod: c = c mod p
        // c is updated in place; it is two 64-bit words at 0+c (low) and 8+c (high),
        // and p is read from ·modulusP(SB).
        // Steps: subtract p; clear bit 127 and subtract its previous value (the same
        // correction _fpSub applies after a subtraction); then apply _fpReduce twice.
    18  // Uses: AX, DX, FLAGS
    19  // Instr: x86_64
    20  #define _fpMod(c) \
    21      MOVQ 0+c, AX \
    22      MOVQ 8+c, DX \
    23      SUBQ ·modulusP+0(SB), AX \
    24      SBBQ ·modulusP+8(SB), DX \
    25      BTRQ $63, DX  \
    26      SBBQ  $0, AX  \
    27      SBBQ  $0, DX  \
    28      _fpReduce(AX, DX) \
    29      _fpReduce(AX, DX) \
    30      MOVQ AX, 0+c  \
    31      MOVQ DX, 8+c
    32  
    33  // _fpAdd: c = a + b
        // a, b and c are each two 64-bit words at 0+x (low) and 8+x (high).
        // The 128-bit sum is built in DX:AX, passed through one _fpReduce step and
        // stored to c.
        // NOTE(review): the carry out of ADCQ is overwritten by the BTRQ inside
        // _fpReduce, so inputs are presumably expected to be below 2^127 — TODO confirm.
    34  // Uses: AX, DX, FLAGS
    35  // Instr: x86_64
    36  #define _fpAdd(c,a,b) \
    37      MOVQ 0+a, AX      \
    38      MOVQ 8+a, DX      \
    39      ADDQ 0+b, AX      \
    40      ADCQ 8+b, DX      \
    41      _fpReduce(AX, DX) \
    42      MOVQ AX, 0+c      \
    43      MOVQ DX, 8+c
    44  
    45  // _fpSub: c = a - b
        // a, b and c are each two 64-bit words at 0+x (low) and 8+x (high).
        // The 128-bit difference is built in DX:AX; bit 127 is then cleared and its
        // previous value subtracted from the result before storing to c.
        // No _fpReduce step follows the correction.
        // NOTE(review): presumably a borrow (a < b) leaves bit 127 set, so the
        // correction yields a - b + 2^127 - 1 — confirm the expected input range.
    46  // Uses: AX, DX, FLAGS
    47  // Instr: x86_64
    48  #define _fpSub(c,a,b) \
    49      MOVQ 0+a, AX      \
    50      MOVQ 8+a, DX      \
    51      SUBQ 0+b, AX      \
    52      SBBQ 8+b, DX      \
    53      BTRQ $63, DX      \
    54      SBBQ  $0, AX      \
    55      SBBQ  $0, DX      \
    56      MOVQ AX, 0+c      \
    57      MOVQ DX, 8+c
    58  
    // _fpMulLeg: multiplies a by b with MULQ and leaves the result in the
    // registers C2:C1:C0 (C0 lowest); nothing is stored to memory.
    // a and b are each two 64-bit words at 0+x (low, x0) and 8+x (high, x1).
    // C2 is zeroed first and only collects carries. The four partial products are
    // accumulated as follows (each MULQ result is DX:AX, high:low):
    //   a0*b0: AX -> C0, DX -> C1
    //   a1*b0: 2*DX added to C0, AX added to C1, carry into C2
    //   a0*b1: 2*DX added to C0, AX added to C1, carry into C2
    //   a1*b1: DX:AX doubled as a 128-bit value, AX added to C0, DX to C1, carry into C2
    // The doubling of words at weight 2^128 presumably relies on 2^128 = 2 mod 2^127-1
    // — confirm against ·modulusP. No final reduction is applied here.
    // NOTE(review): SHLQ $1,DX drops the top bit of DX, so the high words of a and b
    // are presumably expected to be below 2^63 — TODO confirm.
    // Uses: AX, CX, DX, FLAGS (and writes C0, C1, C2)
    59  #define _fpMulLeg(C2, C1, C0, a, b) \
    60      MOVQ   $0, C2 \
    61      MOVQ  0+b, CX \
    62      MOVQ  0+a, AX \
    63      MULQ CX       \
    64      MOVQ AX, C0   \
    65      MOVQ DX, C1   \
    66      MOVQ  8+a, AX \
    67      MULQ CX       \
    68      SHLQ $1,DX    \
    69      ADDQ DX,C0    \
    70      ADCQ AX, C1   \
    71      ADCQ $0, C2   \
    72      MOVQ  8+b, CX \
    73      MOVQ  0+a, AX \
    74      MULQ CX       \
    75      SHLQ $1,DX    \
    76      ADDQ DX,C0    \
    77      ADCQ AX, C1   \
    78      ADCQ $0, C2   \
    79      MOVQ  8+a, AX \
    80      MULQ CX       \
    81      SHLQ $1,AX,DX \
    82      SHLQ $1,AX    \
    83      ADDQ AX,C0    \
    84      ADCQ DX, C1   \
    85      ADCQ $0, C2