github.com/consensys/gnark-crypto@v0.14.0/ecc/bls24-317/internal/fptower/e2_amd64.s (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include "textflag.h"
    16  #include "funcdata.h"
    17  
    18  // modulus q
    19  DATA q<>+0(SB)/8, $0x8d512e565dab2aab
    20  DATA q<>+8(SB)/8, $0xd6f339e43424bf7e
    21  DATA q<>+16(SB)/8, $0x169a61e684c73446
    22  DATA q<>+24(SB)/8, $0xf28fc5a0b7f9d039
    23  DATA q<>+32(SB)/8, $0x1058ca226f60892c
    24  GLOBL q<>(SB), (RODATA+NOPTR), $40
    25  
    26  // qInv0 q'[0]
    27  DATA qInv0<>(SB)/8, $0x55b5e0028b047ffd
    28  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    29  
    30  #define REDUCE(ra0, ra1, ra2, ra3, ra4, rb0, rb1, rb2, rb3, rb4) \
    31  	MOVQ    ra0, rb0;        \
    32  	SUBQ    q<>(SB), ra0;    \
    33  	MOVQ    ra1, rb1;        \
    34  	SBBQ    q<>+8(SB), ra1;  \
    35  	MOVQ    ra2, rb2;        \
    36  	SBBQ    q<>+16(SB), ra2; \
    37  	MOVQ    ra3, rb3;        \
    38  	SBBQ    q<>+24(SB), ra3; \
    39  	MOVQ    ra4, rb4;        \
    40  	SBBQ    q<>+32(SB), ra4; \
    41  	CMOVQCS rb0, ra0;        \
    42  	CMOVQCS rb1, ra1;        \
    43  	CMOVQCS rb2, ra2;        \
    44  	CMOVQCS rb3, ra3;        \
    45  	CMOVQCS rb4, ra4;        \
    46  
    47  TEXT ·addE2(SB), NOSPLIT, $0-24
    48  	MOVQ x+8(FP), AX
    49  	MOVQ 0(AX), BX
    50  	MOVQ 8(AX), SI
    51  	MOVQ 16(AX), DI
    52  	MOVQ 24(AX), R8
    53  	MOVQ 32(AX), R9
    54  	MOVQ y+16(FP), DX
    55  	ADDQ 0(DX), BX
    56  	ADCQ 8(DX), SI
    57  	ADCQ 16(DX), DI
    58  	ADCQ 24(DX), R8
    59  	ADCQ 32(DX), R9
    60  
    61  	// reduce element(BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14)
    62  	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
    63  
    64  	MOVQ res+0(FP), CX
    65  	MOVQ BX, 0(CX)
    66  	MOVQ SI, 8(CX)
    67  	MOVQ DI, 16(CX)
    68  	MOVQ R8, 24(CX)
    69  	MOVQ R9, 32(CX)
    70  	MOVQ 40(AX), BX
    71  	MOVQ 48(AX), SI
    72  	MOVQ 56(AX), DI
    73  	MOVQ 64(AX), R8
    74  	MOVQ 72(AX), R9
    75  	ADDQ 40(DX), BX
    76  	ADCQ 48(DX), SI
    77  	ADCQ 56(DX), DI
    78  	ADCQ 64(DX), R8
    79  	ADCQ 72(DX), R9
    80  
    81  	// reduce element(BX,SI,DI,R8,R9) using temp registers (R15,R10,R11,R12,R13)
    82  	REDUCE(BX,SI,DI,R8,R9,R15,R10,R11,R12,R13)
    83  
    84  	MOVQ BX, 40(CX)
    85  	MOVQ SI, 48(CX)
    86  	MOVQ DI, 56(CX)
    87  	MOVQ R8, 64(CX)
    88  	MOVQ R9, 72(CX)
    89  	RET
    90  
    91  TEXT ·doubleE2(SB), NOSPLIT, $0-16
    92  	MOVQ res+0(FP), DX
    93  	MOVQ x+8(FP), AX
    94  	MOVQ 0(AX), CX
    95  	MOVQ 8(AX), BX
    96  	MOVQ 16(AX), SI
    97  	MOVQ 24(AX), DI
    98  	MOVQ 32(AX), R8
    99  	ADDQ CX, CX
   100  	ADCQ BX, BX
   101  	ADCQ SI, SI
   102  	ADCQ DI, DI
   103  	ADCQ R8, R8
   104  
   105  	// reduce element(CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13)
   106  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13)
   107  
   108  	MOVQ CX, 0(DX)
   109  	MOVQ BX, 8(DX)
   110  	MOVQ SI, 16(DX)
   111  	MOVQ DI, 24(DX)
   112  	MOVQ R8, 32(DX)
   113  	MOVQ 40(AX), CX
   114  	MOVQ 48(AX), BX
   115  	MOVQ 56(AX), SI
   116  	MOVQ 64(AX), DI
   117  	MOVQ 72(AX), R8
   118  	ADDQ CX, CX
   119  	ADCQ BX, BX
   120  	ADCQ SI, SI
   121  	ADCQ DI, DI
   122  	ADCQ R8, R8
   123  
   124  	// reduce element(CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11)
   125  	REDUCE(CX,BX,SI,DI,R8,R14,R15,R9,R10,R11)
   126  
   127  	MOVQ CX, 40(DX)
   128  	MOVQ BX, 48(DX)
   129  	MOVQ SI, 56(DX)
   130  	MOVQ DI, 64(DX)
   131  	MOVQ R8, 72(DX)
   132  	RET
   133  
   134  TEXT ·subE2(SB), NOSPLIT, $0-24
   135  	XORQ    R8, R8
   136  	MOVQ    x+8(FP), DI
   137  	MOVQ    0(DI), AX
   138  	MOVQ    8(DI), DX
   139  	MOVQ    16(DI), CX
   140  	MOVQ    24(DI), BX
   141  	MOVQ    32(DI), SI
   142  	MOVQ    y+16(FP), DI
   143  	SUBQ    0(DI), AX
   144  	SBBQ    8(DI), DX
   145  	SBBQ    16(DI), CX
   146  	SBBQ    24(DI), BX
   147  	SBBQ    32(DI), SI
   148  	MOVQ    x+8(FP), DI
   149  	MOVQ    $0x8d512e565dab2aab, R9
   150  	MOVQ    $0xd6f339e43424bf7e, R10
   151  	MOVQ    $0x169a61e684c73446, R11
   152  	MOVQ    $0xf28fc5a0b7f9d039, R12
   153  	MOVQ    $0x1058ca226f60892c, R13
   154  	CMOVQCC R8, R9
   155  	CMOVQCC R8, R10
   156  	CMOVQCC R8, R11
   157  	CMOVQCC R8, R12
   158  	CMOVQCC R8, R13
   159  	ADDQ    R9, AX
   160  	ADCQ    R10, DX
   161  	ADCQ    R11, CX
   162  	ADCQ    R12, BX
   163  	ADCQ    R13, SI
   164  	MOVQ    res+0(FP), R14
   165  	MOVQ    AX, 0(R14)
   166  	MOVQ    DX, 8(R14)
   167  	MOVQ    CX, 16(R14)
   168  	MOVQ    BX, 24(R14)
   169  	MOVQ    SI, 32(R14)
   170  	MOVQ    40(DI), AX
   171  	MOVQ    48(DI), DX
   172  	MOVQ    56(DI), CX
   173  	MOVQ    64(DI), BX
   174  	MOVQ    72(DI), SI
   175  	MOVQ    y+16(FP), DI
   176  	SUBQ    40(DI), AX
   177  	SBBQ    48(DI), DX
   178  	SBBQ    56(DI), CX
   179  	SBBQ    64(DI), BX
   180  	SBBQ    72(DI), SI
   181  	MOVQ    $0x8d512e565dab2aab, R15
   182  	MOVQ    $0xd6f339e43424bf7e, R9
   183  	MOVQ    $0x169a61e684c73446, R10
   184  	MOVQ    $0xf28fc5a0b7f9d039, R11
   185  	MOVQ    $0x1058ca226f60892c, R12
   186  	CMOVQCC R8, R15
   187  	CMOVQCC R8, R9
   188  	CMOVQCC R8, R10
   189  	CMOVQCC R8, R11
   190  	CMOVQCC R8, R12
   191  	ADDQ    R15, AX
   192  	ADCQ    R9, DX
   193  	ADCQ    R10, CX
   194  	ADCQ    R11, BX
   195  	ADCQ    R12, SI
   196  	MOVQ    res+0(FP), DI
   197  	MOVQ    AX, 40(DI)
   198  	MOVQ    DX, 48(DI)
   199  	MOVQ    CX, 56(DI)
   200  	MOVQ    BX, 64(DI)
   201  	MOVQ    SI, 72(DI)
   202  	RET
   203  
   204  TEXT ·negE2(SB), NOSPLIT, $0-16
   205  	MOVQ  res+0(FP), DX
   206  	MOVQ  x+8(FP), AX
   207  	MOVQ  0(AX), BX
   208  	MOVQ  8(AX), SI
   209  	MOVQ  16(AX), DI
   210  	MOVQ  24(AX), R8
   211  	MOVQ  32(AX), R9
   212  	MOVQ  BX, AX
   213  	ORQ   SI, AX
   214  	ORQ   DI, AX
   215  	ORQ   R8, AX
   216  	ORQ   R9, AX
   217  	TESTQ AX, AX
   218  	JNE   l1
   219  	MOVQ  AX, 0(DX)
   220  	MOVQ  AX, 8(DX)
   221  	MOVQ  AX, 16(DX)
   222  	MOVQ  AX, 24(DX)
   223  	MOVQ  AX, 32(DX)
   224  	JMP   l3
   225  
   226  l1:
   227  	MOVQ $0x8d512e565dab2aab, CX
   228  	SUBQ BX, CX
   229  	MOVQ CX, 0(DX)
   230  	MOVQ $0xd6f339e43424bf7e, CX
   231  	SBBQ SI, CX
   232  	MOVQ CX, 8(DX)
   233  	MOVQ $0x169a61e684c73446, CX
   234  	SBBQ DI, CX
   235  	MOVQ CX, 16(DX)
   236  	MOVQ $0xf28fc5a0b7f9d039, CX
   237  	SBBQ R8, CX
   238  	MOVQ CX, 24(DX)
   239  	MOVQ $0x1058ca226f60892c, CX
   240  	SBBQ R9, CX
   241  	MOVQ CX, 32(DX)
   242  
   243  l3:
   244  	MOVQ  x+8(FP), AX
   245  	MOVQ  40(AX), BX
   246  	MOVQ  48(AX), SI
   247  	MOVQ  56(AX), DI
   248  	MOVQ  64(AX), R8
   249  	MOVQ  72(AX), R9
   250  	MOVQ  BX, AX
   251  	ORQ   SI, AX
   252  	ORQ   DI, AX
   253  	ORQ   R8, AX
   254  	ORQ   R9, AX
   255  	TESTQ AX, AX
   256  	JNE   l2
   257  	MOVQ  AX, 40(DX)
   258  	MOVQ  AX, 48(DX)
   259  	MOVQ  AX, 56(DX)
   260  	MOVQ  AX, 64(DX)
   261  	MOVQ  AX, 72(DX)
   262  	RET
   263  
   264  l2:
   265  	MOVQ $0x8d512e565dab2aab, CX
   266  	SUBQ BX, CX
   267  	MOVQ CX, 40(DX)
   268  	MOVQ $0xd6f339e43424bf7e, CX
   269  	SBBQ SI, CX
   270  	MOVQ CX, 48(DX)
   271  	MOVQ $0x169a61e684c73446, CX
   272  	SBBQ DI, CX
   273  	MOVQ CX, 56(DX)
   274  	MOVQ $0xf28fc5a0b7f9d039, CX
   275  	SBBQ R8, CX
   276  	MOVQ CX, 64(DX)
   277  	MOVQ $0x1058ca226f60892c, CX
   278  	SBBQ R9, CX
   279  	MOVQ CX, 72(DX)
   280  	RET