github.com/consensys/gnark-crypto@v0.14.0/ecc/bls12-378/internal/fptower/e2_amd64.s (about)

     1  // Copyright 2020 ConsenSys Software Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include "textflag.h"
    16  #include "funcdata.h"
    17  
    18  // modulus q: six 64-bit limbs, least-significant limb at offset 0 (48 bytes, read-only, no pointers)
    19  DATA q<>+0(SB)/8, $0x9948a20000000001
    20  DATA q<>+8(SB)/8, $0xce97f76a822c0000
    21  DATA q<>+16(SB)/8, $0x980dc360d0a49d7f
    22  DATA q<>+24(SB)/8, $0x84059eb647102326
    23  DATA q<>+32(SB)/8, $0x53cb5d240ed107a2
    24  DATA q<>+40(SB)/8, $0x03eeb0416684d190
    25  GLOBL q<>(SB), (RODATA+NOPTR), $48
    26  
    27  // qInv0 q'[0]: one 64-bit word with q[0]*qInv0 == -1 (mod 2^64), i.e. -q^{-1} mod 2^64.
        // Not referenced by the routines in this listing; presumably consumed by
        // Montgomery-multiplication code elsewhere (confirm).
    28  DATA qInv0<>(SB)/8, $0x9948a1ffffffffff
    29  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    30  
    // REDUCE(ra0..ra5, rb0..rb5) conditionally subtracts the modulus q<> from the
    // 6-limb value held in ra0 (least significant) .. ra5, in place.
    //
    // Each limb is first saved to the matching rb operand, then q is subtracted
    // with a SUBQ/SBBQ borrow chain. If the final SBBQ leaves the carry flag set
    // (the value was smaller than q as an unsigned 384-bit integer), CMOVQCS
    // restores the saved limbs; otherwise the difference is kept.
    // The MOVQ saves are interleaved with the borrow chain, which is safe because
    // MOVQ does not modify the flags.
    //
    // Clobbers: rb0..rb5 and the flags.
    // Assumes the incoming value fits in 384 bits (no carry out of the operation
    // that produced it) - confirm at each call site.
    31  #define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
    32  	MOVQ    ra0, rb0;        \
    33  	SUBQ    q<>(SB), ra0;    \
    34  	MOVQ    ra1, rb1;        \
    35  	SBBQ    q<>+8(SB), ra1;  \
    36  	MOVQ    ra2, rb2;        \
    37  	SBBQ    q<>+16(SB), ra2; \
    38  	MOVQ    ra3, rb3;        \
    39  	SBBQ    q<>+24(SB), ra3; \
    40  	MOVQ    ra4, rb4;        \
    41  	SBBQ    q<>+32(SB), ra4; \
    42  	MOVQ    ra5, rb5;        \
    43  	SBBQ    q<>+40(SB), ra5; \
    44  	CMOVQCS rb0, ra0;        \
    45  	CMOVQCS rb1, ra1;        \
    46  	CMOVQCS rb2, ra2;        \
    47  	CMOVQCS rb3, ra3;        \
    48  	CMOVQCS rb4, ra4;        \
    49  	CMOVQCS rb5, ra5;        \
    50  
    // addE2(res, x, y): three pointer arguments (24 bytes), no return value.
    // x, y and res each address 96 bytes laid out as two 6-limb values at byte
    // offsets 0 and 48. For each half this computes x + y with an ADDQ/ADCQ carry
    // chain, applies REDUCE (one conditional subtraction of q) and stores the
    // result at the same offset of res.
    // The result is the canonical sum mod q provided both inputs are already
    // below q - assumption, confirm against callers.
    //
    // Register use: AX = x, DX = y, CX = res, (BX,SI,DI,R8,R9,R10) = working limbs,
    // R11-R15 plus one dead register = REDUCE scratch. The sixth scratch is CX for
    // the first half (res is not loaded yet) and AX for the second half (x has
    // been fully read), so the routine touches no stack memory and the declared
    // 0-byte frame is accurate.
    51  TEXT ·addE2(SB), NOSPLIT, $0-24
    52  	MOVQ x+8(FP), AX
    53  	MOVQ 0(AX), BX
    54  	MOVQ 8(AX), SI
    55  	MOVQ 16(AX), DI
    56  	MOVQ 24(AX), R8
    57  	MOVQ 32(AX), R9
    58  	MOVQ 40(AX), R10
    59  	MOVQ y+16(FP), DX
    60  	ADDQ 0(DX), BX
    61  	ADCQ 8(DX), SI
    62  	ADCQ 16(DX), DI
    63  	ADCQ 24(DX), R8
    64  	ADCQ 32(DX), R9
    65  	ADCQ 40(DX), R10
    66  
    67  	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,CX)
    68  	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,CX)
    69  
    70  	MOVQ res+0(FP), CX // CX was only scratch above; from here on it holds res
    71  	MOVQ BX, 0(CX)
    72  	MOVQ SI, 8(CX)
    73  	MOVQ DI, 16(CX)
    74  	MOVQ R8, 24(CX)
    75  	MOVQ R9, 32(CX)
    76  	MOVQ R10, 40(CX)
    77  	MOVQ 48(AX), BX
    78  	MOVQ 56(AX), SI
    79  	MOVQ 64(AX), DI
    80  	MOVQ 72(AX), R8
    81  	MOVQ 80(AX), R9
    82  	MOVQ 88(AX), R10 // last read through AX; AX is free afterwards
    83  	ADDQ 48(DX), BX
    84  	ADCQ 56(DX), SI
    85  	ADCQ 64(DX), DI
    86  	ADCQ 72(DX), R8
    87  	ADCQ 80(DX), R9
    88  	ADCQ 88(DX), R10
    89  
    90  	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,AX)
    91  	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX)
    92  
    93  	MOVQ BX, 48(CX)
    94  	MOVQ SI, 56(CX)
    95  	MOVQ DI, 64(CX)
    96  	MOVQ R8, 72(CX)
    97  	MOVQ R9, 80(CX)
    98  	MOVQ R10, 88(CX)
    99  	RET
   100  
   // doubleE2(res, x): two pointer arguments (16 bytes), no return value, no stack frame.
   // x and res each address 96 bytes laid out as two 6-limb values at byte
   // offsets 0 and 48. Each half is added to itself with an ADDQ/ADCQ carry
   // chain, passed through REDUCE (one conditional subtraction of q) and stored
   // at the same offset of res.
   // The result is the canonical 2*x mod q provided the input half is already
   // below q - assumption, confirm against callers.
   //
   // Register use: DX = res, AX = x, (CX,BX,SI,DI,R8,R9) = working limbs,
   // R10-R15 = REDUCE scratch.
   101  TEXT ·doubleE2(SB), NOSPLIT, $0-16
   102  	MOVQ res+0(FP), DX
   103  	MOVQ x+8(FP), AX
   104  	MOVQ 0(AX), CX
   105  	MOVQ 8(AX), BX
   106  	MOVQ 16(AX), SI
   107  	MOVQ 24(AX), DI
   108  	MOVQ 32(AX), R8
   109  	MOVQ 40(AX), R9
   110  	ADDQ CX, CX // limb + limb; the carry propagates upward through the ADCQs
   111  	ADCQ BX, BX
   112  	ADCQ SI, SI
   113  	ADCQ DI, DI
   114  	ADCQ R8, R8
   115  	ADCQ R9, R9
   116  
   117  	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
   118  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)
   119  
   120  	MOVQ CX, 0(DX)
   121  	MOVQ BX, 8(DX)
   122  	MOVQ SI, 16(DX)
   123  	MOVQ DI, 24(DX)
   124  	MOVQ R8, 32(DX)
   125  	MOVQ R9, 40(DX)
   126  	MOVQ 48(AX), CX // second half: same sequence on offsets 48..88
   127  	MOVQ 56(AX), BX
   128  	MOVQ 64(AX), SI
   129  	MOVQ 72(AX), DI
   130  	MOVQ 80(AX), R8
   131  	MOVQ 88(AX), R9
   132  	ADDQ CX, CX
   133  	ADCQ BX, BX
   134  	ADCQ SI, SI
   135  	ADCQ DI, DI
   136  	ADCQ R8, R8
   137  	ADCQ R9, R9
   138  
   139  	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
   140  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)
   141  
   142  	MOVQ CX, 48(DX)
   143  	MOVQ BX, 56(DX)
   144  	MOVQ SI, 64(DX)
   145  	MOVQ DI, 72(DX)
   146  	MOVQ R8, 80(DX)
   147  	MOVQ R9, 88(DX)
   148  	RET
   149  
   // subE2(res, x, y): three pointer arguments (24 bytes), no return value, no stack frame.
   // x, y and res each address 96 bytes laid out as two 6-limb values at byte
   // offsets 0 and 48. For each half this computes x - y with a SUBQ/SBBQ borrow
   // chain; if the chain borrows (x < y as unsigned 384-bit integers) the modulus
   // q is added back, otherwise zero is added. The choice is made branch-free
   // with CMOVQCC against the zero register R9.
   // The result is the canonical difference mod q provided both inputs are
   // already below q - assumption, confirm against callers.
   //
   // The immediates loaded below are the same six limbs as the q<> data symbol.
   // Every instruction between the last SBBQ and the last CMOVQCC is a MOVQ or a
   // CMOVQ, neither of which modifies the flags, so the borrow is still valid
   // when it is tested.
   150  TEXT ·subE2(SB), NOSPLIT, $0-24
   151  	XORQ    R9, R9 // R9 = 0 for the whole routine
   152  	MOVQ    x+8(FP), R8
   153  	MOVQ    0(R8), AX
   154  	MOVQ    8(R8), DX
   155  	MOVQ    16(R8), CX
   156  	MOVQ    24(R8), BX
   157  	MOVQ    32(R8), SI
   158  	MOVQ    40(R8), DI
   159  	MOVQ    y+16(FP), R8
   160  	SUBQ    0(R8), AX
   161  	SBBQ    8(R8), DX
   162  	SBBQ    16(R8), CX
   163  	SBBQ    24(R8), BX
   164  	SBBQ    32(R8), SI
   165  	SBBQ    40(R8), DI // CF = 1 here iff the low half of x is below the low half of y
   166  	MOVQ    x+8(FP), R8 // reload x for the second half; flags untouched
   167  	MOVQ    $0x9948a20000000001, R10
   168  	MOVQ    $0xce97f76a822c0000, R11
   169  	MOVQ    $0x980dc360d0a49d7f, R12
   170  	MOVQ    $0x84059eb647102326, R13
   171  	MOVQ    $0x53cb5d240ed107a2, R14
   172  	MOVQ    $0x03eeb0416684d190, R15
   173  	CMOVQCC R9, R10 // no borrow: the correction becomes 0 instead of q
   174  	CMOVQCC R9, R11
   175  	CMOVQCC R9, R12
   176  	CMOVQCC R9, R13
   177  	CMOVQCC R9, R14
   178  	CMOVQCC R9, R15
   179  	ADDQ    R10, AX // add the correction (q or 0) to the difference
   180  	ADCQ    R11, DX
   181  	ADCQ    R12, CX
   182  	ADCQ    R13, BX
   183  	ADCQ    R14, SI
   184  	ADCQ    R15, DI
   185  	MOVQ    res+0(FP), R10
   186  	MOVQ    AX, 0(R10)
   187  	MOVQ    DX, 8(R10)
   188  	MOVQ    CX, 16(R10)
   189  	MOVQ    BX, 24(R10)
   190  	MOVQ    SI, 32(R10)
   191  	MOVQ    DI, 40(R10)
   192  	MOVQ    48(R8), AX // second half: R8 still holds x
   193  	MOVQ    56(R8), DX
   194  	MOVQ    64(R8), CX
   195  	MOVQ    72(R8), BX
   196  	MOVQ    80(R8), SI
   197  	MOVQ    88(R8), DI
   198  	MOVQ    y+16(FP), R8
   199  	SUBQ    48(R8), AX
   200  	SBBQ    56(R8), DX
   201  	SBBQ    64(R8), CX
   202  	SBBQ    72(R8), BX
   203  	SBBQ    80(R8), SI
   204  	SBBQ    88(R8), DI // CF = 1 here iff the high half of x is below the high half of y
   205  	MOVQ    $0x9948a20000000001, R11 // limbs of q again, in R11..R15 then R10 this time
   206  	MOVQ    $0xce97f76a822c0000, R12
   207  	MOVQ    $0x980dc360d0a49d7f, R13
   208  	MOVQ    $0x84059eb647102326, R14
   209  	MOVQ    $0x53cb5d240ed107a2, R15
   210  	MOVQ    $0x03eeb0416684d190, R10
   211  	CMOVQCC R9, R11
   212  	CMOVQCC R9, R12
   213  	CMOVQCC R9, R13
   214  	CMOVQCC R9, R14
   215  	CMOVQCC R9, R15
   216  	CMOVQCC R9, R10
   217  	ADDQ    R11, AX
   218  	ADCQ    R12, DX
   219  	ADCQ    R13, CX
   220  	ADCQ    R14, BX
   221  	ADCQ    R15, SI
   222  	ADCQ    R10, DI
   223  	MOVQ    res+0(FP), R8 // res is reloaded because R10 was reused as a limb of q
   224  	MOVQ    AX, 48(R8)
   225  	MOVQ    DX, 56(R8)
   226  	MOVQ    CX, 64(R8)
   227  	MOVQ    BX, 72(R8)
   228  	MOVQ    SI, 80(R8)
   229  	MOVQ    DI, 88(R8)
   230  	RET
   231  
   // negE2(res, x): two pointer arguments (16 bytes), no return value, no stack frame.
   // x and res each address 96 bytes laid out as two 6-limb values at byte
   // offsets 0 and 48. Each half is handled independently:
   //   - if all six limbs are zero, six zero limbs are stored;
   //   - otherwise q - half is stored, computed with a SUBQ/SBBQ borrow chain
   //     against the limbs of q loaded as immediates (same values as q<>).
   // The zero special case presumably keeps -0 stored as 0 rather than as q
   // (confirm). The non-zero result lies in (0, q) provided the input half is
   // below q - assumption, confirm against callers.
   //
   // Labels: l1 = first half non-zero, l3 = start of second half,
   // l2 = second half non-zero.
   232  TEXT ·negE2(SB), NOSPLIT, $0-16
   233  	MOVQ  res+0(FP), DX
   234  	MOVQ  x+8(FP), AX
   235  	MOVQ  0(AX), BX
   236  	MOVQ  8(AX), SI
   237  	MOVQ  16(AX), DI
   238  	MOVQ  24(AX), R8
   239  	MOVQ  32(AX), R9
   240  	MOVQ  40(AX), R10
   241  	MOVQ  BX, AX // AX = OR of all six limbs; zero iff the whole half is zero
   242  	ORQ   SI, AX
   243  	ORQ   DI, AX
   244  	ORQ   R8, AX
   245  	ORQ   R9, AX
   246  	ORQ   R10, AX
   247  	TESTQ AX, AX
   248  	JNE   l1
   249  	MOVQ  AX, 0(DX) // AX is known to be 0 here: store a zero half
   250  	MOVQ  AX, 8(DX)
   251  	MOVQ  AX, 16(DX)
   252  	MOVQ  AX, 24(DX)
   253  	MOVQ  AX, 32(DX)
   254  	MOVQ  AX, 40(DX)
   255  	JMP   l3
   256  
   257  l1:
        	// q - x, one limb at a time through CX; the interleaved MOVQs (immediate
        	// loads and stores) leave the borrow flag intact for the next SBBQ.
   258  	MOVQ $0x9948a20000000001, CX
   259  	SUBQ BX, CX
   260  	MOVQ CX, 0(DX)
   261  	MOVQ $0xce97f76a822c0000, CX
   262  	SBBQ SI, CX
   263  	MOVQ CX, 8(DX)
   264  	MOVQ $0x980dc360d0a49d7f, CX
   265  	SBBQ DI, CX
   266  	MOVQ CX, 16(DX)
   267  	MOVQ $0x84059eb647102326, CX
   268  	SBBQ R8, CX
   269  	MOVQ CX, 24(DX)
   270  	MOVQ $0x53cb5d240ed107a2, CX
   271  	SBBQ R9, CX
   272  	MOVQ CX, 32(DX)
   273  	MOVQ $0x03eeb0416684d190, CX
   274  	SBBQ R10, CX
   275  	MOVQ CX, 40(DX)
   276  
   277  l3:
   278  	MOVQ  x+8(FP), AX // AX was overwritten by the zero test; reload x
   279  	MOVQ  48(AX), BX
   280  	MOVQ  56(AX), SI
   281  	MOVQ  64(AX), DI
   282  	MOVQ  72(AX), R8
   283  	MOVQ  80(AX), R9
   284  	MOVQ  88(AX), R10
   285  	MOVQ  BX, AX // same zero test for the second half
   286  	ORQ   SI, AX
   287  	ORQ   DI, AX
   288  	ORQ   R8, AX
   289  	ORQ   R9, AX
   290  	ORQ   R10, AX
   291  	TESTQ AX, AX
   292  	JNE   l2
   293  	MOVQ  AX, 48(DX)
   294  	MOVQ  AX, 56(DX)
   295  	MOVQ  AX, 64(DX)
   296  	MOVQ  AX, 72(DX)
   297  	MOVQ  AX, 80(DX)
   298  	MOVQ  AX, 88(DX)
   299  	RET
   300  
   301  l2:
        	// q - x for the second half, same interleaved borrow chain as at l1.
   302  	MOVQ $0x9948a20000000001, CX
   303  	SUBQ BX, CX
   304  	MOVQ CX, 48(DX)
   305  	MOVQ $0xce97f76a822c0000, CX
   306  	SBBQ SI, CX
   307  	MOVQ CX, 56(DX)
   308  	MOVQ $0x980dc360d0a49d7f, CX
   309  	SBBQ DI, CX
   310  	MOVQ CX, 64(DX)
   311  	MOVQ $0x84059eb647102326, CX
   312  	SBBQ R8, CX
   313  	MOVQ CX, 72(DX)
   314  	MOVQ $0x53cb5d240ed107a2, CX
   315  	SBBQ R9, CX
   316  	MOVQ CX, 80(DX)
   317  	MOVQ $0x03eeb0416684d190, CX
   318  	SBBQ R10, CX
   319  	MOVQ CX, 88(DX)
   320  	RET