github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fr/element_ops_amd64.s (about)

     1  // +build !purego
     2  
     3  // Copyright 2020 ConsenSys Software Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  #include "textflag.h"
    18  #include "funcdata.h"
    19  
    20  // modulus q
        // q is stored as five 64-bit limbs. Offset 0 is the least-significant limb:
        // the borrow chain in REDUCE starts at q<>+0 and ends at q<>+32.
    21  DATA q<>+0(SB)/8, $0x6fe802ff40300001
    22  DATA q<>+8(SB)/8, $0x421ee5da52bde502
    23  DATA q<>+16(SB)/8, $0xdec1d01aa27a1ae0
    24  DATA q<>+24(SB)/8, $0xd3f7498be97c5eaf
    25  DATA q<>+32(SB)/8, $0x04c23a02b586d650
    26  GLOBL q<>(SB), (RODATA+NOPTR), $40
    27  
    28  // qInv0 q'[0]
        // NOTE(review): presumably the Montgomery constant -q^{-1} mod 2^64. It is
        // not referenced by any routine in the part of the file shown here --
        // confirm against the code that uses it.
    29  DATA qInv0<>(SB)/8, $0x702ff9ff402fffff
    30  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    31  
        // REDUCE(ra0..ra4, rb0..rb4) conditionally subtracts q from the five-limb
        // value held in ra0 (least significant) .. ra4.
        //
        // Each ra limb is first copied into the matching rb operand, then q is
        // subtracted with a SUBQ/SBBQ borrow chain. If the last SBBQ borrows
        // (CF set, i.e. the value was smaller than q) the CMOVQCS instructions
        // restore the saved limbs. Net effect:
        //     ra = ra - q   if ra >= q
        //     ra = ra       otherwise
        // A single REDUCE therefore yields a value below q only for inputs below 2q.
        //
        // The MOVQ copies are interleaved with the subtraction; MOVQ does not
        // modify the flags, so the borrow chain is not disturbed.
        //
        // Clobbers rb0..rb4 and the flags. The rb operands may be stack slots:
        // MulBy13 passes s0-8(SP) and s1-16(SP) for rb3 and rb4.
    32  #define REDUCE(ra0, ra1, ra2, ra3, ra4, rb0, rb1, rb2, rb3, rb4) \
    33  	MOVQ    ra0, rb0;        \
    34  	SUBQ    q<>(SB), ra0;    \
    35  	MOVQ    ra1, rb1;        \
    36  	SBBQ    q<>+8(SB), ra1;  \
    37  	MOVQ    ra2, rb2;        \
    38  	SBBQ    q<>+16(SB), ra2; \
    39  	MOVQ    ra3, rb3;        \
    40  	SBBQ    q<>+24(SB), ra3; \
    41  	MOVQ    ra4, rb4;        \
    42  	SBBQ    q<>+32(SB), ra4; \
    43  	CMOVQCS rb0, ra0;        \
    44  	CMOVQCS rb1, ra1;        \
    45  	CMOVQCS rb2, ra2;        \
    46  	CMOVQCS rb3, ra3;        \
    47  	CMOVQCS rb4, ra4;        \
    48  
    49  TEXT ·reduce(SB), NOSPLIT, $0-8
        	// reduce(res): loads the five 64-bit limbs found at the pointer argument
        	// res, applies one conditional subtraction of q (REDUCE), and writes the
        	// result back to the same location.
        	// Frame: no locals ($0), 8 bytes of arguments (the res pointer).
        	// Registers: AX = res; DX,CX,BX,SI,DI = limbs 0..4; R8-R12 = scratch.
    50  	MOVQ res+0(FP), AX
    51  	MOVQ 0(AX), DX
    52  	MOVQ 8(AX), CX
    53  	MOVQ 16(AX), BX
    54  	MOVQ 24(AX), SI
    55  	MOVQ 32(AX), DI
    56  
    57  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
    58  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
    59  
        	// store the (possibly reduced) limbs back through res
    60  	MOVQ DX, 0(AX)
    61  	MOVQ CX, 8(AX)
    62  	MOVQ BX, 16(AX)
    63  	MOVQ SI, 24(AX)
    64  	MOVQ DI, 32(AX)
    65  	RET
    66  
    67  // MulBy3(x *Element)
        // Replaces the five-limb value at x with 3*x, computed as 2*x + x with one
        // conditional subtraction of q (REDUCE) after each step.
        // NOTE(review): each REDUCE subtracts q at most once, so the stored result
        // is below q only if x is below q on entry -- confirm callers guarantee this.
        // Registers: AX = x; DX,CX,BX,SI,DI = limbs 0..4; R8-R15 = scratch.
    68  TEXT ·MulBy3(SB), NOSPLIT, $0-8
    69  	MOVQ x+0(FP), AX
    70  	MOVQ 0(AX), DX
    71  	MOVQ 8(AX), CX
    72  	MOVQ 16(AX), BX
    73  	MOVQ 24(AX), SI
    74  	MOVQ 32(AX), DI
        	// (DX,CX,BX,SI,DI) = 2*x: add the value to itself, carrying upward
    75  	ADDQ DX, DX
    76  	ADCQ CX, CX
    77  	ADCQ BX, BX
    78  	ADCQ SI, SI
    79  	ADCQ DI, DI
    80  
    81  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
    82  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
    83  
        	// add the original x, which is still unmodified in memory: 2*x + x
    84  	ADDQ 0(AX), DX
    85  	ADCQ 8(AX), CX
    86  	ADCQ 16(AX), BX
    87  	ADCQ 24(AX), SI
    88  	ADCQ 32(AX), DI
    89  
    90  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,R8,R9)
    91  	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,R8,R9)
    92  
        	// overwrite x with the result
    93  	MOVQ DX, 0(AX)
    94  	MOVQ CX, 8(AX)
    95  	MOVQ BX, 16(AX)
    96  	MOVQ SI, 24(AX)
    97  	MOVQ DI, 32(AX)
    98  	RET
    99  
   100  // MulBy5(x *Element)
        // Replaces the five-limb value at x with 5*x, computed as 2*(2*x) + x with
        // one conditional subtraction of q (REDUCE) after each step.
        // NOTE(review): each REDUCE subtracts q at most once, so the stored result
        // is below q only if x is below q on entry -- confirm callers guarantee this.
        // Registers: AX = x; DX,CX,BX,SI,DI = limbs 0..4; R8-R15 = scratch.
   101  TEXT ·MulBy5(SB), NOSPLIT, $0-8
   102  	MOVQ x+0(FP), AX
   103  	MOVQ 0(AX), DX
   104  	MOVQ 8(AX), CX
   105  	MOVQ 16(AX), BX
   106  	MOVQ 24(AX), SI
   107  	MOVQ 32(AX), DI
        	// (DX,CX,BX,SI,DI) = 2*x
   108  	ADDQ DX, DX
   109  	ADCQ CX, CX
   110  	ADCQ BX, BX
   111  	ADCQ SI, SI
   112  	ADCQ DI, DI
   113  
   114  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
   115  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
   116  
        	// double again: 4*x
   117  	ADDQ DX, DX
   118  	ADCQ CX, CX
   119  	ADCQ BX, BX
   120  	ADCQ SI, SI
   121  	ADCQ DI, DI
   122  
   123  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,R8,R9)
   124  	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,R8,R9)
   125  
        	// add the original x, which is still unmodified in memory: 4*x + x
   126  	ADDQ 0(AX), DX
   127  	ADCQ 8(AX), CX
   128  	ADCQ 16(AX), BX
   129  	ADCQ 24(AX), SI
   130  	ADCQ 32(AX), DI
   131  
   132  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R10,R11,R12,R13,R14)
   133  	REDUCE(DX,CX,BX,SI,DI,R10,R11,R12,R13,R14)
   134  
        	// overwrite x with the result
   135  	MOVQ DX, 0(AX)
   136  	MOVQ CX, 8(AX)
   137  	MOVQ BX, 16(AX)
   138  	MOVQ SI, 24(AX)
   139  	MOVQ DI, 32(AX)
   140  	RET
   141  
   142  // MulBy13(x *Element)
        // Replaces the five-limb value at x with 13*x, computed as 8*x + 4*x + x:
        // x is doubled three times, the intermediate 4*x is saved and added back to
        // 8*x, and finally the original x is added. One conditional subtraction of
        // q (REDUCE) follows every doubling and addition.
        // NOTE(review): each REDUCE subtracts q at most once, so the stored result
        // is below q only if x is below q on entry -- confirm callers guarantee this.
        // Frame: 16 bytes of locals for the spill slots s0 and s1, 8 bytes of
        // arguments. Unlike MulBy3/MulBy5 this routine is not marked NOSPLIT.
        // Registers: AX = x; DX,CX,BX,SI,DI = limbs 0..4; R8-R15 = scratch/saved 4*x.
   143  TEXT ·MulBy13(SB), $16-8
   144  	MOVQ x+0(FP), AX
   145  	MOVQ 0(AX), DX
   146  	MOVQ 8(AX), CX
   147  	MOVQ 16(AX), BX
   148  	MOVQ 24(AX), SI
   149  	MOVQ 32(AX), DI
        	// (DX,CX,BX,SI,DI) = 2*x
   150  	ADDQ DX, DX
   151  	ADCQ CX, CX
   152  	ADCQ BX, BX
   153  	ADCQ SI, SI
   154  	ADCQ DI, DI
   155  
   156  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
   157  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
   158  
        	// double again: 4*x
   159  	ADDQ DX, DX
   160  	ADCQ CX, CX
   161  	ADCQ BX, BX
   162  	ADCQ SI, SI
   163  	ADCQ DI, DI
   164  
   165  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP))
   166  	REDUCE(DX,CX,BX,SI,DI,R13,R14,R15,s0-8(SP),s1-16(SP))
   167  
        	// save the reduced 4*x in (R13,R14,R15,s0,s1); this overwrites the scratch
        	// copies REDUCE just left in those same locations
   168  	MOVQ DX, R13
   169  	MOVQ CX, R14
   170  	MOVQ BX, R15
   171  	MOVQ SI, s0-8(SP)
   172  	MOVQ DI, s1-16(SP)
        	// double again: 8*x
   173  	ADDQ DX, DX
   174  	ADCQ CX, CX
   175  	ADCQ BX, BX
   176  	ADCQ SI, SI
   177  	ADCQ DI, DI
   178  
   179  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
   180  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
   181  
        	// add the saved 4*x: 8*x + 4*x = 12*x
   182  	ADDQ R13, DX
   183  	ADCQ R14, CX
   184  	ADCQ R15, BX
   185  	ADCQ s0-8(SP), SI
   186  	ADCQ s1-16(SP), DI
   187  
   188  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
   189  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
   190  
        	// add the original x, which is still unmodified in memory: 12*x + x
   191  	ADDQ 0(AX), DX
   192  	ADCQ 8(AX), CX
   193  	ADCQ 16(AX), BX
   194  	ADCQ 24(AX), SI
   195  	ADCQ 32(AX), DI
   196  
   197  	// reduce element(DX,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12)
   198  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12)
   199  
        	// overwrite x with the result
   200  	MOVQ DX, 0(AX)
   201  	MOVQ CX, 8(AX)
   202  	MOVQ BX, 16(AX)
   203  	MOVQ SI, 24(AX)
   204  	MOVQ DI, 32(AX)
   205  	RET
   206  
   207  // Butterfly(a, b *Element) sets a = a + b; b = a - b
        // Both results are computed from the values a and b hold on entry:
        //   sum  = a + b, followed by one conditional subtraction of q (REDUCE),
        //          stored to a;
        //   diff = a - b, plus q when the subtraction borrowed, stored to b.
        // b is written before a.
        // Frame: 24 bytes of locals for the spill slots s0, s1 and s2, which hold
        // the upper three limbs of the sum while CX..R8 are reused to hold q;
        // 16 bytes of arguments (the two pointers).
   208  TEXT ·Butterfly(SB), $24-16
        	// load a into (CX,BX,SI,DI,R8) and keep a second copy in (R9..R13)
   209  	MOVQ    a+0(FP), AX
   210  	MOVQ    0(AX), CX
   211  	MOVQ    8(AX), BX
   212  	MOVQ    16(AX), SI
   213  	MOVQ    24(AX), DI
   214  	MOVQ    32(AX), R8
   215  	MOVQ    CX, R9
   216  	MOVQ    BX, R10
   217  	MOVQ    SI, R11
   218  	MOVQ    DI, R12
   219  	MOVQ    R8, R13
        	// AX = 0: zero source for the CMOVQCC group below. The a pointer it held
        	// is reloaded from the frame before the final stores.
   220  	XORQ    AX, AX
   221  	MOVQ    b+8(FP), DX
        	// (CX,BX,SI,DI,R8) = a + b
   222  	ADDQ    0(DX), CX
   223  	ADCQ    8(DX), BX
   224  	ADCQ    16(DX), SI
   225  	ADCQ    24(DX), DI
   226  	ADCQ    32(DX), R8
        	// (R9..R13) = a - b; CF is left set if the subtraction borrowed (a < b)
   227  	SUBQ    0(DX), R9
   228  	SBBQ    8(DX), R10
   229  	SBBQ    16(DX), R11
   230  	SBBQ    24(DX), R12
   231  	SBBQ    32(DX), R13
        	// park the sum in (R14,R15,s0,s1,s2) so CX..R8 can hold q. The MOVQs here
        	// and below do not modify flags, so the borrow in CF survives to CMOVQCC.
   232  	MOVQ    CX, R14
   233  	MOVQ    BX, R15
   234  	MOVQ    SI, s0-8(SP)
   235  	MOVQ    DI, s1-16(SP)
   236  	MOVQ    R8, s2-24(SP)
        	// (CX,BX,SI,DI,R8) = q, given as immediates (same limbs as q<>)
   237  	MOVQ    $0x6fe802ff40300001, CX
   238  	MOVQ    $0x421ee5da52bde502, BX
   239  	MOVQ    $0xdec1d01aa27a1ae0, SI
   240  	MOVQ    $0xd3f7498be97c5eaf, DI
   241  	MOVQ    $0x04c23a02b586d650, R8
        	// no borrow (CF clear): replace q with 0 so the addition below is a no-op
   242  	CMOVQCC AX, CX
   243  	CMOVQCC AX, BX
   244  	CMOVQCC AX, SI
   245  	CMOVQCC AX, DI
   246  	CMOVQCC AX, R8
        	// diff += q (or 0): brings a borrowed difference back into range
   247  	ADDQ    CX, R9
   248  	ADCQ    BX, R10
   249  	ADCQ    SI, R11
   250  	ADCQ    DI, R12
   251  	ADCQ    R8, R13
        	// restore the parked sum into (CX,BX,SI,DI,R8)
   252  	MOVQ    R14, CX
   253  	MOVQ    R15, BX
   254  	MOVQ    s0-8(SP), SI
   255  	MOVQ    s1-16(SP), DI
   256  	MOVQ    s2-24(SP), R8
        	// b = diff; after these stores R9..R13 are free to serve as REDUCE scratch
   257  	MOVQ    R9, 0(DX)
   258  	MOVQ    R10, 8(DX)
   259  	MOVQ    R11, 16(DX)
   260  	MOVQ    R12, 24(DX)
   261  	MOVQ    R13, 32(DX)
   262  
   263  	// reduce element(CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13)
   264  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13)
   265  
        	// a = reduced sum (AX was zeroed above, so reload the pointer first)
   266  	MOVQ a+0(FP), AX
   267  	MOVQ CX, 0(AX)
   268  	MOVQ BX, 8(AX)
   269  	MOVQ SI, 16(AX)
   270  	MOVQ DI, 24(AX)
   271  	MOVQ R8, 32(AX)
   272  	RET