github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-761/fr/element_ops_amd64.s (about)

     1  // +build !purego
     2  
     3  // Copyright 2020 ConsenSys Software Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  #include "textflag.h"
    18  #include "funcdata.h"
    19  
    20  // modulus q: 48-byte constant stored as six 64-bit limbs, least-significant limb at offset 0
    21  DATA q<>+0(SB)/8, $0x8508c00000000001
    22  DATA q<>+8(SB)/8, $0x170b5d4430000000
    23  DATA q<>+16(SB)/8, $0x1ef3622fba094800
    24  DATA q<>+24(SB)/8, $0x1a22d9f300f5138f
    25  DATA q<>+32(SB)/8, $0xc63b05c06ca1493b
    26  DATA q<>+40(SB)/8, $0x01ae3a4617c510ea // top limb is below 2^57, so q < 2^377 and 2*q still fits in six limbs
    27  GLOBL q<>(SB), (RODATA+NOPTR), $48 // read-only, pointer-free, file-local (<>) symbol
    28  
    29  // qInv0 q'[0]: single 64-bit word with q[0] * qInv0 == -1 (mod 2^64); not referenced by the code visible in this file
    30  DATA qInv0<>(SB)/8, $0x8508bfffffffffff
    31  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    32  // REDUCE(ra0..ra5, rb0..rb5): conditional subtraction of q. Each limb ra_i is saved in rb_i, then q is subtracted from ra with the borrow chained through SBBQ; if the last subtraction borrows (CF set, i.e. ra < q) the CMOVQCS group restores the saved limbs. Net effect: ra = (ra >= q) ? ra - q : ra. Overwrites rb0..rb5 and the flags.
    33  #define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
    34  	MOVQ    ra0, rb0;        \
    35  	SUBQ    q<>(SB), ra0;    \
    36  	MOVQ    ra1, rb1;        \
    37  	SBBQ    q<>+8(SB), ra1;  \
    38  	MOVQ    ra2, rb2;        \
    39  	SBBQ    q<>+16(SB), ra2; \
    40  	MOVQ    ra3, rb3;        \
    41  	SBBQ    q<>+24(SB), ra3; \
    42  	MOVQ    ra4, rb4;        \
    43  	SBBQ    q<>+32(SB), ra4; \
    44  	MOVQ    ra5, rb5;        \
    45  	SBBQ    q<>+40(SB), ra5; \
    46  	CMOVQCS rb0, ra0;        \
    47  	CMOVQCS rb1, ra1;        \
    48  	CMOVQCS rb2, ra2;        \
    49  	CMOVQCS rb3, ra3;        \
    50  	CMOVQCS rb4, ra4;        \
    51  	CMOVQCS rb5, ra5;        \
    52  
    53  TEXT ·reduce(SB), NOSPLIT, $0-8 // reduce(res): in place, replaces the six-limb value at res by value - q when value >= q; no stack frame, one 8-byte pointer argument
    54  	MOVQ res+0(FP), AX // AX = res pointer, kept for the store at the end
    55  	MOVQ 0(AX), DX // DX,CX,BX,SI,DI,R8 = limbs 0..5 of *res (limb 0 is least significant)
    56  	MOVQ 8(AX), CX
    57  	MOVQ 16(AX), BX
    58  	MOVQ 24(AX), SI
    59  	MOVQ 32(AX), DI
    60  	MOVQ 40(AX), R8
    61  
    62  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
    63  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
    64  
    65  	MOVQ DX, 0(AX) // write the (possibly reduced) limbs back over *res
    66  	MOVQ CX, 8(AX)
    67  	MOVQ BX, 16(AX)
    68  	MOVQ SI, 24(AX)
    69  	MOVQ DI, 32(AX)
    70  	MOVQ R8, 40(AX)
    71  	RET
    72  
    73  // MulBy3(x *Element) sets x = 3*x in place as (2x) + x, with a conditional subtraction of q after each step (result < q assuming x < q on entry)
    74  TEXT ·MulBy3(SB), NOSPLIT, $0-8
    75  	MOVQ x+0(FP), AX // AX = x pointer; *x stays unmodified until the final stores
    76  	MOVQ 0(AX), DX // DX,CX,BX,SI,DI,R8 = limbs 0..5 of x
    77  	MOVQ 8(AX), CX
    78  	MOVQ 16(AX), BX
    79  	MOVQ 24(AX), SI
    80  	MOVQ 32(AX), DI
    81  	MOVQ 40(AX), R8
    82  	ADDQ DX, DX // 2x: add the value to itself, carry rippling upward through the ADCQ chain
    83  	ADCQ CX, CX
    84  	ADCQ BX, BX
    85  	ADCQ SI, SI
    86  	ADCQ DI, DI
    87  	ADCQ R8, R8
    88  
    89  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
    90  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
    91  
    92  	ADDQ 0(AX), DX // 3x: add the original x, read straight from memory
    93  	ADCQ 8(AX), CX
    94  	ADCQ 16(AX), BX
    95  	ADCQ 24(AX), SI
    96  	ADCQ 32(AX), DI
    97  	ADCQ 40(AX), R8
    98  
    99  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
   100  	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)
   101  
   102  	MOVQ DX, 0(AX) // store the result over x
   103  	MOVQ CX, 8(AX)
   104  	MOVQ BX, 16(AX)
   105  	MOVQ SI, 24(AX)
   106  	MOVQ DI, 32(AX)
   107  	MOVQ R8, 40(AX)
   108  	RET
   109  
   110  // MulBy5(x *Element) sets x = 5*x in place as 2*(2x) + x, with a conditional subtraction of q after each step (result < q assuming x < q on entry)
   111  TEXT ·MulBy5(SB), NOSPLIT, $0-8
   112  	MOVQ x+0(FP), AX // AX = x pointer; *x stays unmodified until the final stores
   113  	MOVQ 0(AX), DX // DX,CX,BX,SI,DI,R8 = limbs 0..5 of x
   114  	MOVQ 8(AX), CX
   115  	MOVQ 16(AX), BX
   116  	MOVQ 24(AX), SI
   117  	MOVQ 32(AX), DI
   118  	MOVQ 40(AX), R8
   119  	ADDQ DX, DX // 2x: add the value to itself, carry rippling upward through the ADCQ chain
   120  	ADCQ CX, CX
   121  	ADCQ BX, BX
   122  	ADCQ SI, SI
   123  	ADCQ DI, DI
   124  	ADCQ R8, R8
   125  
   126  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
   127  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
   128  
   129  	ADDQ DX, DX // 4x: double the reduced 2x
   130  	ADCQ CX, CX
   131  	ADCQ BX, BX
   132  	ADCQ SI, SI
   133  	ADCQ DI, DI
   134  	ADCQ R8, R8
   135  
   136  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
   137  	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)
   138  
   139  	ADDQ 0(AX), DX // 5x: add the original x, read straight from memory
   140  	ADCQ 8(AX), CX
   141  	ADCQ 16(AX), BX
   142  	ADCQ 24(AX), SI
   143  	ADCQ 32(AX), DI
   144  	ADCQ 40(AX), R8
   145  
   146  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12)
   147  	REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12)
   148  
   149  	MOVQ DX, 0(AX) // store the result over x
   150  	MOVQ CX, 8(AX)
   151  	MOVQ BX, 16(AX)
   152  	MOVQ SI, 24(AX)
   153  	MOVQ DI, 32(AX)
   154  	MOVQ R8, 40(AX)
   155  	RET
   156  
   157  // MulBy13(x *Element) sets x = 13*x in place as 8x + 4x + x, with a conditional subtraction of q after each step (result < q assuming x < q on entry)
   158  TEXT ·MulBy13(SB), $40-8 // 40-byte frame: five 8-byte slots s0..s4 that, together with R15, hold the saved 4x
   159  	MOVQ x+0(FP), AX // AX = x pointer; *x stays unmodified until the final stores
   160  	MOVQ 0(AX), DX // DX,CX,BX,SI,DI,R8 = limbs 0..5 of x
   161  	MOVQ 8(AX), CX
   162  	MOVQ 16(AX), BX
   163  	MOVQ 24(AX), SI
   164  	MOVQ 32(AX), DI
   165  	MOVQ 40(AX), R8
   166  	ADDQ DX, DX // 2x: add the value to itself, carry rippling upward through the ADCQ chain
   167  	ADCQ CX, CX
   168  	ADCQ BX, BX
   169  	ADCQ SI, SI
   170  	ADCQ DI, DI
   171  	ADCQ R8, R8
   172  
   173  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
   174  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
   175  
   176  	ADDQ DX, DX // 4x: double the reduced 2x
   177  	ADCQ CX, CX
   178  	ADCQ BX, BX
   179  	ADCQ SI, SI
   180  	ADCQ DI, DI
   181  	ADCQ R8, R8
   182  
   183  	// reduce element(DX,CX,BX,SI,DI,R8) using R15 and the five stack slots (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) as temporaries
   184  	REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP))
   185  
   186  	MOVQ DX, R15 // keep the reduced 4x in R15 + s0..s4, overwriting the temporaries REDUCE just used
   187  	MOVQ CX, s0-8(SP)
   188  	MOVQ BX, s1-16(SP)
   189  	MOVQ SI, s2-24(SP)
   190  	MOVQ DI, s3-32(SP)
   191  	MOVQ R8, s4-40(SP)
   192  	ADDQ DX, DX // 8x: double the reduced 4x
   193  	ADCQ CX, CX
   194  	ADCQ BX, BX
   195  	ADCQ SI, SI
   196  	ADCQ DI, DI
   197  	ADCQ R8, R8
   198  
   199  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
   200  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
   201  
   202  	ADDQ R15, DX // 12x: add the saved 4x
   203  	ADCQ s0-8(SP), CX
   204  	ADCQ s1-16(SP), BX
   205  	ADCQ s2-24(SP), SI
   206  	ADCQ s3-32(SP), DI
   207  	ADCQ s4-40(SP), R8
   208  
   209  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
   210  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
   211  
   212  	ADDQ 0(AX), DX // 13x: add the original x, read straight from memory
   213  	ADCQ 8(AX), CX
   214  	ADCQ 16(AX), BX
   215  	ADCQ 24(AX), SI
   216  	ADCQ 32(AX), DI
   217  	ADCQ 40(AX), R8
   218  
   219  	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
   220  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)
   221  
   222  	MOVQ DX, 0(AX) // store the result over x
   223  	MOVQ CX, 8(AX)
   224  	MOVQ BX, 16(AX)
   225  	MOVQ SI, 24(AX)
   226  	MOVQ DI, 32(AX)
   227  	MOVQ R8, 40(AX)
   228  	RET
   229  
   230  // Butterfly(a, b *Element) sets a = a + b; b = a - b, both computed from the values a and b held on entry; q is added back when a - b borrows and conditionally subtracted from a + b
   231  TEXT ·Butterfly(SB), $48-16 // 48-byte frame: six 8-byte slots s0..s5 used to park a + b
   232  	MOVQ    a+0(FP), AX // AX = a pointer (only needed for the loads below)
   233  	MOVQ    0(AX), CX // CX,BX,SI,DI,R8,R9 = limbs 0..5 of a
   234  	MOVQ    8(AX), BX
   235  	MOVQ    16(AX), SI
   236  	MOVQ    24(AX), DI
   237  	MOVQ    32(AX), R8
   238  	MOVQ    40(AX), R9
   239  	MOVQ    CX, R10 // R10..R15 = second copy of a, consumed by the subtraction
   240  	MOVQ    BX, R11
   241  	MOVQ    SI, R12
   242  	MOVQ    DI, R13
   243  	MOVQ    R8, R14
   244  	MOVQ    R9, R15
   245  	XORQ    AX, AX // AX = 0: the value the CMOVQCC group substitutes for q; the a pointer is reloaded before the final stores
   246  	MOVQ    b+8(FP), DX // DX = b pointer
   247  	ADDQ    0(DX), CX // CX..R9 = a + b
   248  	ADCQ    8(DX), BX
   249  	ADCQ    16(DX), SI
   250  	ADCQ    24(DX), DI
   251  	ADCQ    32(DX), R8
   252  	ADCQ    40(DX), R9
   253  	SUBQ    0(DX), R10 // R10..R15 = a - b; CF after the last SBBQ is the final borrow (set when a < b)
   254  	SBBQ    8(DX), R11
   255  	SBBQ    16(DX), R12
   256  	SBBQ    24(DX), R13
   257  	SBBQ    32(DX), R14
   258  	SBBQ    40(DX), R15
   259  	MOVQ    CX, s0-8(SP) // park a + b on the stack so CX..R9 can hold the modulus; the MOVQs leave CF intact
   260  	MOVQ    BX, s1-16(SP)
   261  	MOVQ    SI, s2-24(SP)
   262  	MOVQ    DI, s3-32(SP)
   263  	MOVQ    R8, s4-40(SP)
   264  	MOVQ    R9, s5-48(SP)
   265  	MOVQ    $0x8508c00000000001, CX // CX..R9 = q as immediates (same six values as the q<> table)
   266  	MOVQ    $0x170b5d4430000000, BX
   267  	MOVQ    $0x1ef3622fba094800, SI
   268  	MOVQ    $0x1a22d9f300f5138f, DI
   269  	MOVQ    $0xc63b05c06ca1493b, R8
   270  	MOVQ    $0x01ae3a4617c510ea, R9
   271  	CMOVQCC AX, CX // no borrow (CF clear): replace q by 0 so the addition below changes nothing
   272  	CMOVQCC AX, BX
   273  	CMOVQCC AX, SI
   274  	CMOVQCC AX, DI
   275  	CMOVQCC AX, R8
   276  	CMOVQCC AX, R9
   277  	ADDQ    CX, R10 // a - b += q when the subtraction borrowed, += 0 otherwise
   278  	ADCQ    BX, R11
   279  	ADCQ    SI, R12
   280  	ADCQ    DI, R13
   281  	ADCQ    R8, R14
   282  	ADCQ    R9, R15
   283  	MOVQ    s0-8(SP), CX // bring a + b back into CX..R9
   284  	MOVQ    s1-16(SP), BX
   285  	MOVQ    s2-24(SP), SI
   286  	MOVQ    s3-32(SP), DI
   287  	MOVQ    s4-40(SP), R8
   288  	MOVQ    s5-48(SP), R9
   289  	MOVQ    R10, 0(DX) // store a - b over b; this also frees R10..R15 as REDUCE temporaries
   290  	MOVQ    R11, 8(DX)
   291  	MOVQ    R12, 16(DX)
   292  	MOVQ    R13, 24(DX)
   293  	MOVQ    R14, 32(DX)
   294  	MOVQ    R15, 40(DX)
   295  
   296  	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
   297  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)
   298  
   299  	MOVQ a+0(FP), AX // reload the a pointer: AX was zeroed for the CMOVQCC selection
   300  	MOVQ CX, 0(AX) // store the reduced a + b over a
   301  	MOVQ BX, 8(AX)
   302  	MOVQ SI, 16(AX)
   303  	MOVQ DI, 24(AX)
   304  	MOVQ R8, 32(AX)
   305  	MOVQ R9, 40(AX)
   306  	RET