github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-633/fp/element_ops_amd64.s (about)

     1  // +build !purego
     2  
     3  // Copyright 2020 ConsenSys Software Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  #include "textflag.h"
    18  #include "funcdata.h"
    19  
    20  // modulus q: ten 64-bit words at byte offsets 0..72 (80 bytes), least-significant word first
        // (the REDUCE macro starts its borrow chain at q<>+0 and finishes it at q<>+72).
        // Butterfly repeats these same ten words as immediates.
    21  DATA q<>+0(SB)/8, $0xd74916ea4570000d
    22  DATA q<>+8(SB)/8, $0x3d369bd31147f73c
    23  DATA q<>+16(SB)/8, $0xd7b5ce7ab839c225
    24  DATA q<>+24(SB)/8, $0x7e0e8850edbda407
    25  DATA q<>+32(SB)/8, $0xb8da9f5e83f57c49
    26  DATA q<>+40(SB)/8, $0x8152a6c0fadea490
    27  DATA q<>+48(SB)/8, $0x4e59769ad9bbda2f
    28  DATA q<>+56(SB)/8, $0xa8fcd8c75d79d2c7
    29  DATA q<>+64(SB)/8, $0xfc1a174f01d72ab5
    30  DATA q<>+72(SB)/8, $0x0126633cc0f35f63
    31  GLOBL q<>(SB), (RODATA+NOPTR), $80 // file-local symbol (<>), read-only, holds no Go pointers
    32  
    33  // qInv0 q'[0]
        // NOTE(review): qInv0 is not referenced by any code in this listing. It is presumably the
        // Montgomery constant -q^{-1} mod 2^64 consumed by a multiplication routine defined
        // elsewhere; confirm before relying on that.
    34  DATA qInv0<>(SB)/8, $0xb50f29ab0b03b13b
    35  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    36  
    // REDUCE(ra0..ra9, rb0..rb9): one branch-free conditional subtraction of the modulus q.
    //
    //   ra0..ra9  the 640-bit value, least-significant word in ra0; updated in place.
    //             Every use in this listing passes registers here.
    //   rb0..rb9  scratch operands (registers or stack slots); overwritten with the
    //             incoming value of ra0..ra9.
    //
    // Each word is first copied to its scratch operand, then q is subtracted with a
    // SUBQ/SBBQ borrow chain (MOVQ leaves the flags alone, so the copies can be interleaved
    // with the chain). If the final SBBQ leaves the carry flag set, the value was smaller
    // than q and the ten CMOVQCS instructions restore the saved copy; otherwise the
    // difference is kept. CMOVQCS does not modify flags, so the one carry bit drives all ten.
    //
    // Only a single subtraction is performed, so the result is below q only when the input
    // is below 2q. Flags are clobbered.
    //
    // Do not add // comments inside the macro body: they would hide the trailing backslash.
    // The last body line also ends in a backslash; the blank line after it closes the macro.
    37  #define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9) \
    38  	MOVQ    ra0, rb0;        \
    39  	SUBQ    q<>(SB), ra0;    \
    40  	MOVQ    ra1, rb1;        \
    41  	SBBQ    q<>+8(SB), ra1;  \
    42  	MOVQ    ra2, rb2;        \
    43  	SBBQ    q<>+16(SB), ra2; \
    44  	MOVQ    ra3, rb3;        \
    45  	SBBQ    q<>+24(SB), ra3; \
    46  	MOVQ    ra4, rb4;        \
    47  	SBBQ    q<>+32(SB), ra4; \
    48  	MOVQ    ra5, rb5;        \
    49  	SBBQ    q<>+40(SB), ra5; \
    50  	MOVQ    ra6, rb6;        \
    51  	SBBQ    q<>+48(SB), ra6; \
    52  	MOVQ    ra7, rb7;        \
    53  	SBBQ    q<>+56(SB), ra7; \
    54  	MOVQ    ra8, rb8;        \
    55  	SBBQ    q<>+64(SB), ra8; \
    56  	MOVQ    ra9, rb9;        \
    57  	SBBQ    q<>+72(SB), ra9; \
    58  	CMOVQCS rb0, ra0;        \
    59  	CMOVQCS rb1, ra1;        \
    60  	CMOVQCS rb2, ra2;        \
    61  	CMOVQCS rb3, ra3;        \
    62  	CMOVQCS rb4, ra4;        \
    63  	CMOVQCS rb5, ra5;        \
    64  	CMOVQCS rb6, ra6;        \
    65  	CMOVQCS rb7, ra7;        \
    66  	CMOVQCS rb8, ra8;        \
    67  	CMOVQCS rb9, ra9;        \
    68  
    69  TEXT ·reduce(SB), $56-8 // 56-byte frame = spill slots s0..s6 used by REDUCE; 8 bytes of arguments (res)
        	// Loads the ten 64-bit words at res, applies REDUCE once (subtract q if the value
        	// is >= q), and writes the ten words back through the same pointer.
        	// NOTE(review): the Go declaration is not part of this listing; res is presumably
        	// a *Element, as in the routines below. Confirm against the Go stub.
        	// A single REDUCE only yields a value below q when the input is below 2q.
    70  	MOVQ res+0(FP), AX // AX = res; stays live until the final stores
    71  	MOVQ 0(AX), DX     // least-significant word
    72  	MOVQ 8(AX), CX
    73  	MOVQ 16(AX), BX
    74  	MOVQ 24(AX), SI
    75  	MOVQ 32(AX), DI
    76  	MOVQ 40(AX), R8
    77  	MOVQ 48(AX), R9
    78  	MOVQ 56(AX), R10
    79  	MOVQ 64(AX), R11
    80  	MOVQ 72(AX), R12   // most-significant word
    81  
    82  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
    83  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
    84  
    85  	MOVQ DX, 0(AX) // write the result back in place
    86  	MOVQ CX, 8(AX)
    87  	MOVQ BX, 16(AX)
    88  	MOVQ SI, 24(AX)
    89  	MOVQ DI, 32(AX)
    90  	MOVQ R8, 40(AX)
    91  	MOVQ R9, 48(AX)
    92  	MOVQ R10, 56(AX)
    93  	MOVQ R11, 64(AX)
    94  	MOVQ R12, 72(AX)
    95  	RET
    96  
    97  // MulBy3(x *Element)
        // Replaces *x with 3*x, computed as (2x reduced) + x, reduced again.
        // The working value lives in DX,CX,BX,SI,DI,R8,R9,R10,R11,R12 (least-significant word
        // in DX); AX holds x throughout; R13-R15 and stack slots s0..s6 are REDUCE scratch.
        // NOTE(review): each REDUCE subtracts q at most once, so the result is below q only if
        // *x is below q on entry. Presumably callers guarantee that; confirm.
    98  TEXT ·MulBy3(SB), $56-8 // 56-byte frame = spill slots s0..s6; 8 bytes of arguments
    99  	MOVQ x+0(FP), AX
   100  	MOVQ 0(AX), DX
   101  	MOVQ 8(AX), CX
   102  	MOVQ 16(AX), BX
   103  	MOVQ 24(AX), SI
   104  	MOVQ 32(AX), DI
   105  	MOVQ 40(AX), R8
   106  	MOVQ 48(AX), R9
   107  	MOVQ 56(AX), R10
   108  	MOVQ 64(AX), R11
   109  	MOVQ 72(AX), R12
   110  	ADDQ DX, DX   // double: add the value to itself, carry rippling up through ADCQ
   111  	ADCQ CX, CX
   112  	ADCQ BX, BX
   113  	ADCQ SI, SI
   114  	ADCQ DI, DI
   115  	ADCQ R8, R8
   116  	ADCQ R9, R9
   117  	ADCQ R10, R10
   118  	ADCQ R11, R11
   119  	ADCQ R12, R12 // carry out of the top word is not used (REDUCE begins with a fresh SUBQ)
   120  
   121  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   122  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   123  
   124  	ADDQ 0(AX), DX // add the original x (still unmodified in memory): 2x + x
   125  	ADCQ 8(AX), CX
   126  	ADCQ 16(AX), BX
   127  	ADCQ 24(AX), SI
   128  	ADCQ 32(AX), DI
   129  	ADCQ 40(AX), R8
   130  	ADCQ 48(AX), R9
   131  	ADCQ 56(AX), R10
   132  	ADCQ 64(AX), R11
   133  	ADCQ 72(AX), R12
   134  
   135  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   136  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   137  
   138  	MOVQ DX, 0(AX) // overwrite *x with the result
   139  	MOVQ CX, 8(AX)
   140  	MOVQ BX, 16(AX)
   141  	MOVQ SI, 24(AX)
   142  	MOVQ DI, 32(AX)
   143  	MOVQ R8, 40(AX)
   144  	MOVQ R9, 48(AX)
   145  	MOVQ R10, 56(AX)
   146  	MOVQ R11, 64(AX)
   147  	MOVQ R12, 72(AX)
   148  	RET
   149  
   150  // MulBy5(x *Element)
        // Replaces *x with 5*x, computed as double, double, then add the original x, with a
        // REDUCE after each of the three steps.
        // The working value lives in DX,CX,BX,SI,DI,R8,R9,R10,R11,R12 (least-significant word
        // in DX); AX holds x throughout; R13-R15 and stack slots s0..s6 are REDUCE scratch.
        // NOTE(review): each REDUCE subtracts q at most once, so the result is below q only if
        // *x is below q on entry. Presumably callers guarantee that; confirm.
   151  TEXT ·MulBy5(SB), $56-8 // 56-byte frame = spill slots s0..s6; 8 bytes of arguments
   152  	MOVQ x+0(FP), AX
   153  	MOVQ 0(AX), DX
   154  	MOVQ 8(AX), CX
   155  	MOVQ 16(AX), BX
   156  	MOVQ 24(AX), SI
   157  	MOVQ 32(AX), DI
   158  	MOVQ 40(AX), R8
   159  	MOVQ 48(AX), R9
   160  	MOVQ 56(AX), R10
   161  	MOVQ 64(AX), R11
   162  	MOVQ 72(AX), R12
   163  	ADDQ DX, DX // first doubling: 2x
   164  	ADCQ CX, CX
   165  	ADCQ BX, BX
   166  	ADCQ SI, SI
   167  	ADCQ DI, DI
   168  	ADCQ R8, R8
   169  	ADCQ R9, R9
   170  	ADCQ R10, R10
   171  	ADCQ R11, R11
   172  	ADCQ R12, R12
   173  
   174  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   175  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   176  
   177  	ADDQ DX, DX // second doubling: 4x
   178  	ADCQ CX, CX
   179  	ADCQ BX, BX
   180  	ADCQ SI, SI
   181  	ADCQ DI, DI
   182  	ADCQ R8, R8
   183  	ADCQ R9, R9
   184  	ADCQ R10, R10
   185  	ADCQ R11, R11
   186  	ADCQ R12, R12
   187  
   188  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   189  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   190  
   191  	ADDQ 0(AX), DX // add the original x (still unmodified in memory): 4x + x
   192  	ADCQ 8(AX), CX
   193  	ADCQ 16(AX), BX
   194  	ADCQ 24(AX), SI
   195  	ADCQ 32(AX), DI
   196  	ADCQ 40(AX), R8
   197  	ADCQ 48(AX), R9
   198  	ADCQ 56(AX), R10
   199  	ADCQ 64(AX), R11
   200  	ADCQ 72(AX), R12
   201  
   202  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   203  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   204  
   205  	MOVQ DX, 0(AX) // overwrite *x with the result
   206  	MOVQ CX, 8(AX)
   207  	MOVQ BX, 16(AX)
   208  	MOVQ SI, 24(AX)
   209  	MOVQ DI, 32(AX)
   210  	MOVQ R8, 40(AX)
   211  	MOVQ R9, 48(AX)
   212  	MOVQ R10, 56(AX)
   213  	MOVQ R11, 64(AX)
   214  	MOVQ R12, 72(AX)
   215  	RET
   216  
   217  // MulBy13(x *Element)
        // Replaces *x with 13*x, computed as 8x + 4x + x: three doublings, with the reduced 4x
        // parked in stack slots s7..s16 so it can be added back after the third doubling.
        // A REDUCE follows every doubling and every addition.
        // The working value lives in DX,CX,BX,SI,DI,R8,R9,R10,R11,R12 (least-significant word
        // in DX); AX holds x throughout; R13-R15 and stack slots s0..s6 are REDUCE scratch.
        // NOTE(review): each REDUCE subtracts q at most once, so the result is below q only if
        // *x is below q on entry. Presumably callers guarantee that; confirm.
   218  TEXT ·MulBy13(SB), $136-8 // 136-byte frame = 17 slots: s0..s6 REDUCE scratch, s7..s16 saved 4x; 8 bytes of arguments
   219  	MOVQ x+0(FP), AX
   220  	MOVQ 0(AX), DX
   221  	MOVQ 8(AX), CX
   222  	MOVQ 16(AX), BX
   223  	MOVQ 24(AX), SI
   224  	MOVQ 32(AX), DI
   225  	MOVQ 40(AX), R8
   226  	MOVQ 48(AX), R9
   227  	MOVQ 56(AX), R10
   228  	MOVQ 64(AX), R11
   229  	MOVQ 72(AX), R12
   230  	ADDQ DX, DX // first doubling: 2x
   231  	ADCQ CX, CX
   232  	ADCQ BX, BX
   233  	ADCQ SI, SI
   234  	ADCQ DI, DI
   235  	ADCQ R8, R8
   236  	ADCQ R9, R9
   237  	ADCQ R10, R10
   238  	ADCQ R11, R11
   239  	ADCQ R12, R12
   240  
   241  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   242  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   243  
   244  	ADDQ DX, DX // second doubling: 4x
   245  	ADCQ CX, CX
   246  	ADCQ BX, BX
   247  	ADCQ SI, SI
   248  	ADCQ DI, DI
   249  	ADCQ R8, R8
   250  	ADCQ R9, R9
   251  	ADCQ R10, R10
   252  	ADCQ R11, R11
   253  	ADCQ R12, R12
   254  
   255  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP))
   256  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP))
   257  
   258  	MOVQ DX, s7-64(SP) // park the reduced 4x in s7..s16 (this overwrites the scratch copy REDUCE just left there)
   259  	MOVQ CX, s8-72(SP)
   260  	MOVQ BX, s9-80(SP)
   261  	MOVQ SI, s10-88(SP)
   262  	MOVQ DI, s11-96(SP)
   263  	MOVQ R8, s12-104(SP)
   264  	MOVQ R9, s13-112(SP)
   265  	MOVQ R10, s14-120(SP)
   266  	MOVQ R11, s15-128(SP)
   267  	MOVQ R12, s16-136(SP)
   268  	ADDQ DX, DX // third doubling: 8x
   269  	ADCQ CX, CX
   270  	ADCQ BX, BX
   271  	ADCQ SI, SI
   272  	ADCQ DI, DI
   273  	ADCQ R8, R8
   274  	ADCQ R9, R9
   275  	ADCQ R10, R10
   276  	ADCQ R11, R11
   277  	ADCQ R12, R12
   278  
   279  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   280  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   281  
   282  	ADDQ s7-64(SP), DX // add the parked 4x: 8x + 4x = 12x
   283  	ADCQ s8-72(SP), CX
   284  	ADCQ s9-80(SP), BX
   285  	ADCQ s10-88(SP), SI
   286  	ADCQ s11-96(SP), DI
   287  	ADCQ s12-104(SP), R8
   288  	ADCQ s13-112(SP), R9
   289  	ADCQ s14-120(SP), R10
   290  	ADCQ s15-128(SP), R11
   291  	ADCQ s16-136(SP), R12
   292  
   293  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   294  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   295  
   296  	ADDQ 0(AX), DX // add the original x (still unmodified in memory): 12x + x
   297  	ADCQ 8(AX), CX
   298  	ADCQ 16(AX), BX
   299  	ADCQ 24(AX), SI
   300  	ADCQ 32(AX), DI
   301  	ADCQ 40(AX), R8
   302  	ADCQ 48(AX), R9
   303  	ADCQ 56(AX), R10
   304  	ADCQ 64(AX), R11
   305  	ADCQ 72(AX), R12
   306  
   307  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   308  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   309  
   310  	MOVQ DX, 0(AX) // overwrite *x with the result
   311  	MOVQ CX, 8(AX)
   312  	MOVQ BX, 16(AX)
   313  	MOVQ SI, 24(AX)
   314  	MOVQ DI, 32(AX)
   315  	MOVQ R8, 40(AX)
   316  	MOVQ R9, 48(AX)
   317  	MOVQ R10, 56(AX)
   318  	MOVQ R11, 64(AX)
   319  	MOVQ R12, 72(AX)
   320  	RET
   321  
   322  // Butterfly(a, b *Element) sets a = a + b; b = a - b
        // Both results are computed from the values of *a and *b on entry: every load of *a
        // and *b happens before the first store.
        // Order of work: (1) sum = a + b, left unreduced and stashed in R13-R15 plus stack slots
        // s0..s6; (2) diff = a - b, with q added back if the subtraction borrowed; (3) diff is
        // stored to *b; (4) the sum is restored, passed through REDUCE and stored to *a.
        // AX is the only pointer register and is reloaded from the frame as needed.
        // NOTE(review): one REDUCE on the sum and one add-back of q on the difference only give
        // results below q if *a and *b are below q on entry. Presumably callers guarantee
        // that; confirm.
   323  TEXT ·Butterfly(SB), $56-16 // 56-byte frame = spill slots s0..s6; 16 bytes of arguments (a, b)
   324  	MOVQ b+8(FP), AX // AX = b
   325  	MOVQ 0(AX), DX
   326  	MOVQ 8(AX), CX
   327  	MOVQ 16(AX), BX
   328  	MOVQ 24(AX), SI
   329  	MOVQ 32(AX), DI
   330  	MOVQ 40(AX), R8
   331  	MOVQ 48(AX), R9
   332  	MOVQ 56(AX), R10
   333  	MOVQ 64(AX), R11
   334  	MOVQ 72(AX), R12
   335  	MOVQ a+0(FP), AX // AX = a
   336  	ADDQ 0(AX), DX   // sum = b + a, carry rippling up through ADCQ
   337  	ADCQ 8(AX), CX
   338  	ADCQ 16(AX), BX
   339  	ADCQ 24(AX), SI
   340  	ADCQ 32(AX), DI
   341  	ADCQ 40(AX), R8
   342  	ADCQ 48(AX), R9
   343  	ADCQ 56(AX), R10
   344  	ADCQ 64(AX), R11
   345  	ADCQ 72(AX), R12
   346  	MOVQ DX, R13       // stash the unreduced sum so the working registers can hold a - b
   347  	MOVQ CX, R14
   348  	MOVQ BX, R15
   349  	MOVQ SI, s0-8(SP)
   350  	MOVQ DI, s1-16(SP)
   351  	MOVQ R8, s2-24(SP)
   352  	MOVQ R9, s3-32(SP)
   353  	MOVQ R10, s4-40(SP)
   354  	MOVQ R11, s5-48(SP)
   355  	MOVQ R12, s6-56(SP)
   356  	MOVQ 0(AX), DX     // reload a (AX still points at a)
   357  	MOVQ 8(AX), CX
   358  	MOVQ 16(AX), BX
   359  	MOVQ 24(AX), SI
   360  	MOVQ 32(AX), DI
   361  	MOVQ 40(AX), R8
   362  	MOVQ 48(AX), R9
   363  	MOVQ 56(AX), R10
   364  	MOVQ 64(AX), R11
   365  	MOVQ 72(AX), R12
   366  	MOVQ b+8(FP), AX   // AX = b
   367  	SUBQ 0(AX), DX     // diff = a - b, borrow rippling up through SBBQ
   368  	SBBQ 8(AX), CX
   369  	SBBQ 16(AX), BX
   370  	SBBQ 24(AX), SI
   371  	SBBQ 32(AX), DI
   372  	SBBQ 40(AX), R8
   373  	SBBQ 48(AX), R9
   374  	SBBQ 56(AX), R10
   375  	SBBQ 64(AX), R11
   376  	SBBQ 72(AX), R12
   377  	JCC  l1            // no borrow out of the top word: a >= b, keep the difference as is
   378  	MOVQ $0xd74916ea4570000d, AX // borrow: add q back word by word; AX is scratch for each 64-bit immediate
   379  	ADDQ AX, DX
   380  	MOVQ $0x3d369bd31147f73c, AX // MOVQ leaves the flags alone, so the carry chain survives each reload
   381  	ADCQ AX, CX
   382  	MOVQ $0xd7b5ce7ab839c225, AX
   383  	ADCQ AX, BX
   384  	MOVQ $0x7e0e8850edbda407, AX
   385  	ADCQ AX, SI
   386  	MOVQ $0xb8da9f5e83f57c49, AX
   387  	ADCQ AX, DI
   388  	MOVQ $0x8152a6c0fadea490, AX
   389  	ADCQ AX, R8
   390  	MOVQ $0x4e59769ad9bbda2f, AX
   391  	ADCQ AX, R9
   392  	MOVQ $0xa8fcd8c75d79d2c7, AX
   393  	ADCQ AX, R10
   394  	MOVQ $0xfc1a174f01d72ab5, AX
   395  	ADCQ AX, R11
   396  	MOVQ $0x0126633cc0f35f63, AX
   397  	ADCQ AX, R12
   398  
   399  l1:
   400  	MOVQ b+8(FP), AX // reload b: AX was used as scratch on the borrow path
   401  	MOVQ DX, 0(AX)   // *b = diff
   402  	MOVQ CX, 8(AX)
   403  	MOVQ BX, 16(AX)
   404  	MOVQ SI, 24(AX)
   405  	MOVQ DI, 32(AX)
   406  	MOVQ R8, 40(AX)
   407  	MOVQ R9, 48(AX)
   408  	MOVQ R10, 56(AX)
   409  	MOVQ R11, 64(AX)
   410  	MOVQ R12, 72(AX)
   411  	MOVQ R13, DX     // bring the stashed sum back; R13-R15 and s0..s6 become REDUCE scratch again
   412  	MOVQ R14, CX
   413  	MOVQ R15, BX
   414  	MOVQ s0-8(SP), SI
   415  	MOVQ s1-16(SP), DI
   416  	MOVQ s2-24(SP), R8
   417  	MOVQ s3-32(SP), R9
   418  	MOVQ s4-40(SP), R10
   419  	MOVQ s5-48(SP), R11
   420  	MOVQ s6-56(SP), R12
   421  
   422  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12) using temp registers (R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   423  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP))
   424  
   425  	MOVQ a+0(FP), AX // AX = a
   426  	MOVQ DX, 0(AX)   // *a = reduced sum
   427  	MOVQ CX, 8(AX)
   428  	MOVQ BX, 16(AX)
   429  	MOVQ SI, 24(AX)
   430  	MOVQ DI, 32(AX)
   431  	MOVQ R8, 40(AX)
   432  	MOVQ R9, 48(AX)
   433  	MOVQ R10, 56(AX)
   434  	MOVQ R11, 64(AX)
   435  	MOVQ R12, 72(AX)
   436  	RET
   436  	RET