github.com/consensys/gnark-crypto@v0.14.0/ecc/bls12-377/fr/element_ops_amd64.s (about)

     1  // +build !purego
     2  
     3  // Copyright 2020 ConsenSys Software Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  #include "textflag.h"
    18  #include "funcdata.h"
    19  
    20  // modulus q, stored as four 64-bit words with the least-significant word at offset 0
        // (the REDUCE macro starts its borrow chain at q<>+0 and finishes it at q<>+24).
    21  DATA q<>+0(SB)/8, $0x0a11800000000001 // q[0], lowest word
    22  DATA q<>+8(SB)/8, $0x59aa76fed0000001 // q[1]
    23  DATA q<>+16(SB)/8, $0x60b44d1e5c37b001 // q[2]
    24  DATA q<>+24(SB)/8, $0x12ab655e9a2ca556 // q[3], highest word
    25  GLOBL q<>(SB), (RODATA+NOPTR), $32 // file-local symbol: 32 bytes, read-only, no pointers
    26  
    27  // qInv0 q'[0]: a single 64-bit word satisfying q[0] * qInv0 = -1 (mod 2^64).
        // NOTE(review): no routine in this part of the file references it; presumably it is
        // the per-word constant of a Montgomery reduction implemented elsewhere - confirm there.
    28  DATA qInv0<>(SB)/8, $0x0a117fffffffffff
    29  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 // file-local symbol: 8 bytes, read-only, no pointers
    30  
        // REDUCE(ra0..ra3, rb0..rb3): conditional subtraction of q from a 4-word value.
        //   in:     ra0..ra3 = value, ra0 being the least-significant word
        //   out:    ra0..ra3 = value - q if value >= q, otherwise the value unchanged
        //   clobb:  rb0..rb3 (left holding the input value) and the flags
        // Each MOVQ saves a word just before it is overwritten; MOVQ leaves the flags alone,
        // so the SUBQ/SBBQ borrow chain is not disturbed. If the final SBBQ borrows (CF set)
        // the value was smaller than q and the CMOVQCS instructions restore the saved words.
        // Only this borrow is examined: a carry out of an earlier addition is not taken into
        // account, and the result is below q only when the input is below 2q.
        // No comments are placed inside the body because every line must end in the
        // continuation backslash; the blank line after the last backslash ends the macro.
    31  #define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
    32  	MOVQ    ra0, rb0;        \
    33  	SUBQ    q<>(SB), ra0;    \
    34  	MOVQ    ra1, rb1;        \
    35  	SBBQ    q<>+8(SB), ra1;  \
    36  	MOVQ    ra2, rb2;        \
    37  	SBBQ    q<>+16(SB), ra2; \
    38  	MOVQ    ra3, rb3;        \
    39  	SBBQ    q<>+24(SB), ra3; \
    40  	CMOVQCS rb0, ra0;        \
    41  	CMOVQCS rb1, ra1;        \
    42  	CMOVQCS rb2, ra2;        \
    43  	CMOVQCS rb3, ra3;        \
    44  
    45  TEXT ·reduce(SB), NOSPLIT, $0-8
        	// reduce(res): rewrites the four 64-bit words at res with (value - q) when
        	// value >= q and leaves them as they are otherwise.
        	// Frame: no locals ($0), one 8-byte pointer argument.
        	// Clobbers AX, DX, CX, BX, SI, DI, R8, R9, R10 and the flags.
    46  	MOVQ res+0(FP), AX // AX = res
    47  	MOVQ 0(AX), DX // (DX,CX,BX,SI) = words 0..3 of *res, word 0 least significant
    48  	MOVQ 8(AX), CX
    49  	MOVQ 16(AX), BX
    50  	MOVQ 24(AX), SI
    51  
    52  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
    53  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
    54  
    55  	MOVQ DX, 0(AX) // write the result back over *res
    56  	MOVQ CX, 8(AX)
    57  	MOVQ BX, 16(AX)
    58  	MOVQ SI, 24(AX)
    59  	RET
    60  
    61  // MulBy3(x *Element)
        // Replaces the four-word value at x with 3*x, computed as REDUCE(REDUCE(2*x) + x).
        // The result is below q provided the input is below q (assumed, not checked here):
        // q's top word is 0x12ab..., so every intermediate sum then stays below 2q and
        // fits in 256 bits without a carry out.
        // Clobbers AX, DX, CX, BX, SI, DI, R8-R14 and the flags.
    62  TEXT ·MulBy3(SB), NOSPLIT, $0-8
    63  	MOVQ x+0(FP), AX // AX = x
    64  	MOVQ 0(AX), DX // (DX,CX,BX,SI) = words 0..3 of *x
    65  	MOVQ 8(AX), CX
    66  	MOVQ 16(AX), BX
    67  	MOVQ 24(AX), SI
    68  	ADDQ DX, DX // (DX,CX,BX,SI) = 2*x, carry propagated word to word
    69  	ADCQ CX, CX
    70  	ADCQ BX, BX
    71  	ADCQ SI, SI
    72  
    73  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
    74  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
    75  
    76  	ADDQ 0(AX), DX // add the original x, still unmodified in memory: 2*x + x
    77  	ADCQ 8(AX), CX
    78  	ADCQ 16(AX), BX
    79  	ADCQ 24(AX), SI
    80  
    81  	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
    82  	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
    83  
    84  	MOVQ DX, 0(AX) // store 3*x back over *x
    85  	MOVQ CX, 8(AX)
    86  	MOVQ BX, 16(AX)
    87  	MOVQ SI, 24(AX)
    88  	RET
    89  
    90  // MulBy5(x *Element)
        // Replaces the four-word value at x with 5*x: two doublings (each followed by a
        // REDUCE) give 4*x, then the original x is added from memory and reduced once more.
        // The result is below q provided the input is below q (assumed, not checked here).
        // Clobbers AX, DX, CX, BX, SI, DI, R8-R15 and the flags.
    91  TEXT ·MulBy5(SB), NOSPLIT, $0-8
    92  	MOVQ x+0(FP), AX // AX = x
    93  	MOVQ 0(AX), DX // (DX,CX,BX,SI) = words 0..3 of *x
    94  	MOVQ 8(AX), CX
    95  	MOVQ 16(AX), BX
    96  	MOVQ 24(AX), SI
    97  	ADDQ DX, DX // (DX,CX,BX,SI) = 2*x
    98  	ADCQ CX, CX
    99  	ADCQ BX, BX
   100  	ADCQ SI, SI
   101  
   102  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
   103  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
   104  
   105  	ADDQ DX, DX // double again: 4*x
   106  	ADCQ CX, CX
   107  	ADCQ BX, BX
   108  	ADCQ SI, SI
   109  
   110  	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
   111  	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
   112  
   113  	ADDQ 0(AX), DX // add the original x, still unmodified in memory: 4*x + x
   114  	ADCQ 8(AX), CX
   115  	ADCQ 16(AX), BX
   116  	ADCQ 24(AX), SI
   117  
   118  	// reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9)
   119  	REDUCE(DX,CX,BX,SI,R15,DI,R8,R9)
   120  
   121  	MOVQ DX, 0(AX) // store 5*x back over *x
   122  	MOVQ CX, 8(AX)
   123  	MOVQ BX, 16(AX)
   124  	MOVQ SI, 24(AX)
   125  	RET
   126  
   127  // MulBy13(x *Element)
        // Replaces the four-word value at x with 13*x, built as 8*x + 4*x + x:
        // three doublings produce 2*x, 4*x and 8*x (a copy of 4*x is kept in R11..R14),
        // then 4*x and finally the original x are added. Every step is followed by a REDUCE.
        // The result is below q provided the input is below q (assumed, not checked here).
        // Clobbers AX, DX, CX, BX, SI, DI, R8-R14 and the flags.
   128  TEXT ·MulBy13(SB), NOSPLIT, $0-8
   129  	MOVQ x+0(FP), AX // AX = x
   130  	MOVQ 0(AX), DX // (DX,CX,BX,SI) = words 0..3 of *x
   131  	MOVQ 8(AX), CX
   132  	MOVQ 16(AX), BX
   133  	MOVQ 24(AX), SI
   134  	ADDQ DX, DX // (DX,CX,BX,SI) = 2*x
   135  	ADCQ CX, CX
   136  	ADCQ BX, BX
   137  	ADCQ SI, SI
   138  
   139  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
   140  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
   141  
   142  	ADDQ DX, DX // double again: 4*x
   143  	ADCQ CX, CX
   144  	ADCQ BX, BX
   145  	ADCQ SI, SI
   146  
   147  	// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
   148  	REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
   149  
   150  	MOVQ DX, R11 // keep the reduced 4*x in (R11,R12,R13,R14) for the later addition
   151  	MOVQ CX, R12
   152  	MOVQ BX, R13
   153  	MOVQ SI, R14
   154  	ADDQ DX, DX // double again: 8*x
   155  	ADCQ CX, CX
   156  	ADCQ BX, BX
   157  	ADCQ SI, SI
   158  
   159  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
   160  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
   161  
   162  	ADDQ R11, DX // 8*x + 4*x = 12*x
   163  	ADCQ R12, CX
   164  	ADCQ R13, BX
   165  	ADCQ R14, SI
   166  
   167  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
   168  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
   169  
   170  	ADDQ 0(AX), DX // add the original x, still unmodified in memory: 12*x + x
   171  	ADCQ 8(AX), CX
   172  	ADCQ 16(AX), BX
   173  	ADCQ 24(AX), SI
   174  
   175  	// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
   176  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
   177  
   178  	MOVQ DX, 0(AX) // store 13*x back over *x
   179  	MOVQ CX, 8(AX)
   180  	MOVQ BX, 16(AX)
   181  	MOVQ SI, 24(AX)
   182  	RET
   183  
   184  // Butterfly(a, b *Element) sets a = a + b; b = a - b
        // Both results are computed from the values a and b had on entry: a is copied into
        // registers before the addition, so b receives (old a) - (old b).
        // The sum is brought back below q with REDUCE; the difference gets q added back
        // when the subtraction borrowed. Both results are below q provided both inputs
        // are below q (assumed, not checked here).
        // Clobbers AX, CX, BX, SI, DI, DX, R8-R15 and the flags.
   185  TEXT ·Butterfly(SB), NOSPLIT, $0-16
   186  	MOVQ    a+0(FP), AX // AX = a
   187  	MOVQ    0(AX), CX // (CX,BX,SI,DI) = words 0..3 of *a; will become the sum
   188  	MOVQ    8(AX), BX
   189  	MOVQ    16(AX), SI
   190  	MOVQ    24(AX), DI
   191  	MOVQ    CX, R8 // (R8,R9,R10,R11) = second copy of *a; will become the difference
   192  	MOVQ    BX, R9
   193  	MOVQ    SI, R10
   194  	MOVQ    DI, R11
   195  	XORQ    AX, AX // AX = 0, the source for the CMOVQCC selection below
   196  	MOVQ    b+8(FP), DX // DX = b
   197  	ADDQ    0(DX), CX // (CX,BX,SI,DI) = a + b
   198  	ADCQ    8(DX), BX
   199  	ADCQ    16(DX), SI
   200  	ADCQ    24(DX), DI
   201  	SUBQ    0(DX), R8 // (R8,R9,R10,R11) = a - b; CF is set afterwards when a < b
   202  	SBBQ    8(DX), R9
   203  	SBBQ    16(DX), R10
   204  	SBBQ    24(DX), R11
   205  	MOVQ    $0x0a11800000000001, R12 // (R12,R13,R14,R15) = q, same words as q<>; MOVQ keeps CF intact
   206  	MOVQ    $0x59aa76fed0000001, R13
   207  	MOVQ    $0x60b44d1e5c37b001, R14
   208  	MOVQ    $0x12ab655e9a2ca556, R15
   209  	CMOVQCC AX, R12 // no borrow (CF clear): replace q by 0 so the addition below is a no-op
   210  	CMOVQCC AX, R13
   211  	CMOVQCC AX, R14
   212  	CMOVQCC AX, R15
   213  	ADDQ    R12, R8 // difference += q when the subtraction borrowed, += 0 otherwise
   214  	ADCQ    R13, R9
   215  	ADCQ    R14, R10
   216  	ADCQ    R15, R11
   217  	MOVQ    R8, 0(DX) // *b = a - b
   218  	MOVQ    R9, 8(DX)
   219  	MOVQ    R10, 16(DX)
   220  	MOVQ    R11, 24(DX)
   221  
   222  	// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11)
   223  	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)
   224  
   225  	MOVQ a+0(FP), AX // reload a: AX was zeroed above
   226  	MOVQ CX, 0(AX) // *a = a + b
   227  	MOVQ BX, 8(AX)
   228  	MOVQ SI, 16(AX)
   229  	MOVQ DI, 24(AX)
   230  	RET
   230  	RET