github.com/consensys/gnark-crypto@v0.14.0/ecc/bw6-756/fp/element_ops_amd64.s (about)

     1  // +build !purego
     2  
     3  // Copyright 2020 ConsenSys Software Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  #include "textflag.h"
    18  #include "funcdata.h"
    19  
    20  // modulus q, stored as twelve 64-bit limbs. The limb at offset 0 is the
        // least-significant one: REDUCE below subtracts q<>+0 first and lets the
        // borrow ripple up through q<>+88.
        // The top limb (offset 88) is below 2^52, so q < 2^756 and 2*q still fits
        // in twelve limbs without a carry out of the last one.
    21  DATA q<>+0(SB)/8, $1
    22  DATA q<>+8(SB)/8, $0x33c7e63f86840000
    23  DATA q<>+16(SB)/8, $0xd0b685e868524ec0
    24  DATA q<>+24(SB)/8, $0x4302aa3c258de7de
    25  DATA q<>+32(SB)/8, $0xe292cd15edb646a5
    26  DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646
    27  DATA q<>+48(SB)/8, $0xeb02c812ea04faaa
    28  DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9
    29  DATA q<>+64(SB)/8, $0xfbf23221455163a6
    30  DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89
    31  DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf
    32  DATA q<>+88(SB)/8, $0x000f76adbb5bb98a
        // 12 limbs * 8 bytes = 96 bytes; file-local symbol (the <> suffix), declared
        // read-only and pointer-free.
    33  GLOBL q<>(SB), (RODATA+NOPTR), $96
    34  
    35  // qInv0 q'[0]: one 64-bit word satisfying q<>[0] * qInv0 == -1 (mod 2^64).
        // Since q<>[0] is 1, that word has all bits set.
        // NOTE(review): no routine visible in this listing references qInv0;
        // presumably it is consumed by Montgomery multiplication elsewhere - confirm.
    36  DATA qInv0<>(SB)/8, $0xffffffffffffffff
    37  GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
    38  
        // REDUCE(ra0..ra11, rb0..rb11) conditionally subtracts the modulus q from the
        // twelve-limb value held in ra0..ra11 (ra0 is the least-significant limb).
        //
        // Each limb is first copied into the matching rb scratch operand, then q is
        // subtracted in place with a SUBQ/SBBQ borrow chain. The interleaved MOVQ
        // copies do not disturb that chain because MOVQ leaves the flags untouched.
        // If the last SBBQ leaves the carry flag set (the value was smaller than q),
        // the CMOVQCS instructions restore every limb from its rb copy; otherwise
        // the difference is kept. Net effect, without a branch:
        //     ra = ra - q   if ra >= q
        //     ra = ra       otherwise
        // Only one subtraction is performed, so the result is below q only when the
        // input was below 2*q.
        //
        // At every use in this file the ra operands are registers, while the rb
        // operands are either R15 plus stack slots or stack slots only.
        // Clobbers: rb0..rb11 and the flags.
        //
        // The backslash after the final CMOVQCS continues the macro onto the next
        // line, which is empty; that empty line is what ends the definition.
    39  #define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \
    40  	MOVQ    ra0, rb0;         \
    41  	SUBQ    q<>(SB), ra0;     \
    42  	MOVQ    ra1, rb1;         \
    43  	SBBQ    q<>+8(SB), ra1;   \
    44  	MOVQ    ra2, rb2;         \
    45  	SBBQ    q<>+16(SB), ra2;  \
    46  	MOVQ    ra3, rb3;         \
    47  	SBBQ    q<>+24(SB), ra3;  \
    48  	MOVQ    ra4, rb4;         \
    49  	SBBQ    q<>+32(SB), ra4;  \
    50  	MOVQ    ra5, rb5;         \
    51  	SBBQ    q<>+40(SB), ra5;  \
    52  	MOVQ    ra6, rb6;         \
    53  	SBBQ    q<>+48(SB), ra6;  \
    54  	MOVQ    ra7, rb7;         \
    55  	SBBQ    q<>+56(SB), ra7;  \
    56  	MOVQ    ra8, rb8;         \
    57  	SBBQ    q<>+64(SB), ra8;  \
    58  	MOVQ    ra9, rb9;         \
    59  	SBBQ    q<>+72(SB), ra9;  \
    60  	MOVQ    ra10, rb10;       \
    61  	SBBQ    q<>+80(SB), ra10; \
    62  	MOVQ    ra11, rb11;       \
    63  	SBBQ    q<>+88(SB), ra11; \
    64  	CMOVQCS rb0, ra0;         \
    65  	CMOVQCS rb1, ra1;         \
    66  	CMOVQCS rb2, ra2;         \
    67  	CMOVQCS rb3, ra3;         \
    68  	CMOVQCS rb4, ra4;         \
    69  	CMOVQCS rb5, ra5;         \
    70  	CMOVQCS rb6, ra6;         \
    71  	CMOVQCS rb7, ra7;         \
    72  	CMOVQCS rb8, ra8;         \
    73  	CMOVQCS rb9, ra9;         \
    74  	CMOVQCS rb10, ra10;       \
    75  	CMOVQCS rb11, ra11;       \
    76  
        // reduce(res) applies one conditional subtraction of q, in place, to the
        // twelve 64-bit limbs that res points to:
        //     *res = *res - q   if *res >= q
        //     *res unchanged    otherwise
        // NOTE(review): the Go-side declaration is not part of this listing; res is
        // taken to be a pointer to twelve little-endian uint64 limbs, like the
        // *Element arguments of the routines below - confirm.
        //
        // Frame: 88 bytes of locals = eleven 8-byte slots (s0..s10) which, together
        // with R15, hold REDUCE's backup copy of the value; 8 bytes of arguments.
        // Registers: AX = res, DX,CX,BX,SI,DI,R8..R14 = limbs 0..11.
    77  TEXT ·reduce(SB), $88-8
    78  	MOVQ res+0(FP), AX
        	// load the twelve limbs, least-significant first
    79  	MOVQ 0(AX), DX
    80  	MOVQ 8(AX), CX
    81  	MOVQ 16(AX), BX
    82  	MOVQ 24(AX), SI
    83  	MOVQ 32(AX), DI
    84  	MOVQ 40(AX), R8
    85  	MOVQ 48(AX), R9
    86  	MOVQ 56(AX), R10
    87  	MOVQ 64(AX), R11
    88  	MOVQ 72(AX), R12
    89  	MOVQ 80(AX), R13
    90  	MOVQ 88(AX), R14
    91  
    92  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
    93  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
    94  
        	// write the (possibly reduced) limbs back over the input
    95  	MOVQ DX, 0(AX)
    96  	MOVQ CX, 8(AX)
    97  	MOVQ BX, 16(AX)
    98  	MOVQ SI, 24(AX)
    99  	MOVQ DI, 32(AX)
   100  	MOVQ R8, 40(AX)
   101  	MOVQ R9, 48(AX)
   102  	MOVQ R10, 56(AX)
   103  	MOVQ R11, 64(AX)
   104  	MOVQ R12, 72(AX)
   105  	MOVQ R13, 80(AX)
   106  	MOVQ R14, 88(AX)
   107  	RET
   108  
   109  // MulBy3(x *Element)
        // Replaces the twelve-limb value at x with 3*x, computed as (2x) + x with a
        // REDUCE (one conditional subtraction of q) after each step.
        // Each REDUCE subtracts q at most once, so the stored result is below q only
        // if x was below q on entry - presumably guaranteed by callers; confirm.
        //
        // Frame: 88 bytes of locals (s0..s10, REDUCE's backup copy together with
        // R15); 8 bytes of arguments.
        // Registers: AX = x, DX,CX,BX,SI,DI,R8..R14 = limbs 0..11.
   110  TEXT ·MulBy3(SB), $88-8
   111  	MOVQ x+0(FP), AX
   112  	MOVQ 0(AX), DX
   113  	MOVQ 8(AX), CX
   114  	MOVQ 16(AX), BX
   115  	MOVQ 24(AX), SI
   116  	MOVQ 32(AX), DI
   117  	MOVQ 40(AX), R8
   118  	MOVQ 48(AX), R9
   119  	MOVQ 56(AX), R10
   120  	MOVQ 64(AX), R11
   121  	MOVQ 72(AX), R12
   122  	MOVQ 80(AX), R13
   123  	MOVQ 88(AX), R14
        	// double: adding each limb to itself with carry gives 2x
   124  	ADDQ DX, DX
   125  	ADCQ CX, CX
   126  	ADCQ BX, BX
   127  	ADCQ SI, SI
   128  	ADCQ DI, DI
   129  	ADCQ R8, R8
   130  	ADCQ R9, R9
   131  	ADCQ R10, R10
   132  	ADCQ R11, R11
   133  	ADCQ R12, R12
   134  	ADCQ R13, R13
   135  	ADCQ R14, R14
   136  
   137  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   138  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   139  
        	// add the original x, re-read from memory (nothing has been stored yet)
   140  	ADDQ 0(AX), DX
   141  	ADCQ 8(AX), CX
   142  	ADCQ 16(AX), BX
   143  	ADCQ 24(AX), SI
   144  	ADCQ 32(AX), DI
   145  	ADCQ 40(AX), R8
   146  	ADCQ 48(AX), R9
   147  	ADCQ 56(AX), R10
   148  	ADCQ 64(AX), R11
   149  	ADCQ 72(AX), R12
   150  	ADCQ 80(AX), R13
   151  	ADCQ 88(AX), R14
   152  
   153  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   154  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   155  
        	// store 3x over the input
   156  	MOVQ DX, 0(AX)
   157  	MOVQ CX, 8(AX)
   158  	MOVQ BX, 16(AX)
   159  	MOVQ SI, 24(AX)
   160  	MOVQ DI, 32(AX)
   161  	MOVQ R8, 40(AX)
   162  	MOVQ R9, 48(AX)
   163  	MOVQ R10, 56(AX)
   164  	MOVQ R11, 64(AX)
   165  	MOVQ R12, 72(AX)
   166  	MOVQ R13, 80(AX)
   167  	MOVQ R14, 88(AX)
   168  	RET
   169  
   170  // MulBy5(x *Element)
        // Replaces the twelve-limb value at x with 5*x, computed as 2*(2x) + x with a
        // REDUCE (one conditional subtraction of q) after each of the three steps.
        // Each REDUCE subtracts q at most once, so the stored result is below q only
        // if x was below q on entry - presumably guaranteed by callers; confirm.
        //
        // Frame: 88 bytes of locals (s0..s10, REDUCE's backup copy together with
        // R15); 8 bytes of arguments.
        // Registers: AX = x, DX,CX,BX,SI,DI,R8..R14 = limbs 0..11.
   171  TEXT ·MulBy5(SB), $88-8
   172  	MOVQ x+0(FP), AX
   173  	MOVQ 0(AX), DX
   174  	MOVQ 8(AX), CX
   175  	MOVQ 16(AX), BX
   176  	MOVQ 24(AX), SI
   177  	MOVQ 32(AX), DI
   178  	MOVQ 40(AX), R8
   179  	MOVQ 48(AX), R9
   180  	MOVQ 56(AX), R10
   181  	MOVQ 64(AX), R11
   182  	MOVQ 72(AX), R12
   183  	MOVQ 80(AX), R13
   184  	MOVQ 88(AX), R14
        	// first doubling: 2x
   185  	ADDQ DX, DX
   186  	ADCQ CX, CX
   187  	ADCQ BX, BX
   188  	ADCQ SI, SI
   189  	ADCQ DI, DI
   190  	ADCQ R8, R8
   191  	ADCQ R9, R9
   192  	ADCQ R10, R10
   193  	ADCQ R11, R11
   194  	ADCQ R12, R12
   195  	ADCQ R13, R13
   196  	ADCQ R14, R14
   197  
   198  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   199  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   200  
        	// second doubling: 4x
   201  	ADDQ DX, DX
   202  	ADCQ CX, CX
   203  	ADCQ BX, BX
   204  	ADCQ SI, SI
   205  	ADCQ DI, DI
   206  	ADCQ R8, R8
   207  	ADCQ R9, R9
   208  	ADCQ R10, R10
   209  	ADCQ R11, R11
   210  	ADCQ R12, R12
   211  	ADCQ R13, R13
   212  	ADCQ R14, R14
   213  
   214  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   215  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   216  
        	// 4x + x: the original x is re-read from memory (nothing stored yet)
   217  	ADDQ 0(AX), DX
   218  	ADCQ 8(AX), CX
   219  	ADCQ 16(AX), BX
   220  	ADCQ 24(AX), SI
   221  	ADCQ 32(AX), DI
   222  	ADCQ 40(AX), R8
   223  	ADCQ 48(AX), R9
   224  	ADCQ 56(AX), R10
   225  	ADCQ 64(AX), R11
   226  	ADCQ 72(AX), R12
   227  	ADCQ 80(AX), R13
   228  	ADCQ 88(AX), R14
   229  
   230  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   231  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   232  
        	// store 5x over the input
   233  	MOVQ DX, 0(AX)
   234  	MOVQ CX, 8(AX)
   235  	MOVQ BX, 16(AX)
   236  	MOVQ SI, 24(AX)
   237  	MOVQ DI, 32(AX)
   238  	MOVQ R8, 40(AX)
   239  	MOVQ R9, 48(AX)
   240  	MOVQ R10, 56(AX)
   241  	MOVQ R11, 64(AX)
   242  	MOVQ R12, 72(AX)
   243  	MOVQ R13, 80(AX)
   244  	MOVQ R14, 88(AX)
   245  	RET
   246  
   247  // MulBy13(x *Element)
        // Replaces the twelve-limb value at x with 13*x, computed as 8x + 4x + x:
        //     2x -> 4x (saved on the stack) -> 8x -> 8x + 4x = 12x -> 12x + x = 13x
        // with a REDUCE (one conditional subtraction of q) after every step.
        // Each REDUCE subtracts q at most once, so the stored result is below q only
        // if x was below q on entry - presumably guaranteed by callers; confirm.
        //
        // Frame: 184 bytes of locals = 23 eight-byte slots. s0..s10 (with R15) hold
        // REDUCE's backup copy; s11..s22 serve once as REDUCE backups and then keep
        // the saved 4x. 8 bytes of arguments.
        // Registers: AX = x, DX,CX,BX,SI,DI,R8..R14 = limbs 0..11.
   248  TEXT ·MulBy13(SB), $184-8
   249  	MOVQ x+0(FP), AX
   250  	MOVQ 0(AX), DX
   251  	MOVQ 8(AX), CX
   252  	MOVQ 16(AX), BX
   253  	MOVQ 24(AX), SI
   254  	MOVQ 32(AX), DI
   255  	MOVQ 40(AX), R8
   256  	MOVQ 48(AX), R9
   257  	MOVQ 56(AX), R10
   258  	MOVQ 64(AX), R11
   259  	MOVQ 72(AX), R12
   260  	MOVQ 80(AX), R13
   261  	MOVQ 88(AX), R14
        	// first doubling: 2x
   262  	ADDQ DX, DX
   263  	ADCQ CX, CX
   264  	ADCQ BX, BX
   265  	ADCQ SI, SI
   266  	ADCQ DI, DI
   267  	ADCQ R8, R8
   268  	ADCQ R9, R9
   269  	ADCQ R10, R10
   270  	ADCQ R11, R11
   271  	ADCQ R12, R12
   272  	ADCQ R13, R13
   273  	ADCQ R14, R14
   274  
   275  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   276  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   277  
        	// second doubling: 4x
   278  	ADDQ DX, DX
   279  	ADCQ CX, CX
   280  	ADCQ BX, BX
   281  	ADCQ SI, SI
   282  	ADCQ DI, DI
   283  	ADCQ R8, R8
   284  	ADCQ R9, R9
   285  	ADCQ R10, R10
   286  	ADCQ R11, R11
   287  	ADCQ R12, R12
   288  	ADCQ R13, R13
   289  	ADCQ R14, R14
   290  
   291  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP))
   292  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP))
   293  
        	// keep the reduced 4x in s11..s22 (overwriting REDUCE's backups there)
        	// so it can be added back after the next doubling
   294  	MOVQ DX, s11-96(SP)
   295  	MOVQ CX, s12-104(SP)
   296  	MOVQ BX, s13-112(SP)
   297  	MOVQ SI, s14-120(SP)
   298  	MOVQ DI, s15-128(SP)
   299  	MOVQ R8, s16-136(SP)
   300  	MOVQ R9, s17-144(SP)
   301  	MOVQ R10, s18-152(SP)
   302  	MOVQ R11, s19-160(SP)
   303  	MOVQ R12, s20-168(SP)
   304  	MOVQ R13, s21-176(SP)
   305  	MOVQ R14, s22-184(SP)
        	// third doubling: 8x
   306  	ADDQ DX, DX
   307  	ADCQ CX, CX
   308  	ADCQ BX, BX
   309  	ADCQ SI, SI
   310  	ADCQ DI, DI
   311  	ADCQ R8, R8
   312  	ADCQ R9, R9
   313  	ADCQ R10, R10
   314  	ADCQ R11, R11
   315  	ADCQ R12, R12
   316  	ADCQ R13, R13
   317  	ADCQ R14, R14
   318  
   319  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   320  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   321  
        	// 8x + saved 4x = 12x
   322  	ADDQ s11-96(SP), DX
   323  	ADCQ s12-104(SP), CX
   324  	ADCQ s13-112(SP), BX
   325  	ADCQ s14-120(SP), SI
   326  	ADCQ s15-128(SP), DI
   327  	ADCQ s16-136(SP), R8
   328  	ADCQ s17-144(SP), R9
   329  	ADCQ s18-152(SP), R10
   330  	ADCQ s19-160(SP), R11
   331  	ADCQ s20-168(SP), R12
   332  	ADCQ s21-176(SP), R13
   333  	ADCQ s22-184(SP), R14
   334  
   335  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   336  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   337  
        	// 12x + x: the original x is re-read from memory (nothing stored yet)
   338  	ADDQ 0(AX), DX
   339  	ADCQ 8(AX), CX
   340  	ADCQ 16(AX), BX
   341  	ADCQ 24(AX), SI
   342  	ADCQ 32(AX), DI
   343  	ADCQ 40(AX), R8
   344  	ADCQ 48(AX), R9
   345  	ADCQ 56(AX), R10
   346  	ADCQ 64(AX), R11
   347  	ADCQ 72(AX), R12
   348  	ADCQ 80(AX), R13
   349  	ADCQ 88(AX), R14
   350  
   351  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   352  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   353  
        	// store 13x over the input
   354  	MOVQ DX, 0(AX)
   355  	MOVQ CX, 8(AX)
   356  	MOVQ BX, 16(AX)
   357  	MOVQ SI, 24(AX)
   358  	MOVQ DI, 32(AX)
   359  	MOVQ R8, 40(AX)
   360  	MOVQ R9, 48(AX)
   361  	MOVQ R10, 56(AX)
   362  	MOVQ R11, 64(AX)
   363  	MOVQ R12, 72(AX)
   364  	MOVQ R13, 80(AX)
   365  	MOVQ R14, 88(AX)
   366  	RET
   367  
   368  // Butterfly(a, b *Element) sets a = a + b; b = a - b
        // Both results are computed from the values a and b had on entry:
        //   1. sum  = a + b, left unreduced and parked in R15 + s0..s10;
        //   2. diff = a - b; if the subtraction borrows (a < b) q is added back;
        //      diff is stored to b;
        //   3. sum is reloaded, passed through REDUCE (one conditional subtraction
        //      of q) and stored to a.
        // The results are below q only if a and b were below q on entry -
        // presumably guaranteed by callers; confirm.
        //
        // Frame: 88 bytes of locals (s0..s10); 16 bytes of arguments (two pointers).
        // Registers: AX = current operand pointer (also scratch for the q
        // immediates), DX,CX,BX,SI,DI,R8..R14 = limbs 0..11, R15 = limb 0 of the
        // parked sum and later REDUCE's backup of limb 0.
   369  TEXT ·Butterfly(SB), $88-16
        	// DX..R14 = b
   370  	MOVQ b+8(FP), AX
   371  	MOVQ 0(AX), DX
   372  	MOVQ 8(AX), CX
   373  	MOVQ 16(AX), BX
   374  	MOVQ 24(AX), SI
   375  	MOVQ 32(AX), DI
   376  	MOVQ 40(AX), R8
   377  	MOVQ 48(AX), R9
   378  	MOVQ 56(AX), R10
   379  	MOVQ 64(AX), R11
   380  	MOVQ 72(AX), R12
   381  	MOVQ 80(AX), R13
   382  	MOVQ 88(AX), R14
        	// DX..R14 = b + a (not reduced yet)
   383  	MOVQ a+0(FP), AX
   384  	ADDQ 0(AX), DX
   385  	ADCQ 8(AX), CX
   386  	ADCQ 16(AX), BX
   387  	ADCQ 24(AX), SI
   388  	ADCQ 32(AX), DI
   389  	ADCQ 40(AX), R8
   390  	ADCQ 48(AX), R9
   391  	ADCQ 56(AX), R10
   392  	ADCQ 64(AX), R11
   393  	ADCQ 72(AX), R12
   394  	ADCQ 80(AX), R13
   395  	ADCQ 88(AX), R14
        	// park the sum in R15 and s0..s10 while the difference is computed
   396  	MOVQ DX, R15
   397  	MOVQ CX, s0-8(SP)
   398  	MOVQ BX, s1-16(SP)
   399  	MOVQ SI, s2-24(SP)
   400  	MOVQ DI, s3-32(SP)
   401  	MOVQ R8, s4-40(SP)
   402  	MOVQ R9, s5-48(SP)
   403  	MOVQ R10, s6-56(SP)
   404  	MOVQ R11, s7-64(SP)
   405  	MOVQ R12, s8-72(SP)
   406  	MOVQ R13, s9-80(SP)
   407  	MOVQ R14, s10-88(SP)
        	// DX..R14 = a (AX still points at a)
   408  	MOVQ 0(AX), DX
   409  	MOVQ 8(AX), CX
   410  	MOVQ 16(AX), BX
   411  	MOVQ 24(AX), SI
   412  	MOVQ 32(AX), DI
   413  	MOVQ 40(AX), R8
   414  	MOVQ 48(AX), R9
   415  	MOVQ 56(AX), R10
   416  	MOVQ 64(AX), R11
   417  	MOVQ 72(AX), R12
   418  	MOVQ 80(AX), R13
   419  	MOVQ 88(AX), R14
        	// DX..R14 = a - b; the carry flag after the last SBBQ signals a borrow
   420  	MOVQ b+8(FP), AX
   421  	SUBQ 0(AX), DX
   422  	SBBQ 8(AX), CX
   423  	SBBQ 16(AX), BX
   424  	SBBQ 24(AX), SI
   425  	SBBQ 32(AX), DI
   426  	SBBQ 40(AX), R8
   427  	SBBQ 48(AX), R9
   428  	SBBQ 56(AX), R10
   429  	SBBQ 64(AX), R11
   430  	SBBQ 72(AX), R12
   431  	SBBQ 80(AX), R13
   432  	SBBQ 88(AX), R14
        	// no borrow (a >= b): the difference is final, skip the correction
   433  	JCC  l1
        	// borrow (a < b): add q back. The immediates are the twelve limbs of q
        	// (same values as the q<> table). AX is used as scratch for them, and
        	// the MOVQ loads leave the carry flag intact between the ADCQs.
   434  	MOVQ $1, AX
   435  	ADDQ AX, DX
   436  	MOVQ $0x33c7e63f86840000, AX
   437  	ADCQ AX, CX
   438  	MOVQ $0xd0b685e868524ec0, AX
   439  	ADCQ AX, BX
   440  	MOVQ $0x4302aa3c258de7de, AX
   441  	ADCQ AX, SI
   442  	MOVQ $0xe292cd15edb646a5, AX
   443  	ADCQ AX, DI
   444  	MOVQ $0x0a7eb1cb3d06e646, AX
   445  	ADCQ AX, R8
   446  	MOVQ $0xeb02c812ea04faaa, AX
   447  	ADCQ AX, R9
   448  	MOVQ $0xccc6ae73c42a46d9, AX
   449  	ADCQ AX, R10
   450  	MOVQ $0xfbf23221455163a6, AX
   451  	ADCQ AX, R11
   452  	MOVQ $0x5c978cd2fac2ce89, AX
   453  	ADCQ AX, R12
   454  	MOVQ $0xe2ac127e1e3568cf, AX
   455  	ADCQ AX, R13
   456  	MOVQ $0x000f76adbb5bb98a, AX
   457  	ADCQ AX, R14
   458  
   459  l1:
        	// b = a - b (AX is reloaded because the correction path clobbers it)
   460  	MOVQ b+8(FP), AX
   461  	MOVQ DX, 0(AX)
   462  	MOVQ CX, 8(AX)
   463  	MOVQ BX, 16(AX)
   464  	MOVQ SI, 24(AX)
   465  	MOVQ DI, 32(AX)
   466  	MOVQ R8, 40(AX)
   467  	MOVQ R9, 48(AX)
   468  	MOVQ R10, 56(AX)
   469  	MOVQ R11, 64(AX)
   470  	MOVQ R12, 72(AX)
   471  	MOVQ R13, 80(AX)
   472  	MOVQ R14, 88(AX)
        	// bring the parked sum back into DX..R14; REDUCE below then reuses
        	// R15 and s0..s10 as its backup storage
   473  	MOVQ R15, DX
   474  	MOVQ s0-8(SP), CX
   475  	MOVQ s1-16(SP), BX
   476  	MOVQ s2-24(SP), SI
   477  	MOVQ s3-32(SP), DI
   478  	MOVQ s4-40(SP), R8
   479  	MOVQ s5-48(SP), R9
   480  	MOVQ s6-56(SP), R10
   481  	MOVQ s7-64(SP), R11
   482  	MOVQ s8-72(SP), R12
   483  	MOVQ s9-80(SP), R13
   484  	MOVQ s10-88(SP), R14
   485  
   486  	// reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   487  	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP))
   488  
        	// a = a + b
   489  	MOVQ a+0(FP), AX
   490  	MOVQ DX, 0(AX)
   491  	MOVQ CX, 8(AX)
   492  	MOVQ BX, 16(AX)
   493  	MOVQ SI, 24(AX)
   494  	MOVQ DI, 32(AX)
   495  	MOVQ R8, 40(AX)
   496  	MOVQ R9, 48(AX)
   497  	MOVQ R10, 56(AX)
   498  	MOVQ R11, 64(AX)
   499  	MOVQ R12, 72(AX)
   500  	MOVQ R13, 80(AX)
   501  	MOVQ R14, 88(AX)
   502  	RET