github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/crypto/curve25519/square_amd64.s (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This code was translated into a form compatible with 6a from the public
     6  // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
     7  
     8  // +build amd64,!gccgo,!appengine
     9  
    10  #include "const_amd64.h"
    11  
    12  // func square(out, in *[5]uint64): squares the five-limb value at in and stores five result limbs at out.
    13  TEXT ·square(SB),7,$0-16 // flag word 7 (NOPROF|DUPOK|NOSPLIT in Go's textflag.h); no local frame, 16 bytes of arguments
    14  	MOVQ out+0(FP), DI // DI = out
    15  	MOVQ in+8(FP), SI // SI = in; the limbs at 0,8,16,24,32(SI) are called a0..a4 below
    16  // Phase 1: build five 128-bit column sums (hi:lo): c0=R8:CX, c1=R10:R9, c2=R12:R11, c3=R14:R13, c4=BX:R15.
    17  	MOVQ 0(SI),AX // c0 = a0*a0
    18  	MULQ 0(SI) // MULQ leaves the 128-bit product in DX:AX
    19  	MOVQ AX,CX
    20  	MOVQ DX,R8
    21  	MOVQ 0(SI),AX // c1 = 2*a0*a1
    22  	SHLQ $1,AX // cross terms are doubled before multiplying; assumes limbs stay well below 2^63 -- TODO confirm against callers
    23  	MULQ 8(SI)
    24  	MOVQ AX,R9
    25  	MOVQ DX,R10
    26  	MOVQ 0(SI),AX // c2 = 2*a0*a2
    27  	SHLQ $1,AX
    28  	MULQ 16(SI)
    29  	MOVQ AX,R11
    30  	MOVQ DX,R12
    31  	MOVQ 0(SI),AX // c3 = 2*a0*a3
    32  	SHLQ $1,AX
    33  	MULQ 24(SI)
    34  	MOVQ AX,R13
    35  	MOVQ DX,R14
    36  	MOVQ 0(SI),AX // c4 = 2*a0*a4
    37  	SHLQ $1,AX
    38  	MULQ 32(SI)
    39  	MOVQ AX,R15
    40  	MOVQ DX,BX
    41  	MOVQ 8(SI),AX // c2 += a1*a1
    42  	MULQ 8(SI)
    43  	ADDQ AX,R11
    44  	ADCQ DX,R12 // ADCQ carries the low-half overflow into the high half
    45  	MOVQ 8(SI),AX // c3 += 2*a1*a2
    46  	SHLQ $1,AX
    47  	MULQ 16(SI)
    48  	ADDQ AX,R13
    49  	ADCQ DX,R14
    50  	MOVQ 8(SI),AX // c4 += 2*a1*a3
    51  	SHLQ $1,AX
    52  	MULQ 24(SI)
    53  	ADDQ AX,R15
    54  	ADCQ DX,BX
    55  	MOVQ 8(SI),DX // c0 += 38*a1*a4
    56  	IMUL3Q $38,DX,AX // AX = 38*a1; terms whose limb indices sum to 5 or more wrap to a lower column with factor 19 (38 = 2*19 for cross terms), presumably because 2^255 = 19 mod 2^255-19 with 51-bit limbs
    57  	MULQ 32(SI)
    58  	ADDQ AX,CX
    59  	ADCQ DX,R8
    60  	MOVQ 16(SI),AX // c4 += a2*a2
    61  	MULQ 16(SI)
    62  	ADDQ AX,R15
    63  	ADCQ DX,BX
    64  	MOVQ 16(SI),DX // c0 += 38*a2*a3
    65  	IMUL3Q $38,DX,AX
    66  	MULQ 24(SI)
    67  	ADDQ AX,CX
    68  	ADCQ DX,R8
    69  	MOVQ 16(SI),DX // c1 += 38*a2*a4
    70  	IMUL3Q $38,DX,AX
    71  	MULQ 32(SI)
    72  	ADDQ AX,R9
    73  	ADCQ DX,R10
    74  	MOVQ 24(SI),DX // c1 += 19*a3*a3
    75  	IMUL3Q $19,DX,AX
    76  	MULQ 24(SI)
    77  	ADDQ AX,R9
    78  	ADCQ DX,R10
    79  	MOVQ 24(SI),DX // c2 += 38*a3*a4
    80  	IMUL3Q $38,DX,AX
    81  	MULQ 32(SI)
    82  	ADDQ AX,R11
    83  	ADCQ DX,R12
    84  	MOVQ 32(SI),DX // c3 += 19*a4*a4
    85  	IMUL3Q $19,DX,AX
    86  	MULQ 32(SI)
    87  	ADDQ AX,R13
    88  	ADCQ DX,R14
    89  	MOVQ $REDMASK51,SI // Phase 2: SI now holds REDMASK51 (from const_amd64.h, not shown; presumably 2^51-1); the in pointer is not used again
    90  	SHLQ $13,R8:CX // R8 = low 64 bits of c0>>51 (double-register shift moves CX's top 13 bits into R8)
    91  	ANDQ SI,CX // CX = c0 masked with REDMASK51
    92  	SHLQ $13,R10:R9 // R10 = c1>>51
    93  	ANDQ SI,R9 // R9 = masked c1 ...
    94  	ADDQ R8,R9 // ... plus the part of c0 above the mask
    95  	SHLQ $13,R12:R11 // R12 = c2>>51
    96  	ANDQ SI,R11
    97  	ADDQ R10,R11 // R11 = masked c2 + carry from c1
    98  	SHLQ $13,R14:R13 // R14 = c3>>51
    99  	ANDQ SI,R13
   100  	ADDQ R12,R13 // R13 = masked c3 + carry from c2
   101  	SHLQ $13,BX:R15 // BX = c4>>51
   102  	ANDQ SI,R15
   103  	ADDQ R14,R15 // R15 = masked c4 + carry from c3
   104  	IMUL3Q $19,BX,DX // the carry out of the top column wraps to limb 0 scaled by 19
   105  	ADDQ DX,CX
   106  	MOVQ CX,DX // Phase 3: sequential carry chain; DX carries bits 51 and up from each limb into the next
   107  	SHRQ $51,DX
   108  	ADDQ R9,DX // DX = limb 1 before masking
   109  	ANDQ SI,CX // CX = limb 0 (receives one more addition below)
   110  	MOVQ DX,R8
   111  	SHRQ $51,DX
   112  	ADDQ R11,DX // DX = limb 2 before masking
   113  	ANDQ SI,R8 // R8 = limb 1
   114  	MOVQ DX,R9
   115  	SHRQ $51,DX
   116  	ADDQ R13,DX // DX = limb 3 before masking
   117  	ANDQ SI,R9 // R9 = limb 2
   118  	MOVQ DX,AX
   119  	SHRQ $51,DX
   120  	ADDQ R15,DX // DX = limb 4 before masking
   121  	ANDQ SI,AX // AX = limb 3
   122  	MOVQ DX,R10
   123  	SHRQ $51,DX
   124  	IMUL3Q $19,DX,DX // carry out of limb 4 wraps to limb 0 scaled by 19
   125  	ADDQ DX,CX // CX is not masked again after this add
   126  	ANDQ SI,R10 // R10 = limb 4
   127  	MOVQ CX,0(DI) // out[0]
   128  	MOVQ R8,8(DI) // out[1]
   129  	MOVQ R9,16(DI) // out[2]
   130  	MOVQ AX,24(DI) // out[3]
   131  	MOVQ R10,32(DI) // out[4]
   132  	RET