github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/x/crypto/poly1305/sum_amd64.s (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build amd64,!gccgo,!appengine
     6  
     7  #include "textflag.h"
     8  // POLY1305_ADD adds one 16-byte message block into the accumulator h2:h1:h0.
        // The two 64-bit words at msg are added to h0 and h1 with carry, then 1 plus
        // the carry is added to h2 (h2 is the third 64-bit limb, so that 1 is bit 128
        // of the accumulator). Finally msg is advanced by 16 bytes; LEAQ is used for
        // the pointer bump, which does not modify the flags.
     9  #define POLY1305_ADD(msg, h0, h1, h2) \
    10  	ADDQ 0(msg), h0;  \
    11  	ADCQ 8(msg), h1;  \
    12  	ADCQ $1, h2;      \
    13  	LEAQ 16(msg), msg
    14  // POLY1305_MUL multiplies the accumulator h2:h1:h0 by r1:r0 and partially
        // reduces the product modulo 2^130 - 5, leaving the result in h2:h1:h0.
        //
        // The first two instruction groups build the product in t3:t2:t1:t0: the
        // first group accumulates r0*h0, r0*h1 and r0*h2, the second adds r1*h0,
        // r1*h1 and r1*h2 (h0 is reused as a scratch register in the second group,
        // after its last use as a multiplicand).
        //
        // The last group keeps the low 130 bits of the product (t0, t1, t2 & 3) and,
        // with c = the product bits from 2^130 upward, adds 4*c (t3:t2 with its low
        // two bits cleared) and then c (t3:t2 shifted right by two), i.e. 5*c in
        // total. The result may still be >= 2^130 - 5; no final subtraction is
        // performed here.
        //
        // MULQ takes one operand from AX and writes its result to DX:AX, so AX and
        // DX are clobbered in addition to t0-t3 and the flags.
    15  #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
    16  	MOVQ  r0, AX;                  \
    17  	MULQ  h0;                      \
    18  	MOVQ  AX, t0;                  \
    19  	MOVQ  DX, t1;                  \
    20  	MOVQ  r0, AX;                  \
    21  	MULQ  h1;                      \
    22  	ADDQ  AX, t1;                  \
    23  	ADCQ  $0, DX;                  \
    24  	MOVQ  r0, t2;                  \
    25  	IMULQ h2, t2;                  \
    26  	ADDQ  DX, t2;                  \
    27  	                               \
    28  	MOVQ  r1, AX;                  \
    29  	MULQ  h0;                      \
    30  	ADDQ  AX, t1;                  \
    31  	ADCQ  $0, DX;                  \
    32  	MOVQ  DX, h0;                  \
    33  	MOVQ  r1, t3;                  \
    34  	IMULQ h2, t3;                  \
    35  	MOVQ  r1, AX;                  \
    36  	MULQ  h1;                      \
    37  	ADDQ  AX, t2;                  \
    38  	ADCQ  DX, t3;                  \
    39  	ADDQ  h0, t2;                  \
    40  	ADCQ  $0, t3;                  \
    41  	                               \
    42  	MOVQ  t0, h0;                  \
    43  	MOVQ  t1, h1;                  \
    44  	MOVQ  t2, h2;                  \
    45  	ANDQ  $3, h2;                  \
    46  	MOVQ  t2, t0;                  \
    47  	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
    48  	ADDQ  t0, h0;                  \
    49  	ADCQ  t3, h1;                  \
    50  	ADCQ  $0, h2;                  \
    51  	SHRQ  $2, t3, t2;              \
    52  	SHRQ  $2, t3;                  \
    53  	ADDQ  t2, h0;                  \
    54  	ADCQ  t3, h1;                  \
    55  	ADCQ  $0, h2
    56  
    57  DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF // ANDed with key bytes 0-7 to form r0
    58  DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC // ANDed with key bytes 8-15 to form r1
    59  GLOBL ·poly1305Mask<>(SB), RODATA, $16 // 16-byte read-only symbol; the <> suffix makes it local to this file
    60  
    61  // func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
        //
        // Computes a 16-byte tag over the mlen bytes starting at m and stores it
        // at out. The first 16 bytes of key are masked with poly1305Mask to form
        // the multiplier r; the last 16 bytes of key are added to the reduced
        // accumulator to produce the tag.
        //
        // Register use:
        //   DI = out, SI = message cursor, R15 = bytes remaining
        //   R11:R12 = r0:r1 (masked multiplier)
        //   R8, R9, R10 = h0, h1, h2 (accumulator limbs)
        //   AX, DX, BX, CX, R13, R14 = scratch
    62  TEXT ·poly1305(SB), $0-32 // no local frame, 32 bytes of arguments
    63  	MOVQ out+0(FP), DI
    64  	MOVQ m+8(FP), SI
    65  	MOVQ mlen+16(FP), R15
    66  	MOVQ key+24(FP), AX
    67  
    68  	MOVQ 0(AX), R11
    69  	MOVQ 8(AX), R12
    70  	ANDQ ·poly1305Mask<>(SB), R11   // r0 = key bytes 0-7, masked
    71  	ANDQ ·poly1305Mask<>+8(SB), R12 // r1 = key bytes 8-15, masked
    72  	XORQ R8, R8                    // h0 = 0
    73  	XORQ R9, R9                    // h1 = 0
    74  	XORQ R10, R10                  // h2 = 0
    75  
    76  	CMPQ R15, $16
    77  	JB   bytes_between_0_and_15 // unsigned compare: fewer than 16 bytes, skip the full-block loop
    78  
    79  loop: // one iteration per full 16-byte block
    80  	POLY1305_ADD(SI, R8, R9, R10) // h += block + 2^128; SI += 16
    81  
    82  multiply: // also entered from the tail path after the padded partial block is added
    83  	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) // h = h * r, partially reduced
    84  	SUBQ $16, R15
    85  	CMPQ R15, $16
    86  	JAE  loop // at least one more full block remains
    87  
    88  bytes_between_0_and_15:
    89  	TESTQ R15, R15
    90  	JZ    done // no trailing bytes
    91  	MOVQ  $1, BX // CX:BX starts as 1; the loop below shifts this 1 to just above the last message byte
    92  	XORQ  CX, CX
    93  	XORQ  R13, R13 // clear R13 (used as a temporary by POLY1305_MUL) before byte loads
    94  	ADDQ  R15, SI // SI = one past the last message byte; the tail is read backwards
    95  
    96  flush_buffer: // build the partial block in CX:BX, last byte first
    97  	SHLQ $8, BX, CX // CX:BX <<= 8 (double shift: high bits of BX move into CX)
    98  	SHLQ $8, BX
    99  	MOVB -1(SI), R13
   100  	XORQ R13, BX // place the byte in the low 8 bits just vacated by the shift
   101  	DECQ SI
   102  	DECQ R15
   103  	JNZ  flush_buffer
   104  
   105  	ADDQ BX, R8 // h += padded partial block
   106  	ADCQ CX, R9
   107  	ADCQ $0, R10
   108  	MOVQ $16, R15 // so that SUBQ $16 after the multiply leaves 0 and control falls through to done
   109  	JMP  multiply
   110  
   111  done: // final reduction: select h - (2^130 - 5) if h >= 2^130 - 5, else h
   112  	MOVQ    R8, AX
   113  	MOVQ    R9, BX
   114  	SUBQ    $0xFFFFFFFFFFFFFFFB, AX // R10:BX:AX = h - (2^130 - 5), three-limb subtract with borrow
   115  	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
   116  	SBBQ    $3, R10 // only the resulting borrow flag is used; R10 is not read again
   117  	CMOVQCS R8, AX // borrow set: h < 2^130 - 5, keep the unsubtracted limbs
   118  	CMOVQCS R9, BX
   119  	MOVQ    key+24(FP), R8 // reload key pointer (AX was reused as scratch)
   120  	ADDQ    16(R8), AX // add key bytes 16-31 as a 128-bit value; the carry out of the high word is discarded
   121  	ADCQ    24(R8), BX
   122  
   123  	MOVQ AX, 0(DI) // store the 16-byte tag at out
   124  	MOVQ BX, 8(DI)
   125  	RET