github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/internal/poly1305/sum_amd64.s (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build gc && !purego
     6  // +build gc,!purego
     7  
     8  #include "textflag.h"
     9  
    // POLY1305_ADD(msg, h0, h1, h2) adds one 16-byte message block to the
    // accumulator held in the three 64-bit limbs h2:h1:h0 (h0 least significant).
    //
    // The quadwords at msg and msg+8 are added to h0 and h1 with carry
    // propagation. The constant 1 added to h2 (together with the carry out of
    // h1) contributes 2^128, i.e. one set bit directly above the 16 block bytes.
    // Finally msg is advanced by 16 so it addresses the next block.
    //
    // Modifies: h0, h1, h2, msg and the arithmetic flags.
    10  #define POLY1305_ADD(msg, h0, h1, h2) \
    11  	ADDQ 0(msg), h0;  \
    12  	ADCQ 8(msg), h1;  \
    13  	ADCQ $1, h2;      \
    14  	LEAQ 16(msg), msg
    15  
    // POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) replaces the accumulator
    // h2:h1:h0 with h * r, partially reduced modulo 2^130 - 5, where r is the
    // 128-bit value r1:r0.
    //
    // First and second instruction groups: schoolbook multiplication into the
    // four limbs t3:t2:t1:t0.
    //   - r0*h0 and r0*h1 use MULQ (full 128-bit result in DX:AX); r0*h2 uses
    //     IMULQ, which keeps only the low 64 bits of the product.
    //   - r1*h0 and r1*h1 use MULQ; r1*h2 uses IMULQ. Once r1*h0 has been
    //     formed, h0 is no longer needed as an operand and is reused as scratch
    //     for the high half of that product.
    //   NOTE(review): the IMULQ truncation is only lossless if h2 and r are
    //   small enough; presumably the caller guarantees this - confirm.
    //
    // Third instruction group: reduction. Write the product as lo + 2^130 * c,
    // where lo is t0, t1 and the low two bits of t2, and c is t3:t2 shifted
    // right by two. Since 2^130 = 5 (mod 2^130 - 5), the result is lo + 5*c,
    // computed as lo + 4*c + c:
    //   - 4*c is t3:(t2 & ~3), added to h1:h0 with the carry going into h2;
    //   - c is produced by the double-register shift (SHRQ $2, t3, t2 followed
    //     by SHRQ $2, t3) and added the same way.
    // The result is not fully reduced: h2 is (t2 & 3) plus up to two carries,
    // so it is at most 5.
    //
    // r0 and r1 are only read. Clobbers AX and DX (implicit MULQ operands),
    // t0, t1, t2, t3 and the arithmetic flags.
    16  #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
    17  	MOVQ  r0, AX;                  \
    18  	MULQ  h0;                      \
    19  	MOVQ  AX, t0;                  \
    20  	MOVQ  DX, t1;                  \
    21  	MOVQ  r0, AX;                  \
    22  	MULQ  h1;                      \
    23  	ADDQ  AX, t1;                  \
    24  	ADCQ  $0, DX;                  \
    25  	MOVQ  r0, t2;                  \
    26  	IMULQ h2, t2;                  \
    27  	ADDQ  DX, t2;                  \
    28  	                               \
    29  	MOVQ  r1, AX;                  \
    30  	MULQ  h0;                      \
    31  	ADDQ  AX, t1;                  \
    32  	ADCQ  $0, DX;                  \
    33  	MOVQ  DX, h0;                  \
    34  	MOVQ  r1, t3;                  \
    35  	IMULQ h2, t3;                  \
    36  	MOVQ  r1, AX;                  \
    37  	MULQ  h1;                      \
    38  	ADDQ  AX, t2;                  \
    39  	ADCQ  DX, t3;                  \
    40  	ADDQ  h0, t2;                  \
    41  	ADCQ  $0, t3;                  \
    42  	                               \
    43  	MOVQ  t0, h0;                  \
    44  	MOVQ  t1, h1;                  \
    45  	MOVQ  t2, h2;                  \
    46  	ANDQ  $3, h2;                  \
    47  	MOVQ  t2, t0;                  \
    48  	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
    49  	ADDQ  t0, h0;                  \
    50  	ADCQ  t3, h1;                  \
    51  	ADCQ  $0, h2;                  \
    52  	SHRQ  $2, t3, t2;              \
    53  	SHRQ  $2, t3;                  \
    54  	ADDQ  t2, h0;                  \
    55  	ADCQ  t3, h1;                  \
    56  	ADCQ  $0, h2
    57  
    // update absorbs msg into the Poly1305 accumulator stored in state.
    //
    // Layout of state as read here: state[0..2] = h0, h1, h2 (accumulator limbs,
    // least significant first), state[3..4] = r0, r1 (multiplier). Only the
    // three accumulator words are written back; the remaining words are not
    // touched by this routine.
    //
    // Every full 16-byte block is added to h with an extra 2^128 bit and then
    // multiplied by r. A trailing partial block of 1..15 bytes is assembled
    // little-endian in CX:BX with a single 0x01 byte placed directly after the
    // last message byte, added to h without the 2^128 bit, and multiplied once
    // more. An empty msg leaves state unchanged.
    //
    // Register use: DI = state, SI = message cursor, R15 = bytes remaining,
    // R8/R9/R10 = h0/h1/h2, R11/R12 = r0/r1, BX/CX/R13/R14 = scratch passed to
    // POLY1305_MUL (BX/CX/R13 also build the partial block). AX and DX are
    // clobbered by the multiply. Frame: $0-32 (no locals, 32 bytes of arguments).
    58  // func update(state *[7]uint64, msg []byte)
    59  TEXT ·update(SB), $0-32
    60  	MOVQ state+0(FP), DI // DI = pointer to the state words
    61  	MOVQ msg_base+8(FP), SI // SI = first message byte
    62  	MOVQ msg_len+16(FP), R15 // R15 = message length in bytes
    63  
    64  	MOVQ 0(DI), R8   // h0
    65  	MOVQ 8(DI), R9   // h1
    66  	MOVQ 16(DI), R10 // h2
    67  	MOVQ 24(DI), R11 // r0
    68  	MOVQ 32(DI), R12 // r1
    69  
    70  	CMPQ R15, $16
    71  	JB   bytes_between_0_and_15 // unsigned: fewer than 16 bytes, no full block
    72  
    73  loop: // invariant: at least 16 bytes remain at SI
    74  	POLY1305_ADD(SI, R8, R9, R10) // h += block + 2^128; SI += 16
    75  
    76  multiply: // also entered from the partial-block path with R15 preset to 16
    77  	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) // h = h * r (partially reduced)
    78  	SUBQ $16, R15 // account for the block just processed
    79  	CMPQ R15, $16
    80  	JAE  loop // unsigned: another full block is available
    81  
    82  bytes_between_0_and_15:
    83  	TESTQ R15, R15
    84  	JZ    done // nothing left over
    85  	MOVQ  $1, BX // seed: becomes the 0x01 byte just above the last message byte
    86  	XORQ  CX, CX // high half of the partial block
    87  	XORQ  R13, R13 // zero once; only its low byte is overwritten below
    88  	ADDQ  R15, SI // SI = one past the last message byte
    89  
    90  flush_buffer: // walk backwards, shifting CX:BX left one byte per iteration
    91  	SHLQ $8, BX, CX // CX = CX<<8 | top byte of BX (double-register shift)
    92  	SHLQ $8, BX // low byte of BX is now zero
    93  	MOVB -1(SI), R13 // low byte of R13 = next message byte, upper bits stay zero
    94  	XORQ R13, BX // insert the byte into the cleared low byte of BX
    95  	DECQ SI
    96  	DECQ R15 // sets ZF when the last byte has been consumed
    97  	JNZ  flush_buffer
    98  
    99  	ADDQ BX, R8 // h += partial block (no 2^128 bit on this path)
    100  	ADCQ CX, R9
    101  	ADCQ $0, R10
    102  	MOVQ $16, R15 // SUBQ $16 after the multiply then leaves 0, so control reaches done
    103  	JMP  multiply
    104  
    105  done:
    106  	MOVQ R8, 0(DI) // store h0, h1, h2 back into state
    107  	MOVQ R9, 8(DI)
    108  	MOVQ R10, 16(DI)
    109  	RET