git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/crypto/chacha/hchacha20_amd64.s (about)

     1  //go:build amd64 && gc && !purego
     2  
     3  #include "const.s"
     4  #include "macro.s"
     5  
     6  #define Dst DI // alias for DI: holds the out pointer in every function in this file
     7  #define Nonce AX // alias for AX: holds the nonce pointer
     8  #define Key BX // alias for BX: holds the key pointer
     9  #define Rounds DX // alias for DX: round counter, starts at 20 and steps down by 2
    10  
    11  // func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
        //
        // VEX-encoded variant working on 128-bit X registers. It builds four
        // 16-byte rows (sigma constant, key[0:16], key[16:32], nonce[0:16]), runs
        // the loop below 10 times (counter 20, decremented by 2; each pass invokes
        // the quarter-round macro twice), then writes rows X0 and X3 to out.
        // This body stores X0/X3 as they come out of the loop; it does not add the
        // input rows back in.
        //
        // Register use: X0..X3 = rows, X4 = extra operand handed to the round macro
        // (presumably scratch -- confirm in macro.s), X5/X6 = rol16/rol8 tables.
        // Text flag 4 is NOSPLIT in Go's textflag.h; $0-24 means no local frame
        // and 24 bytes of arguments (three pointers).
    12  TEXT ·hChaCha20AVX(SB), 4, $0-24
    13  	MOVQ out+0(FP), Dst // Dst = out
    14  	MOVQ nonce+8(FP), Nonce // Nonce = nonce
    15  	MOVQ key+16(FP), Key // Key = key
    16  
    17  	VMOVDQU ·sigma<>(SB), X0 // row 0: file-local sigma constant (defined outside this view)
    18  	VMOVDQU 0*16(Key), X1 // row 1: key[0:16]
    19  	VMOVDQU 1*16(Key), X2 // row 2: key[16:32]
    20  	VMOVDQU 0*16(Nonce), X3 // row 3: nonce[0:16]
    21  	VMOVDQU ·rol16_AVX2<>(SB), X5 // presumably a byte-shuffle mask for 16-bit rotates -- confirm in const.s
    22  	VMOVDQU ·rol8_AVX2<>(SB), X6 // presumably a byte-shuffle mask for 8-bit rotates -- confirm in const.s
    23  	MOVQ    $20, Rounds // 20 rounds, consumed two per loop pass
    24  
    25  CHACHA_LOOP: // label is local to this TEXT block
    26  	CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6) // first round of the pair (macro from macro.s)
    27  	CHACHA_SHUFFLE_AVX(X1, X2, X3) // presumably rotates rows into diagonal form -- confirm in macro.s
    28  	CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6) // second round of the pair
    29  	CHACHA_SHUFFLE_AVX(X3, X2, X1) // reversed operand order; presumably undoes the shuffle above
    30  	SUBQ $2, Rounds // two rounds done; sets ZF when the counter hits 0
    31  	JNZ  CHACHA_LOOP // keep going while Rounds != 0
    32  
    33  	VMOVDQU X0, 0*16(Dst) // out[0:16] = row 0
    34  	VMOVDQU X3, 1*16(Dst) // out[16:32] = row 3
    35  	VZEROUPPER // clear upper YMM halves before returning to code that may use legacy SSE
    36  	RET
    37  
    38  // func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
        //
        // SSE2 variant: same row layout and loop structure as the other functions
        // in this file, but using unaligned legacy-SSE moves (MOVOU) and no rotate
        // lookup tables -- the round macro receives only X4 as an extra operand
        // (presumably scratch -- confirm in macro.s).
        // Rows: X0 = sigma constant, X1 = key[0:16], X2 = key[16:32],
        // X3 = nonce[0:16]. After 10 loop passes (20 rounds, two per pass) rows
        // X0 and X3 are written to out unchanged.
        // Text flag 4 is NOSPLIT in Go's textflag.h; $0-24 means no local frame
        // and 24 bytes of arguments.
    39  TEXT ·hChaCha20SSE2(SB), 4, $0-24
    40  	MOVQ out+0(FP), Dst // Dst = out
    41  	MOVQ nonce+8(FP), Nonce // Nonce = nonce
    42  	MOVQ key+16(FP), Key // Key = key
    43  
    44  	MOVOU ·sigma<>(SB), X0 // row 0: file-local sigma constant (defined outside this view)
    45  	MOVOU 0*16(Key), X1 // row 1: key[0:16]
    46  	MOVOU 1*16(Key), X2 // row 2: key[16:32]
    47  	MOVOU 0*16(Nonce), X3 // row 3: nonce[0:16]
    48  	MOVQ  $20, Rounds // 20 rounds, consumed two per loop pass
    49  
    50  CHACHA_LOOP: // label is local to this TEXT block
    51  	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) // first round of the pair (macro from macro.s)
    52  	CHACHA_SHUFFLE_SSE(X1, X2, X3) // presumably rotates rows into diagonal form -- confirm in macro.s
    53  	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4) // second round of the pair
    54  	CHACHA_SHUFFLE_SSE(X3, X2, X1) // reversed operand order; presumably undoes the shuffle above
    55  	SUBQ $2, Rounds // two rounds done; sets ZF when the counter hits 0
    56  	JNZ  CHACHA_LOOP // keep going while Rounds != 0
    57  
    58  	MOVOU X0, 0*16(Dst) // out[0:16] = row 0
    59  	MOVOU X3, 1*16(Dst) // out[16:32] = row 3
    60  	RET
    61  
    62  // func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
        //
        // SSSE3 variant: legacy-SSE moves (MOVOU) plus two 16-byte tables, rol16
        // and rol8, loaded into X5/X6 and handed to the round macro (presumably
        // byte-shuffle masks for the 16- and 8-bit rotates -- confirm in const.s).
        // Rows: X0 = sigma constant, X1 = key[0:16], X2 = key[16:32],
        // X3 = nonce[0:16]; X4 is an extra macro operand (presumably scratch).
        // After 10 loop passes (20 rounds, two per pass) rows X0 and X3 are
        // written to out unchanged.
        // Text flag 4 is NOSPLIT in Go's textflag.h; $0-24 means no local frame
        // and 24 bytes of arguments.
    63  TEXT ·hChaCha20SSSE3(SB), 4, $0-24
    64  	MOVQ out+0(FP), Dst // Dst = out
    65  	MOVQ nonce+8(FP), Nonce // Nonce = nonce
    66  	MOVQ key+16(FP), Key // Key = key
    67  
    68  	MOVOU ·sigma<>(SB), X0 // row 0: file-local sigma constant (defined outside this view)
    69  	MOVOU 0*16(Key), X1 // row 1: key[0:16]
    70  	MOVOU 1*16(Key), X2 // row 2: key[16:32]
    71  	MOVOU 0*16(Nonce), X3 // row 3: nonce[0:16]
    72  	MOVOU ·rol16<>(SB), X5 // rotate-by-16 table for the round macro
    73  	MOVOU ·rol8<>(SB), X6 // rotate-by-8 table for the round macro
    74  	MOVQ  $20, Rounds // 20 rounds, consumed two per loop pass
    75  
    76  chacha_loop: // label is local to this TEXT block (lower-case here, unlike the other two functions)
    77  	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) // first round of the pair (macro from macro.s)
    78  	CHACHA_SHUFFLE_SSE(X1, X2, X3) // presumably rotates rows into diagonal form -- confirm in macro.s
    79  	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6) // second round of the pair
    80  	CHACHA_SHUFFLE_SSE(X3, X2, X1) // reversed operand order; presumably undoes the shuffle above
    81  	SUBQ $2, Rounds // two rounds done; sets ZF when the counter hits 0
    82  	JNZ  chacha_loop // keep going while Rounds != 0
    83  
    84  	MOVOU X0, 0*16(Dst) // out[0:16] = row 0
    85  	MOVOU X3, 1*16(Dst) // out[16:32] = row 3
    86  	RET
    87  
    88  #undef Dst // drop the register aliases defined at the top of this file
    89  #undef Nonce
    90  #undef Key
    91  #undef Rounds