//go:build amd64 && gc && !purego

// Origin: git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/crypto/chacha/hchacha20_amd64.s

#include "const.s"
#include "macro.s"

// Register aliases used by every routine in this file; they are released
// again by the #undef lines at the bottom.
#define Dst DI
#define Nonce AX
#define Key BX
#define Rounds DX

// func hChaCha20AVX(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// VEX-encoded variant. The working state lives in X0..X3: X0 holds the
// sigma constant, X1/X2 the two 16-byte halves of key, X3 the 16-byte
// nonce. After 20 rounds, X0 and X3 are written to out (32 bytes total).
// X4 is passed to the quarter-round macro as an extra register, X5/X6
// carry the rol16/rol8 tables.
// TEXT flag 4 is NOSPLIT; $0-24 means no local frame and 24 bytes of
// arguments (three pointers).
TEXT ·hChaCha20AVX(SB), 4, $0-24
	// Fetch the three pointer arguments.
	MOVQ key+16(FP), Key
	MOVQ nonce+8(FP), Nonce
	MOVQ out+0(FP), Dst

	// Tables consumed by CHACHA_QROUND_AVX (presumably byte-shuffle masks
	// for the 16- and 8-bit rotations; the macro lives in macro.s).
	VMOVDQU ·rol8_AVX2<>(SB), X6
	VMOVDQU ·rol16_AVX2<>(SB), X5

	// Initial state rows.
	VMOVDQU 0(Nonce), X3
	VMOVDQU 16(Key), X2
	VMOVDQU 0(Key), X1
	VMOVDQU ·sigma<>(SB), X0

	// Each pass below performs two quarter-round invocations and takes 2
	// off the counter, so the loop body executes 10 times.
	MOVQ $20, Rounds

chacha_loop:
	CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_AVX(X1, X2, X3)
	CHACHA_QROUND_AVX(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_AVX(X3, X2, X1) // operands reversed relative to the first shuffle
	SUBQ $2, Rounds
	JNZ chacha_loop

	// Only the first and last rows form the result.
	VMOVDQU X0, 0(Dst)
	VMOVDQU X3, 16(Dst)
	VZEROUPPER
	RET

// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// SSE2 variant. Same data flow as the other routines in this file: state
// rows in X0..X3 (sigma, key low half, key high half, nonce), 20 rounds,
// then X0 and X3 are stored to out. No rotation tables are loaded here;
// the quarter-round macro only receives X4 as an extra register.
// TEXT flag 4 is NOSPLIT; $0-24 means no local frame and 24 bytes of
// arguments (three pointers).
TEXT ·hChaCha20SSE2(SB), 4, $0-24
	// Fetch the three pointer arguments.
	MOVQ key+16(FP), Key
	MOVQ nonce+8(FP), Nonce
	MOVQ out+0(FP), Dst

	// Initial state rows.
	MOVOU 0(Nonce), X3
	MOVOU 16(Key), X2
	MOVOU 0(Key), X1
	MOVOU ·sigma<>(SB), X0

	// Two quarter-round invocations per pass, counter drops by 2: 10 passes.
	MOVQ $20, Rounds

chacha_loop:
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE_SSE(X1, X2, X3)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE_SSE(X3, X2, X1) // operands reversed relative to the first shuffle
	SUBQ $2, Rounds
	JNZ chacha_loop

	// Only the first and last rows form the result.
	MOVOU X0, 0(Dst)
	MOVOU X3, 16(Dst)
	RET

// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// SSSE3 variant. State rows in X0..X3 (sigma, key low half, key high
// half, nonce), 20 rounds, then X0 and X3 are stored to out. X5/X6 carry
// the rol16/rol8 tables handed to the quarter-round macro, X4 is passed
// as an extra register.
// TEXT flag 4 is NOSPLIT; $0-24 means no local frame and 24 bytes of
// arguments (three pointers).
TEXT ·hChaCha20SSSE3(SB), 4, $0-24
	// Fetch the three pointer arguments.
	MOVQ key+16(FP), Key
	MOVQ nonce+8(FP), Nonce
	MOVQ out+0(FP), Dst

	// Tables consumed by CHACHA_QROUND_SSSE3 (presumably byte-shuffle
	// masks for the 16- and 8-bit rotations; the macro lives in macro.s).
	MOVOU ·rol8<>(SB), X6
	MOVOU ·rol16<>(SB), X5

	// Initial state rows.
	MOVOU 0(Nonce), X3
	MOVOU 16(Key), X2
	MOVOU 0(Key), X1
	MOVOU ·sigma<>(SB), X0

	// Two quarter-round invocations per pass, counter drops by 2: 10 passes.
	MOVQ $20, Rounds

chacha_loop:
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_SSE(X1, X2, X3)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_SSE(X3, X2, X1) // operands reversed relative to the first shuffle
	SUBQ $2, Rounds
	JNZ chacha_loop

	// Only the first and last rows form the result.
	MOVOU X0, 0(Dst)
	MOVOU X3, 16(Dst)
	RET

// Release the register aliases so they cannot leak into other code.
#undef Dst
#undef Nonce
#undef Key
#undef Rounds