// Copyright (c) 2016 Andreas Auernhammer. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.

// +build 386,!gccgo,!appengine,!nacl

// Go-assembler (Plan 9 syntax) routines for 32-bit x86.
// The constants (·sigma<>, ·rol16<>, ·rol8<>, ·one<>) and the macros
// CHACHA_QROUND_SSE2, CHACHA_QROUND_SSSE3, CHACHA_SHUFFLE_SSE and XOR_SSE
// are not defined in this file; presumably they come from the two
// includes below - confirm there before changing register usage here.
#include "const.s"
#include "macro.s"

// FINALIZE xors len bytes from src and block using
// the temp. registers t0 and t1 and writes the result
// to dst.
//
// Notes for callers:
//  - The loop body runs before len is tested, so len must be >= 1 on
//    entry; the loop repeats while the decremented len is > 0 (signed JG).
//  - src, block, dst and len are modified in place (pointers advanced,
//    len counted down); t0, t1 and the flags are clobbered.
//  - The macro defines the label FINALIZE_LOOP, so expanding it more than
//    once inside the same TEXT block would define that label twice.
//  - The last macro line ends with a line continuation; the blank line
//    that follows the macro terminates the definition and must stay.
#define FINALIZE(dst, src, block, len, t0, t1) \
	XORL t0, t0;       \
	XORL t1, t1;       \
	FINALIZE_LOOP:;    \
	MOVB 0(src), t0;   \
	MOVB 0(block), t1; \
	XORL t0, t1;       \
	MOVB t1, 0(dst);   \
	INCL src;          \
	INCL block;        \
	INCL dst;          \
	DECL len;          \
	JG   FINALIZE_LOOP \

// Register aliases shared by the two hChaCha20 routines below.
#define Dst DI
#define Nonce AX
#define Key BX
#define Rounds DX

// func hChaCha20SSE2(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Builds a 4x16-byte state from ·sigma<>, the 32-byte key and the 16-byte
// nonce, runs 20 rounds over it (the loop executes 10 times, two
// quarter-round macro invocations per pass) and writes rows 0 and 3 of the
// resulting state to out. The initial state is not added back.
//
// Flag 4 is NOSPLIT in the Go toolchain's textflag.h. $0-12: no local
// frame, 12 bytes of arguments (three 4-byte pointers).
// Uses X0-X4, AX, BX, DX, DI.
TEXT ·hChaCha20SSE2(SB), 4, $0-12
	MOVL out+0(FP), Dst
	MOVL nonce+4(FP), Nonce
	MOVL key+8(FP), Key

	// State rows: X0 = constant, X1/X2 = key halves, X3 = nonce.
	MOVOU ·sigma<>(SB), X0
	MOVOU 0*16(Key), X1
	MOVOU 1*16(Key), X2
	MOVOU 0*16(Nonce), X3
	MOVL  $20, Rounds

chacha_loop:
	// X4 is passed as the extra (presumably scratch) register of the
	// quarter-round macro. The two shuffles use mirrored argument order;
	// presumably the first rotates the rows into diagonal position and
	// the second rotates them back - confirm in macro.s.
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE_SSE(X1, X2, X3)
	CHACHA_QROUND_SSE2(X0, X1, X2, X3, X4)
	CHACHA_SHUFFLE_SSE(X3, X2, X1)
	SUBL $2, Rounds
	JNZ  chacha_loop

	// Output is row 0 followed by row 3.
	MOVOU X0, 0*16(Dst)
	MOVOU X3, 1*16(Dst)
	RET

// func hChaCha20SSSE3(out *[32]byte, nonce *[16]byte, key *[32]byte)
//
// Same structure and output layout as hChaCha20SSE2, but uses the
// CHACHA_QROUND_SSSE3 macro, which additionally takes the ·rol16<> and
// ·rol8<> constants in X5 and X6 (presumably byte-shuffle masks for the
// 16- and 8-bit rotations - confirm in const.s / macro.s).
//
// Flag 4 is NOSPLIT in the Go toolchain's textflag.h. $0-12: no local
// frame, 12 bytes of arguments. Uses X0-X6, AX, BX, DX, DI.
TEXT ·hChaCha20SSSE3(SB), 4, $0-12
	MOVL out+0(FP), Dst
	MOVL nonce+4(FP), Nonce
	MOVL key+8(FP), Key

	// State rows: X0 = constant, X1/X2 = key halves, X3 = nonce.
	MOVOU ·sigma<>(SB), X0
	MOVOU 0*16(Key), X1
	MOVOU 1*16(Key), X2
	MOVOU 0*16(Nonce), X3
	MOVL  $20, Rounds

	// Loaded once, outside the round loop.
	MOVOU ·rol16<>(SB), X5
	MOVOU ·rol8<>(SB), X6

chacha_loop:
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_SSE(X1, X2, X3)
	CHACHA_QROUND_SSSE3(X0, X1, X2, X3, X4, X5, X6)
	CHACHA_SHUFFLE_SSE(X3, X2, X1)
	SUBL $2, Rounds
	JNZ  chacha_loop

	// Output is row 0 followed by row 3.
	MOVOU X0, 0*16(Dst)
	MOVOU X3, 1*16(Dst)
	RET

#undef Dst
#undef Nonce
#undef Key
#undef Rounds

// Register aliases for xorKeyStreamSSE2.
// State (AX) holds the state pointer for most of the routine but is
// temporarily repointed at the block buffer in BUFFER_KEYSTREAM.
// Tmp0 (BX) doubles as the round counter and as a FINALIZE temporary.
#define State AX
#define Dst DI
#define Src SI
#define Len DX
#define Tmp0 BX
#define Tmp1 BP

// func xorKeyStreamSSE2(dst, src []byte, block, state *[64]byte, rounds int) int
//
// XORs len(src) bytes of src with generated keystream and writes the
// result to dst, 64 bytes per generated block.
//
//   - While at least 64 bytes remain, a block is generated and applied
//     directly to src/dst.
//   - If 1..63 bytes remain, one more block is generated and stored in
//     full (64 bytes) into block; the remaining bytes are XORed against
//     its start and the remaining length is returned.
//   - Otherwise the return value is 0.
//
// Only the fourth 16-byte row of state is written back (at DONE); it is
// advanced by ·one<> once per generated block.
//
// Argument layout ($0-40, no local frame): dst slice at 0, src slice at
// 12 (length at 16), block at 24, state at 28, rounds at 32, result at 36.
// dst's length is never read, so it is assumed to be at least len(src).
// Flag 4 is NOSPLIT in the Go toolchain's textflag.h.
// Uses X0-X7, AX, BX, BP, DX, SI, DI.
TEXT ·xorKeyStreamSSE2(SB), 4, $0-40
	MOVL dst_base+0(FP), Dst
	MOVL src_base+12(FP), Src
	MOVL state+28(FP), State
	MOVL src_len+16(FP), Len
	MOVL $0, ret+36(FP)       // Number of bytes written to the keystream buffer - 0 iff len mod 64 == 0

	// X0-X3 keep the input state across blocks (X0 is reloaded whenever
	// it has been used as a temporary).
	MOVOU 0*16(State), X0
	MOVOU 1*16(State), X1
	MOVOU 2*16(State), X2
	MOVOU 3*16(State), X3
	TESTL Len, Len
	JZ    DONE                // Nothing to process; DONE still stores X3 back unchanged

GENERATE_KEYSTREAM:
	// Working copy of the state in X4-X7; round counter in Tmp0.
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	MOVL rounds+32(FP), Tmp0

CHACHA_LOOP:
	// X0 is handed to the quarter-round macro as its extra register and
	// is reloaded from state after the loop.
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
	CHACHA_SHUFFLE_SSE(X5, X6, X7)
	CHACHA_QROUND_SSE2(X4, X5, X6, X7, X0)
	CHACHA_SHUFFLE_SSE(X7, X6, X5)
	SUBL $2, Tmp0
	JA   CHACHA_LOOP          // Continue while the counter is non-zero and the subtraction did not borrow

	MOVOU 0*16(State), X0     // Restore X0 from state
	// Add the input state to the permuted working state (32-bit lanes).
	PADDL X0, X4
	PADDL X1, X5
	PADDL X2, X6
	PADDL X3, X7
	// Advance row 3 for the next block (64-bit lane add). Presumably
	// ·one<> holds 1 in its low quadword, i.e. a block-counter
	// increment - confirm in const.s.
	MOVOU ·one<>(SB), X0
	PADDQ X0, X3

	CMPL Len, $64
	JL   BUFFER_KEYSTREAM     // Fewer than 64 bytes left (signed compare)

	// Full block: presumably XORs 64 bytes at offset 0 of Src with X4-X7
	// into Dst, using X0 as a temporary - see XOR_SSE in macro.s.
	XOR_SSE(Dst, Src, 0, X4, X5, X6, X7, X0)
	MOVOU 0*16(State), X0     // Restore X0 from state
	ADDL  $64, Src
	ADDL  $64, Dst
	SUBL  $64, Len
	JZ    DONE
	JMP   GENERATE_KEYSTREAM  // There is at least one more plaintext byte

BUFFER_KEYSTREAM:
	// From here until DONE, the State register holds the block pointer,
	// not the state pointer. All 64 keystream bytes are stored to block.
	MOVL  block+24(FP), State
	MOVOU X4, 0(State)
	MOVOU X5, 16(State)
	MOVOU X6, 32(State)
	MOVOU X7, 48(State)
	MOVL  Len, ret+36(FP)     // Number of bytes written to the keystream buffer - 0 < Len < 64
	// XOR the remaining Len bytes; this advances Dst, Src and the block
	// pointer in State and counts Len down to 0.
	FINALIZE(Dst, Src, State, Len, Tmp0, Tmp1)

DONE:
	// Reload the state pointer (State may have been repointed above) and
	// persist the advanced fourth row.
	MOVL  state+28(FP), State
	MOVOU X3, 3*16(State)
	RET

#undef State
#undef Dst
#undef Src
#undef Len
#undef Tmp0
#undef Tmp1