// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// func xorBytesSSE2(dst, a, b *byte, n int)
//
// xorBytesSSE2 stores a[i] ^ b[i] into dst[i] for every i in [0, n).
// A call with n <= 0 returns immediately without touching memory.
//
// Strategy: the tail of the buffers is consumed backwards until the
// remaining length is a multiple of 16, then the remaining prefix is
// consumed forwards 16 bytes at a time:
//
//   1. loop_1b:     one byte at a time, until the remaining length is a
//                   multiple of 8.
//   2. not_aligned: a single 8-byte step when the remaining length is
//                   8 modulo 16.
//   3. loop16b:     16 bytes per iteration with SSE2 (MOVOU/PXOR).
//
// "Aligned" below refers to the remaining length being a multiple of 16,
// not to the addresses: all 16-byte accesses use the unaligned move MOVOU.
//
// Registers used: AX, BX, CX, DX, SI, DI, X0, X1.
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  n+24(FP), DX
	CMPQ  DX, $0             // n <= 0: nothing to do. Without this guard n == 0 would
	JLE   ret                // enter loop16b, whose exit test can never succeed for n == 0.
	TESTQ $15, DX            // n & 15 != 0: length is not a multiple of 16.
	JNZ   not_aligned

aligned:
	// Here DX is a positive multiple of 16: the number of bytes left,
	// all of them at the front of the buffers.
	MOVQ $0, AX // AX = forward byte offset into the slices

loop16b:
	MOVOU (SI)(AX*1), X0   // X0 = a[AX : AX+16]
	MOVOU (CX)(AX*1), X1   // X1 = b[AX : AX+16]
	PXOR  X1, X0           // X0 ^= X1
	MOVOU X0, (BX)(AX*1)   // dst[AX : AX+16] = X0
	ADDQ  $16, AX
	CMPQ  DX, AX           // stop once the offset reaches the remaining length
	JNE   loop16b
	RET

loop_1b:
	SUBQ  $1, DX           // step back one byte; DX is now the index to process
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)   // dst[DX] = a[DX] ^ b[DX]
	TESTQ $7, DX           // keep going byte-wise until DX is a multiple of 8
	JNZ   loop_1b
	CMPQ  DX, $0           // everything consumed: done
	JE    ret
	TESTQ $15, DX          // DX is a multiple of 16: switch to the SSE2 loop
	JZ    aligned
	// Otherwise DX is 8 modulo 16: fall through to the 8-byte step.

not_aligned:
	TESTQ $7, DX           // not a multiple of 8: peel single bytes first
	JNE   loop_1b
	SUBQ  $8, DX           // DX was 8 modulo 16; step back one 8-byte word
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)   // dst[DX : DX+8] = a[DX : DX+8] ^ b[DX : DX+8]
	CMPQ  DX, $16          // DX is now a multiple of 16; if any bytes remain,
	JGE   aligned          // finish them in the SSE2 loop, otherwise DX == 0.

ret:
	RET