github.com/emmansun/gmsm@v0.29.1/internal/subtle/xor_amd64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
//go:build !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
TEXT ·xorBytes(SB), NOSPLIT, $0
	MOVQ dst+0(FP), BX // BX = dst
	MOVQ a+8(FP), SI   // SI = a
	MOVQ b+16(FP), CX  // CX = b
	MOVQ n+24(FP), DX  // DX = n (remaining length)
	CMPQ DX, $32       // if len is less than 32, use the non-AVX2 path.
	JL   non_avx2
	CMPB ·useAVX2(SB), $1
	JE   avx2

non_avx2:
	TESTQ $15, DX // len & 15: if not zero, jump to not_aligned.
	JNZ   not_aligned

aligned:
	MOVQ $0, AX // position in slices

loop16b:
	MOVOU (SI)(AX*1), X0 // XOR 16 bytes forwards.
	MOVOU (CX)(AX*1), X1
	PXOR  X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ  $16, AX
	CMPQ  DX, AX
	JNE   loop16b
	RET

loop_1b:
	SUBQ  $1, DX // XOR 1 byte backwards.
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX // len & 7: if not zero, jump to loop_1b.
	JNZ   loop_1b
	CMPQ  DX, $0 // if len is 0, return.
	JE    ret
	TESTQ $15, DX // len & 15: if zero, jump to aligned.
	JZ    aligned

not_aligned:
	TESTQ $7, DX // len & 7: if not zero, jump to loop_1b.
	JNE   loop_1b
	SUBQ  $8, DX // XOR 8 bytes backwards.
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16 // if len is greater than or equal to 16 here, it must be aligned.
	JGE   aligned

ret:
	RET

avx2:
	TESTQ $31, DX // len & 31: if not zero, jump to avx2_not_aligned.
	JNZ   avx2_not_aligned

avx2_aligned: // input length = 16*n, where n is greater than or equal to 2.
	TESTQ   $16, DX // len & 16: if zero, jump to loop32b_start.
	JE      loop32b_start
	SUBQ    $16, DX // XOR 16 bytes backwards.
	VMOVDQU (SI)(DX*1), X0
	VPXOR   (CX)(DX*1), X0, X0
	VMOVDQU X0, (BX)(DX*1)

loop32b_start:
	MOVQ $0, AX // position in slices

loop32b:
	VMOVDQU (SI)(AX*1), Y0 // XOR 32 bytes forwards.
	VPXOR   (CX)(AX*1), Y0, Y0
	VMOVDQU Y0, (BX)(AX*1)
	ADDQ    $32, AX
	CMPQ    DX, AX
	JNE     loop32b

avx2_ret:
	VZEROUPPER
	RET

avx2_loop_1b:
	SUBQ  $1, DX // XOR 1 byte backwards.
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX // len & 7: if not zero, jump to avx2_loop_1b.
	JNZ   avx2_loop_1b
	TESTQ $15, DX // len & 15: if zero, jump to avx2_aligned.
	JZ    avx2_aligned

avx2_not_aligned:
	TESTQ $7, DX // len & 7: if not zero, jump to avx2_loop_1b.
	JNE   avx2_loop_1b
	TESTQ $8, DX // len & 8: if zero, jump to avx2_aligned.
	JE    avx2_aligned
	SUBQ  $8, DX // XOR 8 bytes backwards.
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	JMP   avx2_aligned
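
For reference, the routine's effect is simply dst[i] = a[i] ^ b[i] for 0 <= i < n, with the AVX2 path taken only when the package-level useAVX2 flag is set and n is at least 32. Below is a minimal pure-Go sketch of the same semantics; xorBytesGeneric and the slice-based signature are illustrative assumptions, not the package's actual purego fallback.

	package main

	import "fmt"

	// xorBytesGeneric is an assumed, illustrative equivalent of the assembly
	// above: it XORs the first n bytes of a and b into dst, byte by byte.
	// The package's real purego / non-amd64 fallback may be written differently.
	func xorBytesGeneric(dst, a, b []byte, n int) {
		for i := 0; i < n; i++ {
			dst[i] = a[i] ^ b[i]
		}
	}

	func main() {
		a := []byte{0x00, 0xff, 0x0f, 0xf0}
		b := []byte{0xff, 0xff, 0xf0, 0xf0}
		dst := make([]byte, len(a))
		xorBytesGeneric(dst, a, b, len(a))
		fmt.Printf("%x\n", dst) // prints ff00ff00
	}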