// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes starting at a equal the size bytes
// starting at b. Arguments arrive in registers (ABIInternal) and the
// result is returned in AX.
//
// Frame $0-25: no locals; 25 bytes of arguments+result
// (two pointers, one uintptr, one bool).
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	// AX = a    (want in SI)
	// BX = b    (want in DI)
	// CX = size (want in BX)
	// Fast path: identical pointers always compare equal, whatever the size.
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	// Shuffle the arguments into the registers memeqbody expects.
	// CX must be copied last: BX is still holding b until it is moved to DI.
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	CX, BX
	// Tail jump (not a call): memeqbody's RET returns straight to our caller
	// with the result already in AX.
	JMP	memeqbody<>(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as memequal, except that the size is not an argument: it is read
// from the closure that DX points at.
//
// Frame $0-17: no locals; 17 bytes of arguments+result
// (two pointers, one bool).
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// AX = a       (want in SI)
	// BX = b       (want in DI)
	// 8(DX) = size (want in BX)
	// Fast path: identical pointers always compare equal.
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
	// Tail jump: memeqbody returns directly to our caller.
	JMP	memeqbody<>(SB)

// memeqbody is the shared comparison kernel. It is entered by JMP from the
// wrappers above, so its RET returns to their caller.
//
// Input:
//   a in SI
//   b in DI
//   count in BX
// Output:
//   result in AX
//
// Strategy, chosen by count:
//   count < 8        -> small:  one (possibly shifted) 8-byte load per side
//   8 <= count < 64  -> bigloop: 8 bytes per iteration, then leftover
//   count >= 64      -> hugeloop (SSE) or hugeloop_avx2, 64 bytes per
//                       iteration, then bigloop/leftover for the tail
//
// SI, DI, BX, CX, DX, the vector registers used below, and the flags are
// all clobbered.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small
	CMPQ	BX, $64
	JB	bigloop
	// Use the AVX2 loop when the HasAVX2 byte in internal/cpu's X86 is 1.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	hugeloop_avx2

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Byte-wise compare each 16-byte pair: a result byte is 0xff where the
	// inputs match and 0x00 where they differ.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together: a byte stays 0xff only if that lane
	// matched in all four 16-byte chunks.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	// Collect the top bit of each of the 16 bytes into DX.
	PMOVMSKB X0, DX
	// Advance before testing the mask; the CMPL must come after the
	// ADDQ/SUBQ so that JEQ sees the flags from the mask comparison.
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set <=> all 64 bytes equal
	JEQ	hugeloop
	XORQ	AX, AX	// return 0
	RET

	// 64 bytes at a time using ymm registers
hugeloop_avx2:
	CMPQ	BX, $64
	JB	bigloop_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	// Byte-wise compare the two 32-byte pairs, then AND the results.
	VPCMPEQB	Y1, Y0, Y4
	VPCMPEQB	Y2, Y3, Y5
	VPAND	Y4, Y5, Y6
	// Collect the top bit of each of the 32 bytes into DX.
	VPMOVMSKB Y6, DX
	// As in hugeloop: advance first, compare the mask last so JEQ sees it.
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff	// all 32 mask bits set <=> all 64 bytes equal
	JEQ	hugeloop_avx2
	// Mismatch exit. VZEROUPPER is issued on every path that leaves the
	// AVX2 loop.
	// NOTE(review): presumably to avoid AVX/SSE transition penalties in
	// code that runs afterwards - confirm against the Intel optimization
	// manual.
	VZEROUPPER
	XORQ	AX, AX	// return 0
	RET

	// Fewer than 64 bytes remain after the AVX2 loop: clear the upper ymm
	// halves and fall through to the scalar loop.
bigloop_avx2:
	VZEROUPPER

	// 8 bytes at a time using 64-bit register
bigloop:
	// Loop only while more than 8 bytes remain; the final 0-8 bytes are
	// handled by a single overlapping load in leftover.
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX	// compare last so JEQ sees these flags, not SUBQ's
	JEQ	bigloop
	XORQ	AX, AX	// return 0
	RET

	// remaining 0-8 bytes
leftover:
	// Compare the 8 bytes that END at the end of each buffer (SI+BX, DI+BX).
	// When BX < 8 this re-reads bytes that were already compared, which is
	// harmless. The load stays inside the buffers because this path is only
	// reached when the original count was at least 8.
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX	// result = 1 if equal, 0 otherwise
	RET

	// count is 0-7 here.
small:
	// Zero-length buffers are equal. The JEQ is taken with ZF set by this
	// CMPQ, so the SETEQ at "equal" produces 1.
	CMPQ	BX, $0
	JEQ	equal

	// CX = -(8*count). Shifts use only the low 6 bits of CL, so the
	// effective shift amount below is 64 - 8*count bits, i.e. the number
	// of bits in an 8-byte word that are NOT part of the data.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	// Check the low byte of the address: if it is above 0xf8, an 8-byte
	// load starting at SI could run past the end of the page.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	// The valid bytes land in the low count bytes of the register; the
	// bytes above them are garbage that is shifted out later.
	// From here on SI holds data, not a pointer.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	// The load ends at the last valid byte, so the data arrives in the HIGH
	// count bytes; shift it down to the low end to match the layout above.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// The low count bytes of the difference are zero exactly when the low
	// count bytes of the two words are equal. Shifting left by
	// 64 - 8*count discards the garbage high bytes and sets ZF iff what
	// remains is zero. The shift amount is never 0 here (count is 1-7),
	// so SHLQ always updates the flags.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	// ZF comes from SHLQ above, or from CMPQ BX, $0 on the zero-length path.
	SETEQ	AX
	RET