// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/bytealg/equal_amd64.s

#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Registers on entry:
//   AX = a
//   BX = b
//   CX = size
// memeqbody wants a in SI, b in DI and the byte count in BX.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	CMPQ	AX, BX
	JEQ	same_ptr	// both arguments are the same address
	MOVQ	BX, DI		// b; copy it out before BX is reused for the count
	MOVQ	CX, BX		// size
	MOVQ	AX, SI		// a
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX		// return 1
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Registers on entry:
//   AX    = a
//   BX    = b
//   8(DX) = size
// memeqbody wants a in SI, b in DI and the byte count in BX.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	CMPQ	AX, BX
	JEQ	same_ptr	// both arguments are the same address
	MOVQ	BX, DI		// b; copy it out before BX is reused for the count
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	MOVQ	AX, SI		// a
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX		// return 1
	RET

// memeqbody is the shared comparison loop used by the entry points above.
//
// Input:
//   a in SI
//   b in DI
//   count in BX
// Output:
//   result in AX
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	under8		// fewer than 8 bytes
	CMPQ	BX, $64
	JB	by8		// 8..63 bytes
#ifndef hasAVX2
	// hasAVX2 is not defined at build time, so test the HasAVX2 byte
	// of internal/cpu's X86 variable. When hasAVX2 is defined, this
	// test and the xmm loop are left out and control falls straight
	// into the ymm loop.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JEQ	by64_ymm

	// 64 bytes per iteration, as four 16-byte xmm compares.
by64_xmm:
	CMPQ	BX, $64
	JB	by8
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X1, X0
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	PCMPEQB	X3, X2
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	PCMPEQB	X5, X4
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X7, X6
	// Fold the four per-byte equality masks into X0.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set: the 64 bytes matched
	JEQ	by64_xmm
	XORQ	AX, AX		// return 0
	RET
#endif

	// 64 bytes per iteration, as two 32-byte ymm compares.
by64_ymm:
	CMPQ	BX, $64
	JB	ymm_exit
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VPCMPEQB	Y1, Y0, Y4
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y2, Y3, Y5
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff	// all 32 mask bits set: the 64 bytes matched
	JEQ	by64_ymm
	VZEROUPPER
	XORQ	AX, AX		// return 0
	RET

	// Fewer than 64 bytes remain after the ymm loop; continue in the
	// 8-byte loop below.
ymm_exit:
	VZEROUPPER

	// 8 bytes per iteration using 64-bit general-purpose registers.
by8:
	CMPQ	BX, $8
	JBE	tail
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	by8
	XORQ	AX, AX		// return 0
	RET

	// 0-8 bytes remain. Compare the 8 bytes that end exactly at the end
	// of each buffer; they may overlap bytes that were already compared.
tail:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// count < 8.
under8:
	CMPQ	BX, $0
	JEQ	result		// count == 0: ZF from this CMPQ makes SETEQ produce 1

	// CX = -(8*count). The 64-bit shifts below use only the low six
	// bits of CL, so the effective shift amount is 64 - 8*count.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	a_load_back

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	a_ready
a_load_back:
	// address ends in 11111xxx. Load the 8 bytes ending at the last
	// wanted byte, then shift the wanted bytes down to the low end.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
a_ready:

	// Same treatment for the second operand in DI.
	CMPB	DI, $0xf8
	JA	b_load_back
	MOVQ	(DI), DI
	JMP	b_ready
b_load_back:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
b_ready:

	// The left shift throws away every byte above the low count bytes
	// of the difference and sets ZF when what remains is zero.
	SUBQ	SI, DI
	SHLQ	CX, DI
result:
	SETEQ	AX
	RET