// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "asm_amd64.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// On entry:
//   AX = a
//   BX = b
//   CX = size
// Identical pointers are reported equal immediately (AX = 1) without
// reading memory. Otherwise the arguments are moved into the registers
// memeqbody expects (SI = a, DI = b, BX = size) and control tail-jumps
// there; memeqbody produces the result in AX.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
	CMPQ	AX, BX
	JEQ	same
	// b must leave BX before BX is overwritten with the size.
	MOVQ	BX, DI
	MOVQ	AX, SI
	MOVQ	CX, BX
	JMP	memeqbody<>(SB)
same:
	MOVQ	$1, AX	// return 1
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// On entry:
//   AX    = a
//   BX    = b
//   8(DX) = size (the compiler stores size at offset 8 in the closure)
// Same contract as memequal, except the length is read from the closure
// pointed to by DX instead of being passed as an argument.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	CMPQ	AX, BX
	JEQ	same
	// b must leave BX before BX is overwritten with the size.
	MOVQ	BX, DI
	MOVQ	AX, SI
	MOVQ	8(DX), BX
	JMP	memeqbody<>(SB)
same:
	MOVQ	$1, AX	// return 1
	RET

// memeqbody compares count bytes at a and b.
//
// Input:
//   SI = a
//   DI = b
//   BX = count
// Output:
//   AX = result (1 if all bytes match, 0 otherwise; the sub-8-byte and
//        tail paths write it with SETEQ)
//
// Strategy by remaining count:
//   count <  8 : under8 path, one masked 8-byte load per operand
//   count < 64 : 8 bytes per iteration, then one overlapping 8-byte tail
//   count >= 64: 64 bytes per iteration with SSE or AVX2, then the above
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	under8
	CMPQ	BX, $64
	JB	loop8
#ifndef hasAVX2
	// hasAVX2 is not defined at build time: pick the loop at run time
	// from the CPU feature flag.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	loop64_avx2

	// SSE: four 16-byte lanes (64 bytes) per iteration.
	PCALIGN $16
loop64_sse:
	CMPQ	BX, $64
	JB	loop8
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Byte-wise equality per lane, then AND all four lanes into X0.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	// All 16 mask bits set means every byte in the 64-byte chunk matched.
	CMPL	DX, $0xffff
	JEQ	loop64_sse
	XORQ	AX, AX	// return 0
	RET
#endif

	// AVX2: two 32-byte lanes (64 bytes) per iteration.
	// When hasAVX2 is defined, the SSE loop above is compiled out and
	// control falls straight through to here.
	PCALIGN $16
loop64_avx2:
	CMPQ	BX, $64
	JB	avx2_done
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4
	VPCMPEQB	Y2, Y3, Y5
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	// All 32 mask bits set means every byte in the 64-byte chunk matched.
	CMPL	DX, $0xffffffff
	JEQ	loop64_avx2
	VZEROUPPER
	XORQ	AX, AX	// return 0
	RET

avx2_done:
	// Fewer than 64 bytes remain; clear the upper YMM state before
	// continuing with the scalar loop.
	VZEROUPPER

	// Scalar: 8 bytes per iteration using 64-bit registers.
	PCALIGN $16
loop8:
	CMPQ	BX, $8
	JBE	tail8
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	loop8
	XORQ	AX, AX	// return 0
	RET

	// Remaining 0-8 bytes: compare the 8 bytes that end exactly at the
	// end of each buffer (offset BX-8 from the current pointers).
tail8:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// Fewer than 8 bytes in total.
under8:
	CMPQ	BX, $0
	JEQ	result	// zero length: ZF is set by the CMPQ, so SETEQ yields 1

	// CX = -(8*count). The shifts below take their count from CX.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	a_near_page_end

	// A load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	a_loaded
a_near_page_end:
	// Address ends in 11111xxx. Load the 8 bytes ending at the last
	// wanted byte, then shift the wanted bytes down into position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
a_loaded:

	// Same treatment for DI.
	CMPB	DI, $0xf8
	JA	b_near_page_end
	MOVQ	(DI), DI
	JMP	b_loaded
b_near_page_end:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
b_loaded:

	// Take the difference of the two words, then shift left by CX so the
	// bytes beyond count are discarded. The flags left by SHLQ feed the
	// SETEQ below.
	SUBQ	SI, DI
	SHLQ	CX, DI
result:
	SETEQ	AX
	RET