// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Stores 1 in ret when the size bytes at a and b are identical, 0 otherwise.
// Identical pointers are answered immediately without touching memory;
// everything else is handed to memeqbody via a tail jump, so memeqbody's RET
// returns straight to our caller.
TEXT runtime·memequal(SB),NOSPLIT,$0-25
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	eq
	MOVQ	size+16(FP), BX
	LEAQ	ret+24(FP), AX	// memeqbody writes the result through AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same contract as memequal, except that the byte count is not an argument:
// it is read from offset 8 of the object addressed by DX.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	eq
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	LEAQ	ret+16(FP), AX	// memeqbody writes the result through AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+16(FP)
	RET

// memeqbody is the shared comparison kernel. It is entered by JMP, not CALL,
// and uses a register-based convention:
//
// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// On every exit path it stores 1 (equal) or 0 (different) at (AX) and RETs.
// SI, DI, BX, CX and DX are clobbered, as are X0-X7 or Y0-Y6 on the vector
// paths.
//
// Dispatch on count:
//	count < 8   -> small     (one masked 8-byte load per side)
//	count < 64  -> bigloop   (8 bytes per iteration, then leftover)
//	otherwise   -> hugeloop or hugeloop_avx2 (64 bytes per iteration),
//	               falling back to bigloop for the tail.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small
	CMPQ	BX, $64
	JB	bigloop
	// Take the ymm path when the HasAVX2 flag byte is set to 1.
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	hugeloop_avx2

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Each PCMPEQB leaves 0xff in every byte lane that matched.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together: a lane survives only if it matched
	// in all four 16-byte chunks.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX	// one mask bit per byte lane -> 16 bits in DX
	// Advance before testing the mask; the ADDQ/SUBQ flag results are
	// overwritten by the CMPL that feeds the branch.
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff	// all 16 bits set <=> all 64 bytes equal
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 64 bytes at a time using ymm registers
hugeloop_avx2:
	CMPQ	BX, $64
	JB	bigloop_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4	// 0xff in each matching lane of bytes 0-31
	VPCMPEQB	Y2, Y3, Y5	// 0xff in each matching lane of bytes 32-63
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX	// one mask bit per byte lane -> 32 bits in DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff	// all 32 bits set <=> all 64 bytes equal
	JEQ	hugeloop_avx2
	// Zero the upper halves of the ymm registers on the way out of the
	// AVX path (presumably to avoid AVX/SSE transition stalls).
	VZEROUPPER
	MOVB	$0, (AX)
	RET

bigloop_avx2:
	// Same ymm cleanup as above, then fall through into the scalar loop.
	VZEROUPPER

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-8 bytes
	//
	// This label is only reached when the original count was at least 8,
	// so the 8 bytes that end at SI+BX (and DI+BX) lie inside the buffers.
	// They may overlap bytes that were already compared and found equal,
	// which does not change the answer.
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	(AX)
	RET

	// count is 0-7 here.
small:
	CMPQ	BX, $0
	JEQ	equal	// zero length: ZF is set, so SETEQ at equal stores 1

	// CX = -(8*count). Shifts use only the low 6 bits of CL, so shifting
	// by CX shifts by 64 - 8*count bits.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	// Look at the low byte of the address only.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	// The low count bytes of the difference are zero exactly when the low
	// count bytes of the two words agree. Shifting left by 64 - 8*count
	// discards the unwanted high bytes and sets ZF from what remains.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	SETEQ	(AX)
	RET