// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Byte-equality primitives for amd64p32, written in Go assembler syntax.
// Pointers and lengths are loaded from the frame with 32-bit MOVL, while
// memeqbody operates on the full 64-bit registers.
//
// The three exported entry points only unpack their arguments into
// SI (a), DI (b) and BX (byte count) and then share memeqbody<>.

#include "go_asm.h"
#include "textflag.h"

// Equal compares two (base, len) pairs and stores a one-byte result.
// NOTE(review): presumably declared in Go as func Equal(a, b []byte) bool;
// the declaration is not in this file, confirm against the Go stub.
//
// Frame slots read/written below:
//	a_base+0(FP), a_len+4(FP)	first operand
//	b_base+12(FP), b_len+16(FP)	second operand
//	ret+24(FP)			result byte: 1 if equal, 0 otherwise
TEXT ·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX		// BX = len(a); also the count passed to memeqbody
	MOVL	b_len+16(FP), CX	// CX = len(b)
	CMPL	BX, CX
	JNE	neq			// different lengths can never be equal
	MOVL	a_base+0(FP), SI
	MOVL	b_base+12(FP), DI
	CMPL	SI, DI
	JEQ	eq			// same base and same length: equal without touching memory
	CALL	memeqbody<>(SB)		// leaves 0 or 1 in AX
	MOVB	AX, ret+24(FP)
	RET
neq:
	MOVB	$0, ret+24(FP)
	RET
eq:
	MOVB	$1, ret+24(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Compares size bytes at a and b. Identical pointers short-circuit to true
// before size is even loaded.
TEXT runtime·memequal(SB),NOSPLIT,$0-17
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// a == b: trivially equal
	MOVL	size+8(FP), BX
	CALL	memeqbody<>(SB)		// leaves 0 or 1 in AX
	MOVB	AX, ret+16(FP)
	RET
eq:
	MOVB	$1, ret+16(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as memequal, except that the byte count is not a stack argument:
// it is read from the closure that DX points to on entry.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// a == b: trivially equal
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	CALL	memeqbody<>(SB)		// leaves 0 or 1 in AX
	MOVB	AX, ret+8(FP)
	RET
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody is the shared comparison kernel. It takes its arguments in
// registers rather than on the stack:
//
// a in SI
// b in DI
// count in BX
//
// Result: AX = 1 if the count bytes at a and b are identical, 0 otherwise.
// Overwrites: AX, BX, CX, DX, SI, DI, X0-X7 and the flags.
//
// Strategy by size:
//	count < 8	"small": one (possibly shifted) 8-byte load per operand
//	count >= 64	"hugeloop": 64 bytes per iteration with SSE2
//	8 <= count	"bigloop": 8 bytes per iteration, then "leftover"
//			re-reads the final 8 bytes of the region
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	XORQ	AX, AX			// default result is "not equal"; mismatch paths just RET

	CMPQ	BX, $8
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Each PCMPEQB leaves 0xff in every byte lane where a and b agree.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together: a lane stays 0xff only if that lane
	// matched in all four 16-byte pairs.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// DX = one bit per lane (16 bits)
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
	JEQ	hugeloop
	RET				// mismatch: AX is still 0

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover		// 8 or fewer bytes left: finish with one overlapping load
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
	RET				// mismatch: AX is still 0

	// remaining 0-8 bytes
	// Only reached when the original count was >= 8, so the 8 bytes ending
	// at a+count (and b+count) lie inside the compared region; any bytes
	// re-read here were already found equal.
leftover:
	ADDQ	BX, SI			// SI = end of a
	ADDQ	BX, DI			// DI = end of b
	MOVQ	-8(SI), CX
	MOVQ	-8(DI), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// count is 0..7 here.
small:
	CMPQ	BX, $0
	JEQ	equal			// ZF is set, so the SETEQ at equal yields 1

	// CX = -8*count. 64-bit shifts use only the low 6 bits of CL, so
	// shifting by CX below is a shift by 64 - 8*count bits.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	// Low address byte <= 0xf8 means the 8 bytes starting at SI stay
	// within the same 256-byte block.
	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	// The wanted bytes land in the low count bytes; the upper bytes are junk
	// that the final SHLQ discards.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	// The load covers the 8 bytes ending at a+count, so the wanted bytes sit
	// in the high count bytes; the right shift moves them to the bottom.
	MOVQ	BX, DX
	ADDQ	SI, DX
	MOVQ	-8(DX), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	BX, DX
	ADDQ	DI, DX
	MOVQ	-8(DX), DI
	SHRQ	CX, DI
di_finish:

	// The low count bytes of DI-SI are zero exactly when the low count bytes
	// of the two loads match. Shifting left by 64 - 8*count drops the
	// don't-care upper bytes and sets ZF from what remains.
	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
	// ZF comes from the SHLQ above, or from CMPQ BX, $0 when count == 0.
	SETEQ	AX
	RET