// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_386.s
//
// Byte-equality primitives for 32-bit x86, written for the Go assembler
// (operands read source first, destination last). Arguments and results
// of the exported entry points live in the caller's frame and are
// addressed through the FP pseudo-register. Every entry point has a
// zero-size local frame and tail-jumps into memeqbody<>, whose RET
// therefore returns straight to the entry point's caller.

#include "go_asm.h"
#include "textflag.h"

// Equal reports whether two byte ranges have the same length and the
// same contents.
//
// Argument frame (25 bytes):
//   a_base+0(FP)   address of the first range
//   a_len+4(FP)    length of the first range
//   b_base+12(FP)  address of the second range
//   b_len+16(FP)   length of the second range
//   ret+24(FP)     one-byte result: 1 if equal, 0 otherwise
//
// NOTE(review): this layout is consistent with
// func Equal(a, b []byte) bool, but the Go declaration is not part of
// this file; confirm against it.
TEXT ·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	neq	// different lengths: not equal, no memory is read
	MOVL	a_base+0(FP), SI
	MOVL	b_base+12(FP), DI
	CMPL	SI, DI
	JEQ	eq	// same address and same length: equal, no memory is read
	LEAL	ret+24(FP), AX
	JMP	memeqbody<>(SB)	// tail jump: memeqbody stores the result through AX
neq:
	MOVB	$0, ret+24(FP)
	RET
eq:
	MOVB	$1, ret+24(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Argument frame (13 bytes): a+0(FP), b+4(FP), size+8(FP), ret+12(FP).
// Identical pointers short-circuit to true without reading size.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Argument frame (9 bytes): a+0(FP), b+4(FP), ret+8(FP).
// The byte count is not an argument; it is loaded from 4(DX), so DX is
// expected to hold the closure pointer on entry.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody<> compares count bytes at a and b and stores 1 (equal) or
// 0 (different) in the byte addressed by AX. It takes its operands in
// registers and is only reachable by JMP from the entry points above.
//
// In:
//   a in SI
//   b in DI
//   count in BX
//   address of result byte in AX
// Out:
//   (AX) = 1 if the ranges are equal, 0 otherwise
// Clobbers:
//   BX, CX, DX, SI, DI, flags; X0-X7 when the 64-byte loop runs
//
// Invariant in hugeloop/bigloop/leftover: SI+BX and DI+BX always point
// one past the end of their range.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
	// Use the SSE2 loop only when this flag byte equals 1; otherwise
	// fall back to the 4-byte loop.
	// NOTE(review): presumably set by internal/cpu when the CPU
	// reports SSE2; the definition is outside this file.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// PCMPEQB leaves 0xff in every byte lane that matched.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four 16-byte results together: a lane stays 0xff only if
	// it matched in all four pairs.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX	// DX = one bit per lane; 0xffff means all 64 bytes matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// must follow ADDL/SUBL: they overwrite the flags
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover	// 4 or fewer bytes left: finish with one overlapping load
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX	// must follow ADDL/SUBL: they overwrite the flags
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the last 4 bytes of each range. This path is reached only
	// when the count was at least 4 on entry, so the load stays inside
	// the range; bytes that were already compared are simply compared
	// again.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// count is 0-3 bytes
small:
	CMPL	BX, $0
	JEQ	equal	// zero bytes: ZF from this CMPL makes SETEQ store 1

	// CX = -(8*count). A shift by CX uses only the low 5 bits of CL,
	// so the effective shift below is 32 - 8*count (24, 16 or 8).
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc	// low byte of the address above 0xfc?
	JA	si_high

	// load at SI won't cross a page boundary.
	// The bytes beyond count that this load picks up are discarded by
	// the SHLL below.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// Keep only the low count bytes of the difference. The shift count
	// is never 0 here, so SHLL always sets ZF: ZF=1 iff those bytes
	// are equal.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	(AX)
	RET