// Source: github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/equal_386.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are equal.
// Identical pointers are answered immediately without touching memory;
// every other case is handed to memeqbody, which stores the result byte
// through the address passed in AX.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// same address: equal by definition
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX		// memeqbody writes the result to (AX)
	JMP	memeqbody<>(SB)		// jump, not call: memeqbody's RET returns to our caller
eq:
	MOVB	$1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same contract as memequal, except the length is not an argument: it is
// read from offset 4 of the block DX points to (the closure, per the
// comment on the load below).
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// same address: equal by definition
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX		// memeqbody writes the result to (AX)
	JMP	memeqbody<>(SB)		// jump, not call: memeqbody's RET returns to our caller
eq:
	MOVB	$1, ret+8(FP)
	RET

// memeqbody compares BX bytes at SI with BX bytes at DI and stores 1 (equal)
// or 0 (different) in the byte addressed by AX.
//
// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// Strategy, chosen by the remaining count:
//   - fewer than 4 bytes on entry: "small", a single masked 32-bit compare;
//   - at least 64 bytes and the SSE2 flag byte equals 1: "hugeloop",
//     64 bytes per iteration;
//   - otherwise: "bigloop", 4 bytes per iteration, finished by "leftover",
//     one 32-bit compare of the last 4 bytes of each buffer.
//
// Modifies SI, DI, BX, CX, DX and the flags; the SSE2 path also uses X0-X7.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop			// fewer than 64 bytes left
	// The SSE2 flag is re-tested on every iteration.
	// NOTE(review): const_offsetX86HasSSE2 is presumably provided by go_asm.h - confirm.
	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
	JNE	bigloop			// flag not set: use the 32-bit loop instead
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Bytewise compare: each result byte is 0xff where the inputs match, 0 otherwise.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four results together: a byte stays 0xff only if it matched
	// in all four 16-byte lanes.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// DX = one bit per byte of X0
	// Advance before testing the mask; on a mismatch these values are not used again.
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff		// all 16 bits set <=> all 64 bytes matched
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover		// 4 or fewer bytes left: finish with one final load
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// Compare the 4 bytes that end exactly at the end of each buffer
	// (address + BX - 4). When BX < 4 the load reaches back over bytes that
	// were already compared equal, so including them does not change the
	// result. This label is only reached when the original count was >= 4,
	// so the load stays inside the buffers.
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)			// 1 if the final words match, else 0
	RET

	// Fewer than 4 bytes in total (BX is 0..3 here).
small:
	CMPL	BX, $0
	JEQ	equal			// ZF is set, so SETEQ at "equal" stores 1

	// CX = -(8*BX). 32-bit shifts use only the low 5 bits of CL, so shifting
	// by CX shifts by 32 - 8*BX bits (24, 16 or 8).
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc		// test the low byte of the address
	JA	si_high			// 0xfd..0xff: a 4-byte load would run past the 256-byte boundary

	// load at SI won't cross a page boundary.
	// The bytes we want are the low BX bytes; the rest is discarded by the SHLL below.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// low address byte is 0xfd..0xff. Load up to bytes we want, move to correct position.
	// The load ends at SI+BX, so the wanted bytes arrive in the high BX
	// bytes; SHRL moves them down to the low BX bytes.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The low 8*BX bits of DI-SI are zero exactly when the low BX bytes of
	// the two words are equal. SHLL pushes every higher (unwanted) bit out
	// of the register and sets ZF from what is left.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	// ZF comes from CMPL BX, $0 (empty input) or from the SHLL above.
	SETEQ	(AX)
	RET