// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build pcz && 386

#include "textflag.h"

// Equal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// Argument layout on the frame: a at 0, b at 4, size at 8, result byte at 12.
// When a and b are the same address the result is true without touching
// memory (size is not even loaded). Otherwise the comparison is delegated to
// memeqbody, which writes the result byte and returns to Equal's caller.
TEXT ·Equal(SB),NOSPLIT,$0-13
	MOVL a+0(FP), SI
	MOVL b+4(FP), DI
	CMPL SI, DI
	JEQ eq
	MOVL size+8(FP), BX
	LEAL ret+12(FP), AX // memeqbody stores the result through AX
	JMP memeqbody<>(SB) // tail jump: memeqbody's RET returns to our caller
eq:
	MOVB $1, ret+12(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as Equal, except that the byte count is not an argument: it is read
// from offset 4 of the memory DX points at on entry.
// Argument layout on the frame: a at 0, b at 4, result byte at 8.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL a+0(FP), SI
	MOVL b+4(FP), DI
	CMPL SI, DI
	JEQ eq
	MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
	LEAL ret+8(FP), AX // memeqbody stores the result through AX
	JMP memeqbody<>(SB) // tail jump: memeqbody's RET returns to our caller
eq:
	MOVB $1, ret+8(FP)
	RET

// memeqbody is the shared comparison routine. It is entered by JMP, not CALL,
// and takes its inputs in registers:
//
// a in SI
// b in DI
// count in BX
// address of result byte in AX
//
// It stores 1 through AX if the two regions are equal and 0 otherwise.
// SI, DI, BX, CX, DX (and X0-X7 on the SSE path) are clobbered.
//
// Strategy:
//   count < 4   -> small:    one masked 32-bit compare
//   count >= 64 -> hugeloop: 64 bytes per iteration with SSE registers
//   otherwise   -> bigloop:  4 bytes per iteration, then leftover for the tail
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL BX, $4
	JB small

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL BX, $64
	JB bigloop
#ifdef GO386_softfloat
	// With GO386_softfloat defined the SSE code below is never executed.
	// NOTE(review): presumably because SSE cannot be assumed in that mode.
	JMP bigloop
#endif
	MOVOU (SI), X0
	MOVOU (DI), X1
	MOVOU 16(SI), X2
	MOVOU 16(DI), X3
	MOVOU 32(SI), X4
	MOVOU 32(DI), X5
	MOVOU 48(SI), X6
	MOVOU 48(DI), X7
	// Byte-wise compare: each result byte is 0xff where a and b agree.
	PCMPEQB X1, X0
	PCMPEQB X3, X2
	PCMPEQB X5, X4
	PCMPEQB X7, X6
	// AND the four results together so X0 is all ones only if all 64 bytes match.
	PAND X2, X0
	PAND X6, X4
	PAND X4, X0
	PMOVMSKB X0, DX // one mask bit per byte of X0
	// Advance before testing the mask: ADDL/SUBL modify the flags, so the
	// CMPL that feeds JEQ has to come last.
	ADDL $64, SI
	ADDL $64, DI
	SUBL $64, BX
	CMPL DX, $0xffff // all 16 mask bits set means the whole chunk matched
	JEQ hugeloop
	MOVB $0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL BX, $4
	JBE leftover
	MOVL (SI), CX
	MOVL (DI), DX
	ADDL $4, SI
	ADDL $4, DI
	SUBL $4, BX
	CMPL CX, DX
	JEQ bigloop
	MOVB $0, (AX)
	RET

	// remaining 0-4 bytes
	//
	// Compares the final 4 bytes of both regions (the word ending at SI+BX).
	// This may overlap bytes that were already compared, which is harmless.
	// It stays inside the buffers because this path is only reachable when
	// the original count was at least 4, and SI+BX always equals the end of a.
leftover:
	MOVL -4(SI)(BX*1), CX
	MOVL -4(DI)(BX*1), DX
	CMPL CX, DX
	SETEQ (AX)
	RET

	// count is 0..3 here.
small:
	CMPL BX, $0
	JEQ equal // ZF is set by the CMPL above, so SETEQ at equal stores 1

	// CX = -8*count. x86 shifts use only the low 5 bits of the count, so
	// shifting by CX shifts by 32-8*count bits (24, 16 or 8).
	LEAL 0(BX*8), CX
	NEGL CX

	// Only the low byte of the address is examined, so every 256-byte
	// boundary is conservatively treated as a possible page boundary.
	MOVL SI, DX
	CMPB DX, $0xfc
	JA si_high

	// load at SI won't cross a page boundary.
	// The wanted bytes are the low count bytes; the rest is masked off later.
	MOVL (SI), SI
	JMP si_finish
si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	// The load ends at the last wanted byte and starts at most 3 bytes before
	// SI, which is still in the same 256-byte block as SI.
	MOVL -4(SI)(BX*1), SI
	SHRL CX, SI
si_finish:

	// same for DI.
	MOVL DI, DX
	CMPB DX, $0xfc
	JA di_high
	MOVL (DI), DI
	JMP di_finish
di_high:
	MOVL -4(DI)(BX*1), DI
	SHRL CX, DI
di_finish:

	// The difference is zero in its low count bytes iff the wanted bytes
	// match. Shifting left by 32-8*count discards the unwanted high bytes and
	// leaves ZF set exactly when the regions are equal.
	SUBL SI, DI
	SHLL CX, DI
equal:
	SETEQ (AX)
	RET