// SPDX-License-Identifier: Apache-2.0
// Copyright 2023 The Prime Citizens
//
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/primecitizens/pcz/std@v0.2.1/core/mem/equal_arm64.s

//go:build pcz && arm64

#include "textflag.h"

// Equal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes starting at a and at b are identical.
//
// in:
//   R0: pointer a
//   R1: pointer b
//   R2: size in bytes
// out:
//   R0: 1 if the ranges are equal, 0 otherwise
TEXT ·Equal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case
	CBZ	R2, equal
	// short path to handle equal pointers: a range always equals itself,
	// so no memory needs to be read (same shortcut as memequal_varlen).
	CMP	R0, R1
	BEQ	equal
	// tail-jump; memeqbody sets R0 and returns to our caller
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same comparison as Equal, but the length is not passed as an argument:
// it is read from the closure that R26 points to.
//
// in:
//   R0:  pointer a
//   R1:  pointer b
//   R26: closure pointer; size is stored at offset 8
// out:
//   R0: 1 if the ranges are equal, 0 otherwise
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// identical pointers are trivially equal
	CMP	R0, R1
	BEQ	eq
	MOVD	8(R26), R2	// compiler stores size at offset 8 in the closure
	// zero-length ranges are trivially equal
	CBZ	R2, eq
	B	memeqbody<>(SB)
eq:
	MOVD	$1, R0
	RET

// Shared comparison body. Both callers in this file branch here only
// when R2 is non-zero.
//
// input:
//   R0: pointer a
//   R1: pointer b
//   R2: data len
// at return: result in R0
//
// Strategy: 64-byte vector chunks, then 16-byte register-pair chunks,
// then a tail of fewer than 16 bytes.
// Modifies R0-R7, R9, V0-V11 and the condition flags.
TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R2, R3		// R3 = len rounded down to a multiple of 64
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6		// end of chunks
chunk64_loop:
	// load 64 bytes from each side; both pointers are post-incremented
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// each 64-bit lane becomes all ones where the inputs match, else zero
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// fold the four masks: a lane of V8 stays non-zero only if that lane
	// matched in all four register pairs
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// The flags set here are consumed by the BNE below; the VMOV and CBZ
	// instructions in between leave them untouched.
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	AND	$0x3f, R2, R2		// bytes left after the 64-byte chunks
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	BIC	$0xf, R2, R3		// R3 = remaining len rounded down to a multiple of 16
	CBZ	R3, tail
	ADD	R3, R0, R6		// end of chunks
chunk16_loop:
	// load 16 bytes from each side; both pointers are post-incremented
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	// XOR is non-zero iff the two words differ
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R2, R2		// bytes left after the 16-byte chunks
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	TBZ	$3, R2, lt_8
	// 8..15 bytes: compare the first 8 and the last 8 bytes; the two
	// windows may overlap, which only re-checks some bytes
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	TBZ	$2, R2, lt_4
	// 4..7 bytes: same overlapping-window trick with 4-byte loads
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	TBZ	$1, R2, lt_2
	// compare 2 bytes and advance both pointers, so that a trailing odd
	// byte (if any) is found at (R0)/(R1) below
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	TBZ	$0, R2, equal
one:
	// compare the single remaining byte
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	RET
not_equal:
	MOVB	ZR, R0
	RET