// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_arm64.s

#include "go_asm.h"
#include "textflag.h"

// ·Equal reports whether two byte sequences have identical length and contents.
//
// Frame layout used below: a_base+0, a_len+8, b_base+24, b_len+32, ret+48
// (one result byte). Presumably declared on the Go side as
// func Equal(a, b []byte) bool -- confirm against the Go stub.
//
// Differing lengths and the zero-length case are answered here; everything
// else tail-jumps to memeqbody<> with R0/R1/R2/R8 set up as it expects.
TEXT ·Equal(SB),NOSPLIT,$0-49
	MOVD	b_len+32(FP), R3
	MOVD	a_len+8(FP), R1
	CMP	R1, R3
	BNE	differ			// lengths disagree, so the answer is false
	CBZ	R1, same		// both empty, so the answer is true
	MOVD	$ret+48(FP), R8		// R8 = address of the result byte
	MOVD	b_base+24(FP), R2
	MOVD	a_base+0(FP), R0
	B	memeqbody<>(SB)
differ:
	MOVB	ZR, ret+48(FP)
	RET
same:
	MOVD	$1, R0
	MOVB	R0, ret+48(FP)
	RET

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Compares size bytes at a and b. A size of zero yields true without
// reading either pointer; otherwise the work is handed to memeqbody<>.
// NOTE(review): this symbol lives in the runtime package's namespace --
// confirm the toolchain this copy is built with does not define it too.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVD	size+16(FP), R1
	CBZ	R1, zero_len
	MOVD	$ret+24(FP), R8		// R8 = address of the result byte
	MOVD	b+8(FP), R2
	MOVD	a+0(FP), R0
	B	memeqbody<>(SB)
zero_len:
	MOVD	$1, R0
	MOVB	R0, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the byte count is read from offset 8 of the closure
// addressed by R26. Identical pointers or a zero count produce true with
// no call. Otherwise the three arguments are stored in this function's
// 40-byte frame at 8/16/24(RSP), runtime·memequal is called, and its
// result byte is read back from 32(RSP).
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4
	BEQ	trivially_equal		// same address on both sides
	MOVD	8(R26), R5		// compiler stores size at offset 8 in the closure
	CBZ	R5, trivially_equal
	MOVD	R5, 24(RSP)		// size
	MOVD	R4, 16(RSP)		// b
	MOVD	R3, 8(RSP)		// a
	BL	runtime·memequal(SB)
	MOVBU	32(RSP), R3		// result byte written by the callee
	MOVB	R3, ret+16(FP)
	RET
trivially_equal:
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// memeqbody<> is the shared comparison kernel.
//
// Inputs:
//   R0: pointer a
//   R1: number of bytes to compare
//   R2: pointer b
//   R8: address that receives the result byte (1 = equal, 0 = not equal)
//
// Both callers in this file filter out a zero length before jumping here.
// Modifies R0-R7, R9, V0-V11 and the condition flags.
//
// Strategy: 64-byte blocks are compared with vector loads, then 16-byte
// blocks with register pairs, then a tail of fewer than 16 bytes.
TEXT memeqbody<>(SB),NOSPLIT,$0
	// A length of exactly one byte is special-cased up front.
	CMP	$1, R1
	BEQ	single_byte
	// Anything shorter than 16 bytes goes straight to the tail code.
	CMP	$16, R1
	BLO	short_tail
	BIC	$0x3f, R1, R3		// R3 = length rounded down to a multiple of 64
	CBZ	R3, blocks16
	ADD	R3, R0, R6		// R6 = address in a where the 64-byte blocks end
loop64:
	// Load 64 bytes from each side; both pointers post-increment.
	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	// Each 64-bit lane becomes all ones where the inputs match, else zero.
	VCMEQ	V3.D2, V7.D2, V11.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V0.D2, V4.D2, V8.D2
	// Fold the four results into V8; a zero lane marks a difference.
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// Flags set here are consumed by the BNE below; the VMOV and CBZ
	// instructions in between leave them untouched.
	CMP	R0, R6
	VMOV	V8.D[1], R5
	VMOV	V8.D[0], R4
	CBZ	R5, mismatch
	CBZ	R4, mismatch
	BNE	loop64
	AND	$0x3f, R1, R1		// bytes left after the 64-byte blocks
	CBZ	R1, match
blocks16:
	BIC	$0xf, R1, R3		// R3 = remaining length rounded down to a multiple of 16
	CBZ	R3, short_tail
	ADD	R3, R0, R6		// R6 = address in a where the 16-byte blocks end
loop16:
	LDP.P	16(R2), (R7, R9)
	LDP.P	16(R0), (R4, R5)
	EOR	R4, R7
	CBNZ	R7, mismatch
	EOR	R5, R9
	CBNZ	R9, mismatch
	CMP	R0, R6
	BNE	loop16
	AND	$0xf, R1, R1		// bytes left after the 16-byte blocks
	CBZ	R1, match
short_tail:
	// Fewer than 16 bytes remain. Test the length bit by bit.
	TBZ	$3, R1, under8
	// 8..15 bytes: check the first 8 and the last 8, which may overlap.
	MOVD	(R2), R5
	MOVD	(R0), R4
	EOR	R4, R5
	CBNZ	R5, mismatch
	SUB	$8, R1, R6		// offset of the last 8 bytes
	MOVD	(R2)(R6), R5
	MOVD	(R0)(R6), R4
	EOR	R4, R5
	CBNZ	R5, mismatch
	B	match
under8:
	TBZ	$2, R1, under4
	// 4..7 bytes: check the first 4 and the last 4, which may overlap.
	MOVWU	(R2), R5
	MOVWU	(R0), R4
	EOR	R4, R5
	CBNZ	R5, mismatch
	SUB	$4, R1, R6		// offset of the last 4 bytes
	MOVWU	(R2)(R6), R5
	MOVWU	(R0)(R6), R4
	EOR	R4, R5
	CBNZ	R5, mismatch
	B	match
under4:
	TBZ	$1, R1, under2
	// 2..3 bytes: compare one halfword and step both pointers past it.
	MOVHU.P	2(R2), R5
	MOVHU.P	2(R0), R4
	CMP	R4, R5
	BNE	mismatch
under2:
	// Bit 0 clear means there is no trailing byte left to check.
	TBZ	$0, R1, match
single_byte:
	MOVBU	(R2), R5
	MOVBU	(R0), R4
	CMP	R4, R5
	BNE	mismatch
match:
	MOVD	$1, R0
	MOVB	R0, (R8)
	RET
mismatch:
	MOVB	ZR, (R8)
	RET