// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Origin: github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/bytealg/equal_arm64.s

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Register arguments: R0 = a, R1 = b, R2 = size.
// Result in R0: 1 if the two size-byte regions are equal, 0 otherwise.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// short path to handle 0-byte case
	CBZ	R2, equal
	// short path to handle equal pointers: a region always equals itself
	// (same check that memequal_varlen performs below)
	CMP	R0, R1
	BEQ	equal
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Register arguments: R0 = a, R1 = b. The length is not passed as an
// argument; it is loaded from the closure addressed by R26.
// Result in R0: 1 if equal, 0 otherwise.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// identical pointers: equal without touching memory
	CMP	R0, R1
	BEQ	eq
	MOVD	8(R26), R2	// compiler stores size at offset 8 in the closure
	// zero-length regions are always equal
	CBZ	R2, eq
	B	memeqbody<>(SB)
eq:
	MOVD	$1, R0
	RET

// memeqbody is the shared comparison loop reached by a tail branch from
// the two entry points above.
//
// input:
//   R0: pointer a
//   R1: pointer b
//   R2: data len (both callers in this file branch here only when R2 != 0)
// at return: result in R0 (1 = equal, 0 = not equal)
//
// R0, R1 and R2 are modified. R3-R7, R9 and V0-V11 are used as scratch.
TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R2, R3	// R3 = len rounded down to a multiple of 64
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	// Load 64 bytes from each side; both pointers are post-incremented.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Compare 64-bit lanes, then AND the four results together so that
	// a mismatch in any lane leaves a zero lane in V8.
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// Set the loop-exit flags now; the VMOV/CBZ instructions between
	// this CMP and the BNE below leave the flags untouched.
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	AND	$0x3f, R2, R2	// R2 = bytes left after the 64-byte chunks
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	BIC	$0xf, R2, R3	// R3 = remaining len rounded down to a multiple of 16
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	// Load 16 bytes from each side (post-increment) and XOR the halves;
	// any non-zero result means the regions differ.
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R2, R2	// R2 = bytes left after the 16-byte chunks
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	// Bit 3 of R2 set means 8 <= len < 16: compare the first 8 and the
	// last 8 bytes; the two loads may overlap and together cover the tail.
	TBZ	$3, R2, lt_8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	// Bit 2 of R2 set means 4 <= len < 8: same overlapping trick with
	// 4-byte loads.
	TBZ	$2, R2, lt_4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	// Bit 1 of R2 set means len is 2 or 3: compare 2 bytes and advance
	// both pointers so that a possible odd byte is handled at "one".
	TBZ	$1, R2, lt_2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	// Bit 0 of R2 clear means no byte is left to compare.
	TBZ	$0, R2, equal
one:
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	RET
not_equal:
	MOVB	ZR, R0
	RET