// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a are identical to the size bytes at b.
// Arguments and result live on the stack: a+0, b+8, size+16, ret+24.
// The zero-length and identical-pointer cases are answered here; every other
// case tail-branches to memeqbody<>, which writes the result through R8.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVD	size+16(FP), R1
	// short path to handle 0-byte case
	CBZ	R1, equal
	MOVD	a+0(FP), R0
	MOVD	b+8(FP), R2
	// short path to handle equal pointers: a region always equals itself
	// (the same check memequal_varlen performs before calling in here)
	CMP	R0, R2
	BEQ	equal
	MOVD	$ret+24(FP), R8	// R8 = address of the result byte
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
	MOVB	R0, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same comparison as memequal, but the length is not an argument: it is
// loaded from offset 8 of the closure addressed by R26.
// Identical pointers and a zero length both yield true without calling out.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
	CBZ	R5, eq
	// build memequal's stack arguments: a, b, size
	MOVD	R3, 8(RSP)
	MOVD	R4, 16(RSP)
	MOVD	R5, 24(RSP)
	BL	runtime·memequal(SB)
	// memequal's result byte comes back in the slot after its arguments
	MOVBU	32(RSP), R3
	MOVB	R3, ret+16(FP)
	RET
eq:
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// memeqbody<> compares R1 bytes at R0 against R1 bytes at R2 and stores
// 1 (equal) or 0 (not equal) as a byte at the address in R8.
//
// input:
//   R0: pointer a
//   R1: data len
//   R2: pointer b
//   R8: address to put result
//
// R0, R1 and R2 are modified (the pointers are post-incremented and the
// length is reduced to the remaining tail). R3-R7, R9 and V0-V11 are used
// as scratch.
TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R1
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R1
	// handle specially if length < 16
	BLO	tail
	BIC	$0x3f, R1, R3	// R3 = len rounded down to a multiple of 64
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
	// each 64-bit lane of V8-V11 becomes all ones if the lanes match, else 0
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// fold the four masks: a lane of V8 ends up 0 if any pair mismatched
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// set the loop-exit flags now; the VMOV/CBZ instructions below leave the
	// condition flags untouched, so BNE still sees this comparison
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	AND	$0x3f, R1, R1	// R1 = bytes left after the 64-byte chunks
	CBZ	R1, equal
chunk16:
	// work with 16-byte chunks
	BIC	$0xf, R1, R3	// R3 = remaining len rounded down to a multiple of 16
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R2), (R7, R9)
	// XOR is non-zero exactly when the two 8-byte words differ
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	AND	$0xf, R1, R1	// R1 = bytes left after the 16-byte chunks
	CBZ	R1, equal
tail:
	// special compare of tail with length < 16
	TBZ	$3, R1, lt_8
	// 8 <= len < 16: compare the first 8 and the last 8 bytes; the two
	// windows may overlap, which is harmless for an equality test
	MOVD	(R0), R4
	MOVD	(R2), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R1, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R2)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	TBZ	$2, R1, lt_4
	// 4 <= len < 8: same overlapping-window scheme with 4-byte loads
	MOVWU	(R0), R4
	MOVWU	(R2), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R1, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R2)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	TBZ	$1, R1, lt_2
	// len is 2 or 3: compare 2 bytes and advance both pointers past them
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R2), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	// bit 0 clear means no odd byte is left
	TBZ	$0, R1, equal
one:
	MOVBU	(R0), R4
	MOVBU	(R2), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	MOVD	$1, R0
	MOVB	R0, (R8)
	RET
not_equal:
	MOVB	ZR, (R8)
	RET