github.com/icodeface/tls@v0.0.0-20230910023335-34df9250cd12/internal/bytealg/equal_arm64.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Equal(SB),NOSPLIT,$0-49
     9  	// Compares the bytes described by (a_base, a_len) and (b_base, b_len); the bool goes to ret+48(FP).
    10  	MOVD	b_len+32(FP), R3
    11  	MOVD	a_len+8(FP), R1
    12  	CMP	R1, R3
    13  	BNE	differ	// lengths disagree, so the contents cannot match
    14  	CBZ	R1, same	// both lengths are zero: equal without reading any data
    15  	// Fill in the registers memeqbody<> expects and tail-branch; it stores the result itself.
    16  	MOVD	$ret+48(FP), R8	// R8 = address of the result byte
    17  	MOVD	b_base+24(FP), R2	// R2 = start of b
    18  	MOVD	a_base+0(FP), R0	// R0 = start of a
    19  	B	memeqbody<>(SB)
    20  differ:
    21  	MOVB	ZR, ret+48(FP)
    22  	RET
    23  same:
    24  	MOVD	$1, R0
    25  	MOVB	R0, ret+48(FP)
    26  	RET
    27  
    28  // memequal(a, b unsafe.Pointer, size uintptr) bool
    29  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    30  	MOVD	size+16(FP), R1
    31  	CBZ	R1, empty	// nothing to compare: report equality right away
    32  	// Nonzero size: hand a, b and the result address to memeqbody<>, which stores the answer.
    33  	MOVD	$ret+24(FP), R8	// R8 = address of the result byte
    34  	MOVD	b+8(FP), R2	// R2 = b
    35  	MOVD	a+0(FP), R0	// R0 = a
    36  	B	memeqbody<>(SB)
    37  empty:
    38  	MOVD	$1, R0
    39  	MOVB	R0, ret+24(FP)
    40  	RET
    41  
    42  // memequal_varlen(a, b unsafe.Pointer) bool
    43  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
    44  	MOVD	b+8(FP), R4
    45  	MOVD	a+0(FP), R3
    46  	CMP	R3, R4
    47  	BEQ	trivially_equal	// identical pointers: same memory, no comparison needed
    48  	MOVD	8(R26), R5	// the compiler stores the size at offset 8 in the closure (R26)
    49  	CBZ	R5, trivially_equal	// zero bytes to compare
    50  	MOVD	R5, 24(RSP)	// outgoing argument: size
    51  	MOVD	R4, 16(RSP)	// outgoing argument: b
    52  	MOVD	R3, 8(RSP)	// outgoing argument: a
    53  	BL	runtime·memequal(SB)
    54  	MOVBU	32(RSP), R3	// result byte written by memequal
    55  	MOVB	R3, ret+16(FP)
    56  	RET
    57  trivially_equal:
    58  	MOVD	$1, R3
    59  	MOVB	R3, ret+16(FP)
    60  	RET
    61  
    62  // memeqbody<> stores 1 through R8 when the R1 bytes at R0 and at R2 are identical, 0 otherwise. Input:
    63  // R0: pointer a (advanced by the post-increment loads below)
    64  // R1: data len in bytes (reduced to the remaining count after each chunk loop)
    65  // R2: pointer b (advanced in step with R0)
    66  // R8: address to put result (a single byte, written at the equal/not_equal exits)
    67  TEXT memeqbody<>(SB),NOSPLIT,$0	// also overwrites R3-R7, R9, V0-V11 and the condition flags
    68  	CMP	$1, R1
    69  	// handle 1-byte special case for better performance
    70  	BEQ	one
    71  	CMP	$16, R1
    72  	// handle specially if length < 16 (unsigned compare)
    73  	BLO	tail
    74  	BIC	$0x3f, R1, R3	// R3 = len rounded down to a multiple of 64
    75  	CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    76  	// work with 64-byte chunks
    77  	ADD	R3, R0, R6	// R6 = R0 + R3: the value R0 reaches once every 64-byte chunk is consumed
    78  chunk64_loop:
    79  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// load 64 bytes of a into V0-V3, post-incrementing R0
    80  	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]	// load 64 bytes of b into V4-V7, post-incrementing R2
    81  	VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if the lanes match, zero otherwise
    82  	VCMEQ	V1.D2, V5.D2, V9.D2
    83  	VCMEQ	V2.D2, V6.D2, V10.D2
    84  	VCMEQ	V3.D2, V7.D2, V11.D2
    85  	VAND	V8.B16, V9.B16, V8.B16	// fold the four masks into V8: a lane stays nonzero only if it matched in all four pairs
    86  	VAND	V8.B16, V10.B16, V8.B16
    87  	VAND	V8.B16, V11.B16, V8.B16
    88  	CMP	R0, R6	// flags for the BNE below; the VMOV and CBZ in between do not modify them
    89  	VMOV	V8.D[0], R4
    90  	VMOV	V8.D[1], R5
    91  	CBZ	R4, not_equal	// a zero lane means some 8-byte group in this chunk differed
    92  	CBZ	R5, not_equal
    93  	BNE	chunk64_loop	// R0 != R6: more 64-byte chunks remain
    94  	AND	$0x3f, R1, R1	// R1 = bytes left after the 64-byte chunks (0..63)
    95  	CBZ	R1, equal
    96  chunk16:
    97  	// work with 16-byte chunks (R1 < 64 here)
    98  	BIC	$0xf, R1, R3	// R3 = remaining len rounded down to a multiple of 16
    99  	CBZ	R3, tail
   100  	ADD	R3, R0, R6	// R6 = R0 + R3: the value R0 reaches once every 16-byte chunk is consumed
   101  chunk16_loop:
   102  	LDP.P	16(R0), (R4, R5)	// R4, R5 = next 16 bytes of a; R0 += 16
   103  	LDP.P	16(R2), (R7, R9)	// R7, R9 = next 16 bytes of b; R2 += 16
   104  	EOR	R4, R7	// R7 ^= R4: nonzero iff the first 8 bytes differ
   105  	CBNZ	R7, not_equal
   106  	EOR	R5, R9	// R9 ^= R5: nonzero iff the second 8 bytes differ
   107  	CBNZ	R9, not_equal
   108  	CMP	R0, R6
   109  	BNE	chunk16_loop
   110  	AND	$0xf, R1, R1	// R1 = bytes left after the 16-byte chunks (0..15)
   111  	CBZ	R1, equal
   112  tail:
   113  	// special compare of tail with length < 16; the bits of R1 select the load widths
   114  	TBZ	$3, R1, lt_8	// bit 3 clear: fewer than 8 bytes left
   115  	MOVD	(R0), R4	// 8..15 bytes: compare the first 8 bytes...
   116  	MOVD	(R2), R5
   117  	EOR	R4, R5
   118  	CBNZ	R5, not_equal
   119  	SUB	$8, R1, R6	// offset of the last 8 bytes
   120  	MOVD	(R0)(R6), R4	// ...then the last 8 bytes; the two windows may overlap and together cover all R1 bytes
   121  	MOVD	(R2)(R6), R5
   122  	EOR	R4, R5
   123  	CBNZ	R5, not_equal
   124  	B	equal
   125  lt_8:
   126  	TBZ	$2, R1, lt_4	// bit 2 clear: fewer than 4 bytes left
   127  	MOVWU	(R0), R4	// 4..7 bytes: compare the first 4 bytes...
   128  	MOVWU	(R2), R5
   129  	EOR	R4, R5
   130  	CBNZ	R5, not_equal
   131  	SUB	$4, R1, R6	// offset of the last 4 bytes
   132  	MOVWU	(R0)(R6), R4	// ...then the last 4 bytes, again possibly overlapping the first load
   133  	MOVWU	(R2)(R6), R5
   134  	EOR	R4, R5
   135  	CBNZ	R5, not_equal
   136  	B	equal
   137  lt_4:
   138  	TBZ	$1, R1, lt_2	// bit 1 clear: at most one byte left
   139  	MOVHU.P	2(R0), R4	// 2..3 bytes: compare 2 bytes and step both pointers past them
   140  	MOVHU.P	2(R2), R5
   141  	CMP	R4, R5
   142  	BNE	not_equal
   143  lt_2:
   144  	TBZ	$0, R1, equal	// bit 0 clear: no odd byte left to check
   145  one:
   146  	MOVBU	(R0), R4	// compare the single remaining byte
   147  	MOVBU	(R2), R5
   148  	CMP	R4, R5
   149  	BNE	not_equal
   150  equal:
   151  	MOVD	$1, R0
   152  	MOVB	R0, (R8)	// store 1 (equal) at the result address
   153  	RET
   154  not_equal:
   155  	MOVB	ZR, (R8)	// store 0 (not equal) at the result address
   156  	RET