github.com/sandwichdev/go-internals@v0.0.0-20210605002614-12311ac6b2c5/bytealg/equal_arm64.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Entry point that reports whether the size bytes at a and b are equal.
        // Arguments are read from the caller's frame: a+0(FP), b+8(FP),
        // size+16(FP); the one-byte result is written to ret+24(FP).
        // A size of zero is answered here with true; every other size is
        // handed to memeqbody<>, which stores the result through R8 and
        // returns on this function's behalf (the transfer is a plain branch,
        // not a call). This entry does not compare the two pointers themselves.
     9  TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
    10  	MOVD	size+16(FP), R1	// R1 = size (data len expected by memeqbody<>)
    11  	// short path to handle 0-byte case
    12  	CBZ	R1, equal
    13  	MOVD	a+0(FP), R0	// R0 = pointer a
    14  	MOVD	b+8(FP), R2	// R2 = pointer b
    15  	MOVD	$ret+24(FP), R8	// R8 = address of the result slot, not its value
    16  	B	memeqbody<>(SB)	// tail branch: memeqbody<> writes (R8) and executes RET
    17  equal:
    18  	MOVD	$1, R0
    19  	MOVB	R0, ret+24(FP)	// zero-length inputs compare equal: result = true
    20  	RET
    21  
    22  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Variant of memequal whose length is not passed as an argument but is
        // read from offset 8 of the object addressed by R26 (see the comment on
        // that load below). Returns true immediately when a and b are the same
        // pointer or when the length is zero; otherwise it calls
        // runtime·memequal with (a, b, size) and forwards its result.
        //
        // Frame: $40 bytes local, 17 bytes of arguments+result (a+0, b+8,
        // ret+16). The outgoing call area used here is 8(RSP)..32(RSP);
        // 0(RSP) is not touched by this code.
        // NOTE(review): 0(RSP) is presumably the slot Go's arm64 stack layout
        // reserves for the saved return address; confirm against the Go
        // assembler documentation.
    23  TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
    24  	MOVD	a+0(FP), R3	// R3 = pointer a
    25  	MOVD	b+8(FP), R4	// R4 = pointer b
    26  	CMP	R3, R4
    27  	BEQ	eq	// identical pointers: equal without touching memory
    28  	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
    29  	CBZ	R5, eq	// zero length: equal
    30  	MOVD	R3, 8(RSP)	// outgoing argument a for memequal
    31  	MOVD	R4, 16(RSP)	// outgoing argument b
    32  	MOVD	R5, 24(RSP)	// outgoing argument size
    33  	BL	runtime·memequal(SB)
    34  	MOVBU	32(RSP), R3	// memequal's bool result, stored just after its three arguments
    35  	MOVB	R3, ret+16(FP)	// forward it as this function's result
    36  	RET
    37  eq:
    38  	MOVD	$1, R3
    39  	MOVB	R3, ret+16(FP)	// result = true
    40  	RET
    41  
    42  // input:
    43  // R0: pointer a
    44  // R1: data len
    45  // R2: pointer b
    46  // R8: address to put result
        //
        // Shared comparison body. Stores 1 to the byte at (R8) when the R1
        // bytes at R0 and R2 are identical, 0 otherwise, then returns.
        //
        // Strategy, by remaining length:
        //   len == 1   single byte compare (label one)
        //   len >= 64  64-byte vector chunks (chunk64_loop)
        //   len >= 16  16-byte chunks via register pairs (chunk16_loop)
        //   len <  16  bit tests on the length select 8/4/2/1-byte compares
        //              (tail, lt_8, lt_4, lt_2)
        //
        // Registers written by this code: R0-R7, R9, V0-V11 and the condition
        // flags. R8 is only read.
    47  TEXT memeqbody<>(SB),NOSPLIT,$0
    48  	CMP	$1, R1
    49  	// handle 1-byte special case for better performance
    50  	BEQ	one
    51  	CMP	$16, R1
    52  	// handle specially if length < 16
    53  	BLO	tail
    54  	BIC	$0x3f, R1, R3	// R3 = len with the low 6 bits cleared = bytes in whole 64-byte chunks
    55  	CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    56  	// work with 64-byte chunks
    57  	ADD	R3, R0, R6	// end of chunks
    58  chunk64_loop:
        	// Load 64 bytes from each input; the .P suffix post-increments the
        	// base register, which is what eventually makes R0 reach R6.
    59  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
    60  	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
        	// Lane-wise equality on 64-bit lanes: a lane becomes all ones when
        	// equal and zero when different.
    61  	VCMEQ	V0.D2, V4.D2, V8.D2
    62  	VCMEQ	V1.D2, V5.D2, V9.D2
    63  	VCMEQ	V2.D2, V6.D2, V10.D2
    64  	VCMEQ	V3.D2, V7.D2, V11.D2
        	// AND the four masks together: a lane of V8 stays non-zero only if
        	// that lane matched in all four register pairs.
    65  	VAND	V8.B16, V9.B16, V8.B16
    66  	VAND	V8.B16, V10.B16, V8.B16
    67  	VAND	V8.B16, V11.B16, V8.B16
    68  	CMP	R0, R6	// sets flags for the BNE below; the VMOV/CBZ in between leave them intact
    69  	VMOV	V8.D[0], R4	// low 64-bit lane of the combined mask
    70  	VMOV	V8.D[1], R5	// high 64-bit lane of the combined mask
    71  	CBZ	R4, not_equal	// a zero lane means some byte in this chunk differed
    72  	CBZ	R5, not_equal
    73  	BNE	chunk64_loop	// R0 != R6: more 64-byte chunks remain
    74  	AND	$0x3f, R1, R1	// R1 = bytes left after the 64-byte chunks
    75  	CBZ	R1, equal
    76  chunk16:
    77  	// work with 16-byte chunks
    78  	BIC	$0xf, R1, R3	// R3 = bytes in whole 16-byte chunks
    79  	CBZ	R3, tail
    80  	ADD	R3, R0, R6	// end of chunks
    81  chunk16_loop:
    82  	LDP.P	16(R0), (R4, R5)	// 16 bytes of a, then R0 += 16
    83  	LDP.P	16(R2), (R7, R9)	// 16 bytes of b, then R2 += 16
    84  	EOR	R4, R7	// R7 ^= R4: non-zero iff the first 8 bytes differ
    85  	CBNZ	R7, not_equal
    86  	EOR	R5, R9	// R9 ^= R5: non-zero iff the second 8 bytes differ
    87  	CBNZ	R9, not_equal
    88  	CMP	R0, R6
    89  	BNE	chunk16_loop
    90  	AND	$0xf, R1, R1	// R1 = bytes left after the 16-byte chunks
    91  	CBZ	R1, equal
    92  tail:
    93  	// special compare of tail with length < 16
        	// Bit 3 of the length set means 8..15 bytes remain: compare the
        	// first 8 and the last 8 bytes. The two loads may overlap, which is
        	// harmless for an equality test.
    94  	TBZ	$3, R1, lt_8
    95  	MOVD	(R0), R4
    96  	MOVD	(R2), R5
    97  	EOR	R4, R5
    98  	CBNZ	R5, not_equal
    99  	SUB	$8, R1, R6	// offset of the last 8 bytes
   100  	MOVD	(R0)(R6), R4
   101  	MOVD	(R2)(R6), R5
   102  	EOR	R4, R5
   103  	CBNZ	R5, not_equal
   104  	B	equal
   105  lt_8:
        	// Bit 2 set means 4..7 bytes remain: same first/last trick with
        	// 4-byte loads.
   106  	TBZ	$2, R1, lt_4
   107  	MOVWU	(R0), R4
   108  	MOVWU	(R2), R5
   109  	EOR	R4, R5
   110  	CBNZ	R5, not_equal
   111  	SUB	$4, R1, R6	// offset of the last 4 bytes
   112  	MOVWU	(R0)(R6), R4
   113  	MOVWU	(R2)(R6), R5
   114  	EOR	R4, R5
   115  	CBNZ	R5, not_equal
   116  	B	equal
   117  lt_4:
        	// Bit 1 set means 2 or 3 bytes remain: compare one halfword and
        	// advance both pointers by 2 so a possible third byte is next.
   118  	TBZ	$1, R1, lt_2
   119  	MOVHU.P	2(R0), R4
   120  	MOVHU.P	2(R2), R5
   121  	CMP	R4, R5
   122  	BNE	not_equal
   123  lt_2:
   124  	TBZ	$0, R1, equal	// bit 0 clear: no odd byte left, everything matched
   125  one:
   126  	MOVBU	(R0), R4	// final (or only) byte of a
   127  	MOVBU	(R2), R5	// final (or only) byte of b
   128  	CMP	R4, R5
   129  	BNE	not_equal
   130  equal:
   131  	MOVD	$1, R0
   132  	MOVB	R0, (R8)	// result = true, written through the caller-supplied address
   133  	RET
   134  not_equal:
   135  	MOVB	ZR, (R8)	// result = false (zero register)
   136  	RET