github.com/bir3/gocompiler@v0.9.2202/src/internal/bytealg/equal_arm64.s (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
        //
        // Register arguments as consumed below (ABIInternal):
        //   R0: pointer a
        //   R1: pointer b
        //   R2: size in bytes
        // The boolean result is returned in R0 (1 means equal).
        // Trivial cases are answered here; everything else is handed to
        // memeqbody with R0/R1/R2 left untouched.
     9  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    10  	// short path to handle 0-byte case
    11  	CBZ	R2, equal
        	// short path to handle equal pointers: a region always equals itself,
        	// so there is no need to read any memory
        	CMP	R0, R1
        	BEQ	equal
    12  	B	memeqbody<>(SB)
    13  equal:
    14  	MOVD	$1, R0
    15  	RET
    16  
    17  // memequal_varlen(a, b unsafe.Pointer) bool
        //
        // Variant of memequal whose length is not an argument: it is read from
        // the closure addressed by R26. R0 and R1 hold the two pointers and the
        // boolean result is returned in R0.
    18  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    19  	CMP	R0, R1
    20  	BEQ	same	// identical pointers: nothing to compare
    21  	MOVD	8(R26), R2	// the compiler keeps the size at offset 8 of the closure
    22  	CBZ	R2, same	// zero-length regions are equal
    23  	B	memeqbody<>(SB)	// general case; memeqbody leaves the result in R0
    24  same:
    25  	MOVD	$1, R0
    26  	RET
    27  
    28  // input:
    29  // R0: pointer a
    30  // R1: pointer b
    31  // R2: data len
    32  // at return: result in R0
        //
        // memeqbody compares R2 bytes at R0 with R2 bytes at R1 and sets R0 to 1
        // when they all match, 0 otherwise. Both callers in this file branch here
        // only when R2 is non-zero.
        // Modifies R0-R7, R9, V0-V11 and the condition flags.
        // Strategy: 64-byte vector chunks, then 16-byte chunks, then a tail of
        // fewer than 16 bytes selected by testing individual bits of R2.
    33  TEXT memeqbody<>(SB),NOSPLIT,$0
    34  	CMP	$1, R2
    35  	// handle 1-byte special case for better performance
    36  	BEQ	one
    37  	CMP	$16, R2
    38  	// handle specially if length < 16
    39  	BLO	tail
        	// R3 = length rounded down to a multiple of 64 (0 when length < 64)
    40  	BIC	$0x3f, R2, R3
    41  	CBZ	R3, chunk16
    42  	// work with 64-byte chunks
    43  	ADD	R3, R0, R6	// end of chunks
    44  chunk64_loop:
        	// load 64 bytes from each side; .P post-increments R0 and R1
    45  	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
    46  	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
        	// each 64-bit lane becomes all ones where the inputs match, zero otherwise
    47  	VCMEQ	V0.D2, V4.D2, V8.D2
    48  	VCMEQ	V1.D2, V5.D2, V9.D2
    49  	VCMEQ	V2.D2, V6.D2, V10.D2
    50  	VCMEQ	V3.D2, V7.D2, V11.D2
        	// fold the four results into V8: a zero lane means some lane mismatched
    51  	VAND	V8.B16, V9.B16, V8.B16
    52  	VAND	V8.B16, V10.B16, V8.B16
    53  	VAND	V8.B16, V11.B16, V8.B16
        	// end-of-chunks test; the VMOV/CBZ instructions below leave the flags
        	// untouched, so the BNE still sees the result of this compare
    54  	CMP	R0, R6
    55  	VMOV	V8.D[0], R4
    56  	VMOV	V8.D[1], R5
    57  	CBZ	R4, not_equal
    58  	CBZ	R5, not_equal
    59  	BNE	chunk64_loop
        	// R2 = bytes left after the 64-byte chunks
    60  	AND	$0x3f, R2, R2
    61  	CBZ	R2, equal
    62  chunk16:
    63  	// work with 16-byte chunks
        	// R3 = remaining length rounded down to a multiple of 16
    64  	BIC	$0xf, R2, R3
    65  	CBZ	R3, tail
    66  	ADD	R3, R0, R6	// end of chunks
    67  chunk16_loop:
        	// load two 8-byte words from each side, post-incrementing by 16;
        	// a non-zero XOR means the words differ
    68  	LDP.P	16(R0), (R4, R5)
    69  	LDP.P	16(R1), (R7, R9)
    70  	EOR	R4, R7
    71  	CBNZ	R7, not_equal
    72  	EOR	R5, R9
    73  	CBNZ	R9, not_equal
    74  	CMP	R0, R6
    75  	BNE	chunk16_loop
        	// R2 = bytes left after the 16-byte chunks
    76  	AND	$0xf, R2, R2
    77  	CBZ	R2, equal
    78  tail:
    79  	// special compare of tail with length < 16
        	// bit 3 set: 8 <= R2 < 16. Compare the first 8 and the last 8 bytes;
        	// the two windows may overlap and together cover the whole tail.
    80  	TBZ	$3, R2, lt_8
    81  	MOVD	(R0), R4
    82  	MOVD	(R1), R5
    83  	EOR	R4, R5
    84  	CBNZ	R5, not_equal
    85  	SUB	$8, R2, R6	// offset of the last 8 bytes
    86  	MOVD	(R0)(R6), R4
    87  	MOVD	(R1)(R6), R5
    88  	EOR	R4, R5
    89  	CBNZ	R5, not_equal
    90  	B	equal
    91  lt_8:
        	// bit 2 set: 4 <= R2 < 8. Same overlapping trick with 4-byte loads.
    92  	TBZ	$2, R2, lt_4
    93  	MOVWU	(R0), R4
    94  	MOVWU	(R1), R5
    95  	EOR	R4, R5
    96  	CBNZ	R5, not_equal
    97  	SUB	$4, R2, R6	// offset of the last 4 bytes
    98  	MOVWU	(R0)(R6), R4
    99  	MOVWU	(R1)(R6), R5
   100  	EOR	R4, R5
   101  	CBNZ	R5, not_equal
   102  	B	equal
   103  lt_4:
        	// bit 1 set: R2 is 2 or 3. Compare 2 bytes and advance both pointers
        	// by 2 so that a possible odd byte is at (R0)/(R1).
   104  	TBZ	$1, R2, lt_2
   105  	MOVHU.P	2(R0), R4
   106  	MOVHU.P	2(R1), R5
   107  	CMP	R4, R5
   108  	BNE	not_equal
   109  lt_2:
        	// bit 0 clear: no single byte remains
   110  	TBZ	$0, R2, equal
   111  one:
        	// compare the single byte at (R0) and (R1)
   112  	MOVBU	(R0), R4
   113  	MOVBU	(R1), R5
   114  	CMP	R4, R5
   115  	BNE	not_equal
   116  equal:
        	// result: 1 (equal)
   117  	MOVD	$1, R0
   118  	RET
   119  not_equal:
        	// result: 0 (not equal)
   120  	MOVB	ZR, R0
   121  	RET